iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV

NVIDIA's Tegra241 Soc has a CMDQ-Virtualization (CMDQV) hardware, extending the standard ARM SMMU v3 IP to support multiple VCMDQs with virtualization capabilities. In terms of command queue, they are very like a standard SMMU CMDQ (or ECMDQs), but only support CS_NONE in the CS field of CMD_SYNC. Add a new tegra241-cmdqv driver, and insert its structure pointer into the existing arm_smmu_device, and then add related function calls in the SMMUv3 driver to interact with the CMDQV driver. In the CMDQV driver, add a minimal part for the in-kernel support: reserve VINTF0 for in-kernel use, and assign some of the VCMDQs to the VINTF0, and select one VCMDQ based on the current CPU ID to execute supported commands. This multi-queue design for in-kernel use gives some limited improvements: up to 20% reduction of invalidation time was measured by a multi-threaded DMA unmap benchmark, compared to a single queue. The other part of the CMDQV driver will be user-space support that gives a hypervisor running on the host OS to talk to the driver for virtualization use cases, allowing VMs to use VCMDQs without trappings, i.e. no VM Exits. This is designed based on IOMMUFD, and its RFC series is also under review. It will provide a guest OS a bigger improvement: 70% to 90% reductions of TLB invalidation time were measured by DMA unmap tests running in a guest, compared to nested SMMU CMDQ (with trappings). As the initial version, the CMDQV driver only supports ACPI configurations. Signed-off-by: Nate Watterson <nwatterson@nvidia.com> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> Co-developed-by: Nicolin Chen <nicolinc@nvidia.com> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> Link: https://lore.kernel.org/r/dce50490b2c10b7254fb36aa73ed7ffd812b283a.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon <will@kernel.org>
author: Nate Watterson <nwatterson@nvidia.com> 2024-08-30 00:34:37 +0200
committer: Will Deacon <will@kernel.org> 2024-08-30 16:28:03 +0200
commit: 918eb5c856f6ce4cf93b4b38e4b5e156905c5943 (patch)
tree: a83241569339c9100f9901086e35652426723c04 /drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
parent: iommu/arm-smmu-v3: Add struct arm_smmu_impl_ops (diff)
download: linux-918eb5c856f6ce4cf93b4b38e4b5e156905c5943.tar.xz
linux-918eb5c856f6ce4cf93b4b38e4b5e156905c5943.zip
1 files changed, 32 insertions, 1 deletions
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 01dc752e9338..dee23ec352e7 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -4354,6 +4354,31 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 }
 
 #ifdef CONFIG_ACPI
+#ifdef CONFIG_TEGRA241_CMDQV
+static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
+						struct arm_smmu_device *smmu)
+{
+	const char *uid = kasprintf(GFP_KERNEL, "%u", node->identifier);
+	struct acpi_device *adev;
+
+	/* Look for an NVDA200C node whose _UID matches the SMMU node ID */
+	adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1);
+	if (adev) {
+		/* Tegra241 CMDQV driver is responsible for put_device() */
+		smmu->impl_dev = &adev->dev;
+		smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV;
+		dev_info(smmu->dev, "found companion CMDQV device: %s\n",
+			 dev_name(smmu->impl_dev));
+	}
+	kfree(uid);
+}
+#else
+static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
+						struct arm_smmu_device *smmu)
+{
+}
+#endif
+
 static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node,
 				      struct arm_smmu_device *smmu)
 {
@@ -4368,6 +4393,11 @@ static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node,
 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
 		break;
 	case ACPI_IORT_SMMU_V3_GENERIC:
+		/*
+		 * Tegra241 implementation stores its SMMU options and impl_dev
+		 * in DSDT. Thus, go through the ACPI tables unconditionally.
+		 */
+		acpi_smmu_dsdt_probe_tegra241_cmdqv(node, smmu);
 		break;
 	}
 
@@ -4497,7 +4527,8 @@ static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
 	struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV);
 	int ret;
 
-	/* Add impl probe */
+	if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV))
+		new_smmu = tegra241_cmdqv_probe(smmu);
 
 	if (new_smmu == ERR_PTR(-ENODEV))
 		return smmu;
author	Nate Watterson <nwatterson@nvidia.com>	2024-08-30 00:34:37 +0200
committer	Will Deacon <will@kernel.org>	2024-08-30 16:28:03 +0200
commit	918eb5c856f6ce4cf93b4b38e4b5e156905c5943 (patch)
tree	a83241569339c9100f9901086e35652426723c04 /drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
parent	iommu/arm-smmu-v3: Add struct arm_smmu_impl_ops (diff)
download	linux-918eb5c856f6ce4cf93b4b38e4b5e156905c5943.tar.xz linux-918eb5c856f6ce4cf93b4b38e4b5e156905c5943.zip