diff options
author | Nicolin Chen <nicolinc@nvidia.com> | 2024-09-05 04:40:43 +0200 |
---|---|---|
committer | Will Deacon <will@kernel.org> | 2024-09-05 17:07:38 +0200 |
commit | 483e0bd8883a40fd3dd3193997a4014337698d72 (patch) | |
tree | 0f91469daa2727d72eb1a441d1b7639c2dae36af /drivers/iommu/arm | |
parent | iommu/tegra241-cmdqv: Drop static at local variable (diff) | |
download | linux-483e0bd8883a40fd3dd3193997a4014337698d72.tar.xz linux-483e0bd8883a40fd3dd3193997a4014337698d72.zip |
iommu/tegra241-cmdqv: Do not allocate vcmdq until dma_set_mask_and_coherent
It's observed that, when the first 4GB of system memory was reserved, all
VCMDQ allocations failed (even with the smallest qsz in the last attempt):
arm-smmu-v3: found companion CMDQV device: NVDA200C:00
arm-smmu-v3: option mask 0x10
arm-smmu-v3: failed to allocate queue (0x8000 bytes) for vcmdq0
acpi NVDA200C:00: tegra241_cmdqv: Falling back to standard SMMU CMDQ
arm-smmu-v3: ias 48-bit, oas 48-bit (features 0x001e1fbf)
arm-smmu-v3: allocated 524288 entries for cmdq
arm-smmu-v3: allocated 524288 entries for evtq
arm-smmu-v3: allocated 524288 entries for priq
This is because the 4GB reserved memory shifted the entire DMA zone from a
lower 32-bit range (on a system without the 4GB carveout) to higher range,
while the dev->coherent_dma_mask was set to DMA_BIT_MASK(32) by default.
The dma_set_mask_and_coherent() call is done in arm_smmu_device_hw_probe()
of the SMMU driver. So any DMA allocation from tegra241_cmdqv_probe() must
wait until the coherent_dma_mask is correctly set.
Move the vintf/vcmdq structure initialization routine into a different op,
"init_structures". Call it at the end of arm_smmu_init_structures(), where
standard SMMU queues get allocated.
Most of the impl_ops aren't ready until vintf/vcmdq structure are init-ed.
So replace the full impl_ops with an init_ops in __tegra241_cmdqv_probe().
And switch to tegra241_cmdqv_impl_ops later in arm_smmu_init_structures().
Note that tegra241_cmdqv_impl_ops does not link to the new init_structures
op after this switch, since there is no point in having it once it's done.
Fixes: 918eb5c856f6 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV")
Reported-by: Matt Ochs <mochs@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/530993c3aafa1b0fc3d879b8119e13c629d12e2b.1725503154.git.nicolinc@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
Diffstat (limited to 'drivers/iommu/arm')
-rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 9 | ||||
-rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 | ||||
-rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 83 |
3 files changed, 60 insertions, 33 deletions
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index b2de56dfceb9..df852ab04fd7 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3744,7 +3744,14 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu) if (ret) return ret; - return arm_smmu_init_strtab(smmu); + ret = arm_smmu_init_strtab(smmu); + if (ret) + return ret; + + if (smmu->impl_ops && smmu->impl_ops->init_structures) + return smmu->impl_ops->init_structures(smmu); + + return 0; } static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index e044ce5b5372..e8320e9341d7 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -643,6 +643,7 @@ struct arm_smmu_strtab_cfg { struct arm_smmu_impl_ops { int (*device_reset)(struct arm_smmu_device *smmu); void (*device_remove)(struct arm_smmu_device *smmu); + int (*init_structures)(struct arm_smmu_device *smmu); struct arm_smmu_cmdq *(*get_secondary_cmdq)( struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent); }; diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index 0766dc2789cb..fcd13d301fff 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -755,18 +755,65 @@ free_list: return res; } +static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu) +{ + struct tegra241_cmdqv *cmdqv = + container_of(smmu, struct tegra241_cmdqv, smmu); + struct tegra241_vintf *vintf; + int lidx; + int ret; + + vintf = kzalloc(sizeof(*vintf), GFP_KERNEL); + if (!vintf) + goto out_fallback; + + /* Init VINTF0 for in-kernel use */ + ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf); + if (ret) { + dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret); + goto free_vintf; + } + + /* Preallocate logical VCMDQs to VINTF0 */ + for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) { + struct tegra241_vcmdq *vcmdq; + + vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx); + if (IS_ERR(vcmdq)) + goto free_lvcmdq; + } + + /* Now, we are ready to run all the impl ops */ + smmu->impl_ops = &tegra241_cmdqv_impl_ops; + return 0; + +free_lvcmdq: + for (lidx--; lidx >= 0; lidx--) + tegra241_vintf_free_lvcmdq(vintf, lidx); + tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx); +free_vintf: + kfree(vintf); +out_fallback: + dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n"); + smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV; + tegra241_cmdqv_remove(smmu); + return 0; +} + struct dentry *cmdqv_debugfs_dir; static struct arm_smmu_device * __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, int irq) { + static const struct arm_smmu_impl_ops init_ops = { + .init_structures = tegra241_cmdqv_init_structures, + .device_remove = tegra241_cmdqv_remove, + }; struct tegra241_cmdqv *cmdqv = NULL; struct arm_smmu_device *new_smmu; - struct tegra241_vintf *vintf; void __iomem *base; u32 regval; - int lidx; int ret; static_assert(offsetof(struct tegra241_cmdqv, smmu) == 0); @@ -815,26 +862,6 @@ __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, ida_init(&cmdqv->vintf_ids); - vintf = kzalloc(sizeof(*vintf), GFP_KERNEL); - if (!vintf) - goto destroy_ids; - - /* Init VINTF0 for in-kernel use */ - ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf); - if (ret) { - dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret); - goto free_vintf; - } - - /* Preallocate logical VCMDQs to VINTF0 */ - for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) { - struct tegra241_vcmdq *vcmdq; - - vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx); - if (IS_ERR(vcmdq)) - goto free_lvcmdq; - } - #ifdef CONFIG_IOMMU_DEBUGFS if (!cmdqv_debugfs_dir) { cmdqv_debugfs_dir = @@ -844,19 +871,11 @@ __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, } #endif - new_smmu->impl_ops = &tegra241_cmdqv_impl_ops; + /* Provide init-level ops only, until tegra241_cmdqv_init_structures */ + new_smmu->impl_ops = &init_ops; return new_smmu; -free_lvcmdq: - for (lidx--; lidx >= 0; lidx--) - tegra241_vintf_free_lvcmdq(vintf, lidx); - tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx); -free_vintf: - kfree(vintf); -destroy_ids: - ida_destroy(&cmdqv->vintf_ids); - kfree(cmdqv->vintfs); free_irq: if (cmdqv->irq > 0) free_irq(cmdqv->irq, cmdqv); |