summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlistair Popple <apopple@nvidia.com>2023-07-25 15:42:07 +0200
committerAndrew Morton <akpm@linux-foundation.org>2023-08-18 19:12:41 +0200
commit1af5a8109904b7f00828e7f9f63f5695b42f8215 (patch)
tree2f37b095fb84abe7f1429c6714182f8d6b7bee1c
parentmmu_notifiers: don't invalidate secondary TLBs as part of mmu_notifier_invali... (diff)
downloadlinux-1af5a8109904b7f00828e7f9f63f5695b42f8215.tar.xz
linux-1af5a8109904b7f00828e7f9f63f5695b42f8215.zip
mmu_notifiers: rename invalidate_range notifier
There are two main use cases for mmu notifiers. One is by KVM which uses mmu_notifier_invalidate_range_start()/end() to manage a software TLB. The other is to manage hardware TLBs which need to use the invalidate_range() callback because HW can establish new TLB entries at any time. Hence using start/end() can lead to memory corruption as these callbacks happen too soon/late during page unmap. mmu notifier users should therefore either use the start()/end() callbacks or the invalidate_range() callbacks. To make this usage clearer rename the invalidate_range() callback to arch_invalidate_secondary_tlbs() and update documention. Link: https://lkml.kernel.org/r/6f77248cd25545c8020a54b4e567e8b72be4dca1.1690292440.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple <apopple@nvidia.com> Suggested-by: Jason Gunthorpe <jgg@nvidia.com> Acked-by: Catalin Marinas <catalin.marinas@arm.com> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> Cc: Andrew Donnellan <ajd@linux.ibm.com> Cc: Chaitanya Kumar Borah <chaitanya.kumar.borah@intel.com> Cc: Frederic Barrat <fbarrat@linux.ibm.com> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Kevin Tian <kevin.tian@intel.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: Nicolin Chen <nicolinc@nvidia.com> Cc: Robin Murphy <robin.murphy@arm.com> Cc: Sean Christopherson <seanjc@google.com> Cc: SeongJae Park <sj@kernel.org> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: Will Deacon <will@kernel.org> Cc: Zhi Wang <zhi.wang.linux@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-rw-r--r--arch/arm64/include/asm/tlbflush.h6
-rw-r--r--arch/powerpc/mm/book3s64/radix_hugetlbpage.c2
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c8
-rw-r--r--arch/x86/include/asm/tlbflush.h2
-rw-r--r--arch/x86/mm/tlb.c2
-rw-r--r--drivers/iommu/amd/iommu_v2.c10
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c13
-rw-r--r--drivers/iommu/intel/svm.c8
-rw-r--r--drivers/misc/ocxl/link.c8
-rw-r--r--include/linux/mmu_notifier.h48
-rw-r--r--mm/huge_memory.c4
-rw-r--r--mm/hugetlb.c7
-rw-r--r--mm/mmu_notifier.c21
13 files changed, 76 insertions, 63 deletions
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index a99349d10c0e..84a05a0bd2b6 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -253,7 +253,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
__tlbi(aside1is, asid);
__tlbi_user(aside1is, asid);
dsb(ish);
- mmu_notifier_invalidate_range(mm, 0, -1UL);
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
static inline void __flush_tlb_page_nosync(struct mm_struct *mm,
@@ -265,7 +265,7 @@ static inline void __flush_tlb_page_nosync(struct mm_struct *mm,
addr = __TLBI_VADDR(uaddr, ASID(mm));
__tlbi(vale1is, addr);
__tlbi_user(vale1is, addr);
- mmu_notifier_invalidate_range(mm, uaddr & PAGE_MASK,
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, uaddr & PAGE_MASK,
(uaddr & PAGE_MASK) + PAGE_SIZE);
}
@@ -400,7 +400,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
scale++;
}
dsb(ish);
- mmu_notifier_invalidate_range(vma->vm_mm, start, end);
+ mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
}
static inline void flush_tlb_range(struct vm_area_struct *vma,
diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
index f3fb49fd32fe..17075c78d4bc 100644
--- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
@@ -39,7 +39,7 @@ void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long st
radix__flush_tlb_pwc_range_psize(vma->vm_mm, start, end, psize);
else
radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize);
- mmu_notifier_invalidate_range(vma->vm_mm, start, end);
+ mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
}
void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 4d44902a4962..06e647ef19d1 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -987,7 +987,7 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
}
}
preempt_enable();
- mmu_notifier_invalidate_range(mm, 0, -1UL);
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
EXPORT_SYMBOL(radix__flush_tlb_mm);
@@ -1021,7 +1021,7 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
}
preempt_enable();
- mmu_notifier_invalidate_range(mm, 0, -1UL);
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
void radix__flush_all_mm(struct mm_struct *mm)
@@ -1230,7 +1230,7 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
}
out:
preempt_enable();
- mmu_notifier_invalidate_range(mm, start, end);
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
}
void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
@@ -1395,7 +1395,7 @@ static void __radix__flush_tlb_range_psize(struct mm_struct *mm,
}
out:
preempt_enable();
- mmu_notifier_invalidate_range(mm, start, end);
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
}
void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 0a5432364c5a..6ab42caaa67a 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -283,7 +283,7 @@ static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *b
{
inc_mm_tlb_gen(mm);
cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
- mmu_notifier_invalidate_range(mm, 0, -1UL);
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 93b2f81f09da..2d253919b3e8 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1037,7 +1037,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
put_flush_tlb_info();
put_cpu();
- mmu_notifier_invalidate_range(mm, start, end);
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
}
diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
index 261352a23271..2596466cd5a6 100644
--- a/drivers/iommu/amd/iommu_v2.c
+++ b/drivers/iommu/amd/iommu_v2.c
@@ -355,9 +355,9 @@ static struct pasid_state *mn_to_state(struct mmu_notifier *mn)
return container_of(mn, struct pasid_state, mn);
}
-static void mn_invalidate_range(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start, unsigned long end)
+static void mn_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end)
{
struct pasid_state *pasid_state;
struct device_state *dev_state;
@@ -391,8 +391,8 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
}
static const struct mmu_notifier_ops iommu_mn = {
- .release = mn_release,
- .invalidate_range = mn_invalidate_range,
+ .release = mn_release,
+ .arch_invalidate_secondary_tlbs = mn_arch_invalidate_secondary_tlbs,
};
static void set_pri_tag_status(struct pasid_state *pasid_state,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 2a19784b698e..dbc812a0e57e 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -186,9 +186,10 @@ static void arm_smmu_free_shared_cd(struct arm_smmu_ctx_desc *cd)
}
}
-static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start, unsigned long end)
+static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
{
struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn);
struct arm_smmu_domain *smmu_domain = smmu_mn->domain;
@@ -247,9 +248,9 @@ static void arm_smmu_mmu_notifier_free(struct mmu_notifier *mn)
}
static const struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = {
- .invalidate_range = arm_smmu_mm_invalidate_range,
- .release = arm_smmu_mm_release,
- .free_notifier = arm_smmu_mmu_notifier_free,
+ .arch_invalidate_secondary_tlbs = arm_smmu_mm_arch_invalidate_secondary_tlbs,
+ .release = arm_smmu_mm_release,
+ .free_notifier = arm_smmu_mmu_notifier_free,
};
/* Allocate or get existing MMU notifier for this {domain, mm} pair */
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index e95b339e9cdc..8f6d68006ab6 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -219,9 +219,9 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
}
/* Pages have been freed at this point */
-static void intel_invalidate_range(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start, unsigned long end)
+static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end)
{
struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
@@ -256,7 +256,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
static const struct mmu_notifier_ops intel_mmuops = {
.release = intel_mm_release,
- .invalidate_range = intel_invalidate_range,
+ .arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs,
};
static DEFINE_MUTEX(pasid_mutex);
diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
index 4cf4c55a5f00..c06c699c0e7b 100644
--- a/drivers/misc/ocxl/link.c
+++ b/drivers/misc/ocxl/link.c
@@ -491,9 +491,9 @@ void ocxl_link_release(struct pci_dev *dev, void *link_handle)
}
EXPORT_SYMBOL_GPL(ocxl_link_release);
-static void invalidate_range(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start, unsigned long end)
+static void arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end)
{
struct pe_data *pe_data = container_of(mn, struct pe_data, mmu_notifier);
struct ocxl_link *link = pe_data->link;
@@ -509,7 +509,7 @@ static void invalidate_range(struct mmu_notifier *mn,
}
static const struct mmu_notifier_ops ocxl_mmu_notifier_ops = {
- .invalidate_range = invalidate_range,
+ .arch_invalidate_secondary_tlbs = arch_invalidate_secondary_tlbs,
};
static u64 calculate_cfg_state(bool kernel)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index f2e9edc6aa43..6e3c857606f1 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -187,27 +187,27 @@ struct mmu_notifier_ops {
const struct mmu_notifier_range *range);
/*
- * invalidate_range() is either called between
- * invalidate_range_start() and invalidate_range_end() when the
- * VM has to free pages that where unmapped, but before the
- * pages are actually freed, or outside of _start()/_end() when
- * a (remote) TLB is necessary.
+ * arch_invalidate_secondary_tlbs() is used to manage a non-CPU TLB
+ * which shares page-tables with the CPU. The
+ * invalidate_range_start()/end() callbacks should not be implemented as
+ * invalidate_secondary_tlbs() already catches the points in time when
+ * an external TLB needs to be flushed.
*
- * If invalidate_range() is used to manage a non-CPU TLB with
- * shared page-tables, it not necessary to implement the
- * invalidate_range_start()/end() notifiers, as
- * invalidate_range() already catches the points in time when an
- * external TLB range needs to be flushed. For more in depth
- * discussion on this see Documentation/mm/mmu_notifier.rst
+ * This requires arch_invalidate_secondary_tlbs() to be called while
+ * holding the ptl spin-lock and therefore this callback is not allowed
+ * to sleep.
*
- * Note that this function might be called with just a sub-range
- * of what was passed to invalidate_range_start()/end(), if
- * called between those functions.
+ * This is called by architecture code whenever invalidating a TLB
+ * entry. It is assumed that any secondary TLB has the same rules for
+ * when invalidations are required. If this is not the case architecture
+ * code will need to call this explicitly when required for secondary
+ * TLB invalidation.
*/
- void (*invalidate_range)(struct mmu_notifier *subscription,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end);
+ void (*arch_invalidate_secondary_tlbs)(
+ struct mmu_notifier *subscription,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end);
/*
* These callbacks are used with the get/put interface to manage the
@@ -396,8 +396,8 @@ extern void __mmu_notifier_change_pte(struct mm_struct *mm,
unsigned long address, pte_t pte);
extern int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *r);
extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r);
-extern void __mmu_notifier_invalidate_range(struct mm_struct *mm,
- unsigned long start, unsigned long end);
+extern void __mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm,
+ unsigned long start, unsigned long end);
extern bool
mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range);
@@ -483,11 +483,11 @@ mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range)
__mmu_notifier_invalidate_range_end(range);
}
-static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
- unsigned long start, unsigned long end)
+static inline void mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
{
if (mm_has_notifiers(mm))
- __mmu_notifier_invalidate_range(mm, start, end);
+ __mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
}
static inline void mmu_notifier_subscriptions_init(struct mm_struct *mm)
@@ -664,7 +664,7 @@ void mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range)
{
}
-static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
+static inline void mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3ece117de898..e0420de0e2e0 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2120,8 +2120,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
if (is_huge_zero_pmd(*pmd)) {
/*
* FIXME: Do we want to invalidate secondary mmu by calling
- * mmu_notifier_invalidate_range() see comments below inside
- * __split_huge_pmd() ?
+ * mmu_notifier_arch_invalidate_secondary_tlbs() see comments below
+ * inside __split_huge_pmd() ?
*
* We are going from a zero huge page write protected to zero
* small page also write protected so it does not seems useful
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4672752b0b17..5ef7bccda50c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6649,8 +6649,9 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
else
flush_hugetlb_tlb_range(vma, start, end);
/*
- * No need to call mmu_notifier_invalidate_range() we are downgrading
- * page table protection not changing it to point to a new page.
+ * No need to call mmu_notifier_arch_invalidate_secondary_tlbs() we are
+ * downgrading page table protection not changing it to point to a new
+ * page.
*
* See Documentation/mm/mmu_notifier.rst
*/
@@ -7294,7 +7295,7 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
i_mmap_unlock_write(vma->vm_file->f_mapping);
hugetlb_vma_unlock_write(vma);
/*
- * No need to call mmu_notifier_invalidate_range(), see
+ * No need to call mmu_notifier_arch_invalidate_secondary_tlbs(), see
* Documentation/mm/mmu_notifier.rst.
*/
mmu_notifier_invalidate_range_end(&range);
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 453a156d93c0..ec3b068cbbe6 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -585,8 +585,8 @@ void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range)
lock_map_release(&__mmu_notifier_invalidate_range_start_map);
}
-void __mmu_notifier_invalidate_range(struct mm_struct *mm,
- unsigned long start, unsigned long end)
+void __mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
{
struct mmu_notifier *subscription;
int id;
@@ -595,9 +595,10 @@ void __mmu_notifier_invalidate_range(struct mm_struct *mm,
hlist_for_each_entry_rcu(subscription,
&mm->notifier_subscriptions->list, hlist,
srcu_read_lock_held(&srcu)) {
- if (subscription->ops->invalidate_range)
- subscription->ops->invalidate_range(subscription, mm,
- start, end);
+ if (subscription->ops->arch_invalidate_secondary_tlbs)
+ subscription->ops->arch_invalidate_secondary_tlbs(
+ subscription, mm,
+ start, end);
}
srcu_read_unlock(&srcu, id);
}
@@ -616,6 +617,16 @@ int __mmu_notifier_register(struct mmu_notifier *subscription,
mmap_assert_write_locked(mm);
BUG_ON(atomic_read(&mm->mm_users) <= 0);
+ /*
+ * Subsystems should only register for invalidate_secondary_tlbs() or
+ * invalidate_range_start()/end() callbacks, not both.
+ */
+ if (WARN_ON_ONCE(subscription &&
+ (subscription->ops->arch_invalidate_secondary_tlbs &&
+ (subscription->ops->invalidate_range_start ||
+ subscription->ops->invalidate_range_end))))
+ return -EINVAL;
+
if (!mm->notifier_subscriptions) {
/*
* kmalloc cannot be called under mm_take_all_locks(), but we