diff options
author | Nadav Amit <namit@vmware.com> | 2021-02-21 00:17:08 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2021-03-06 12:59:10 +0100 |
commit | 2f4305b19fe6a2a261d76c21856c5598f7d878fe (patch) | |
tree | 061ebf3ef8372e4da41a4c113127381207bc1d0c /arch/x86/include/asm/tlbflush.h | |
parent | x86/mm/tlb: Flush remote and local TLBs concurrently (diff) | |
download | linux-2f4305b19fe6a2a261d76c21856c5598f7d878fe.tar.xz linux-2f4305b19fe6a2a261d76c21856c5598f7d878fe.zip |
x86/mm/tlb: Privatize cpu_tlbstate
cpu_tlbstate is mostly private and only the variable is_lazy is shared.
This causes some false-sharing when TLB flushes are performed.
Break cpu_tlbstate intro cpu_tlbstate and cpu_tlbstate_shared, and mark
each one accordingly.
Signed-off-by: Nadav Amit <namit@vmware.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://lore.kernel.org/r/20210220231712.2475218-6-namit@vmware.com
Diffstat (limited to 'arch/x86/include/asm/tlbflush.h')
-rw-r--r-- | arch/x86/include/asm/tlbflush.h | 39 |
1 files changed, 21 insertions, 18 deletions
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 3c6681def912..fa952eadbc2e 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -90,23 +90,6 @@ struct tlb_state { u16 next_asid; /* - * We can be in one of several states: - * - * - Actively using an mm. Our CPU's bit will be set in - * mm_cpumask(loaded_mm) and is_lazy == false; - * - * - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit - * will not be set in mm_cpumask(&init_mm) and is_lazy == false. - * - * - Lazily using a real mm. loaded_mm != &init_mm, our bit - * is set in mm_cpumask(loaded_mm), but is_lazy == true. - * We're heuristically guessing that the CR3 load we - * skipped more than makes up for the overhead added by - * lazy mode. - */ - bool is_lazy; - - /* * If set we changed the page tables in such a way that we * needed an invalidation of all contexts (aka. PCIDs / ASIDs). * This tells us to go invalidate all the non-loaded ctxs[] @@ -151,7 +134,27 @@ struct tlb_state { */ struct tlb_context ctxs[TLB_NR_DYN_ASIDS]; }; -DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); +DECLARE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate); + +struct tlb_state_shared { + /* + * We can be in one of several states: + * + * - Actively using an mm. Our CPU's bit will be set in + * mm_cpumask(loaded_mm) and is_lazy == false; + * + * - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit + * will not be set in mm_cpumask(&init_mm) and is_lazy == false. + * + * - Lazily using a real mm. loaded_mm != &init_mm, our bit + * is set in mm_cpumask(loaded_mm), but is_lazy == true. + * We're heuristically guessing that the CR3 load we + * skipped more than makes up for the overhead added by + * lazy mode. + */ + bool is_lazy; +}; +DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared); bool nmi_uaccess_okay(void); #define nmi_uaccess_okay nmi_uaccess_okay |