From 51115d4d45700fc7c08306f7ba6e68551f526ae5 Mon Sep 17 00:00:00 2001
From: Brian Gerst
Date: Fri, 3 Sep 2010 21:17:10 -0400
Subject: x86, fpu: Merge tolerant_fwait()

Commit e2e75c91 merged the math exception handler, allowing both 32-bit
and 64-bit to handle math exceptions from kernel mode.  Switch to using
the 64-bit version of tolerant_fwait() without fnclex, which simply
ignores the exception if one is still pending from userspace.

Signed-off-by: Brian Gerst
Acked-by: Pekka Enberg
Cc: Suresh Siddha
LKML-Reference: <1283563039-3466-4-git-send-email-brgerst@gmail.com>
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/i387.h | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

(limited to 'arch/x86/include/asm/i387.h')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index a73a8d5a5e69..5d8f9a79fa5d 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -77,15 +77,6 @@ static inline void sanitize_i387_state(struct task_struct *tsk)
 }
 
 #ifdef CONFIG_X86_64
-
-/* Ignore delayed exceptions from user space */
-static inline void tolerant_fwait(void)
-{
-        asm volatile("1: fwait\n"
-                     "2:\n"
-                     _ASM_EXTABLE(1b, 2b));
-}
-
 static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 {
         int err;
@@ -220,11 +211,6 @@ extern void finit_soft_fpu(struct i387_soft_struct *soft);
 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
 #endif
 
-static inline void tolerant_fwait(void)
-{
-        asm volatile("fnclex ; fwait");
-}
-
 /* perform fxrstor iff the processor has extended states, otherwise frstor */
 static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 {
@@ -344,7 +330,10 @@ static inline void __unlazy_fpu(struct task_struct *tsk)
 static inline void __clear_fpu(struct task_struct *tsk)
 {
         if (task_thread_info(tsk)->status & TS_USEDFPU) {
-                tolerant_fwait();
+                /* Ignore delayed exceptions from user space */
+                asm volatile("1: fwait\n"
+                             "2:\n"
+                             _ASM_EXTABLE(1b, 2b));
                 task_thread_info(tsk)->status &= ~TS_USEDFPU;
                 stts();
         }
--
cgit v1.2.3

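The reason the 32-bit fnclex can go away is the shared trap handler the
changelog refers to: since commit e2e75c91, a math exception raised by this
fwait while in kernel mode is resolved through the exception table instead
of being treated as a fatal kernel trap.  A rough, paraphrased sketch of
that kernel-mode path (an assumption about the shape of e2e75c91's handler,
not a verbatim quote of it):

    static void math_error(struct pt_regs *regs, int error_code, int trapnr)
    {
            if (!user_mode_vm(regs)) {
                    /*
                     * Kernel-mode #MF: consult the exception table.  The fwait
                     * in __clear_fpu() has a fixup entry, so a still-pending
                     * user-space exception is simply skipped over.
                     */
                    if (!fixup_exception(regs))
                            die("math error", regs, error_code);
                    return;
            }

            /* ... user-mode case: decode the x87/SIMD status and send SIGFPE ... */
    }
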
From bfd946cb891800d408decaae268a3480775178a3 Mon Sep 17 00:00:00 2001
From: Brian Gerst
Date: Fri, 3 Sep 2010 21:17:11 -0400
Subject: x86, fpu: Merge __save_init_fpu()

__save_init_fpu() is identical for 32-bit and 64-bit.

Signed-off-by: Brian Gerst
Acked-by: Pekka Enberg
Cc: Suresh Siddha
LKML-Reference: <1283563039-3466-5-git-send-email-brgerst@gmail.com>
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/i387.h | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

(limited to 'arch/x86/include/asm/i387.h')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 5d8f9a79fa5d..88065e3a03c0 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -197,12 +197,6 @@ static inline void fpu_save_init(struct fpu *fpu)
         fpu_clear(fpu);
 }
 
-static inline void __save_init_fpu(struct task_struct *tsk)
-{
-        fpu_save_init(&tsk->thread.fpu);
-        task_thread_info(tsk)->status &= ~TS_USEDFPU;
-}
-
 #else /* CONFIG_X86_32 */
 
 #ifdef CONFIG_MATH_EMULATION
@@ -285,15 +279,14 @@ end:
         ;
 }
 
+#endif /* CONFIG_X86_64 */
+
 static inline void __save_init_fpu(struct task_struct *tsk)
 {
         fpu_save_init(&tsk->thread.fpu);
         task_thread_info(tsk)->status &= ~TS_USEDFPU;
 }
 
-
-#endif /* CONFIG_X86_64 */
-
 static inline int fpu_fxrstor_checking(struct fpu *fpu)
 {
         return fxrstor_checking(&fpu->state->fxsave);
--
cgit v1.2.3

From a4d4fbc7735bba6654b20f859135f9d3f8fe7f76 Mon Sep 17 00:00:00 2001
From: Brian Gerst
Date: Fri, 3 Sep 2010 21:17:12 -0400
Subject: x86-64, fpu: Disable preemption when using TS_USEDFPU

Consolidates code and fixes the below race for 64-bit.

    commit 9fa2f37bfeb798728241cc4a19578ce6e4258f25
    Author: torvalds
    Date:   Tue Sep 2 07:37:25 2003 +0000

        Be a lot more careful about TS_USEDFPU and preemption

        We had some races where we tested (or set) TS_USEDFPU together
        with sequences that depended on the setting (like clearing or
        setting the TS flag in %cr0), and we could be preempted in
        between, which screws up the FPU state, since preemption will
        itself change USEDFPU and the TS flag.

        This makes it a lot more explicit: the "internal" low-level FPU
        functions ("__xxxx_fpu()") all require preemption to be disabled,
        and the exported "real" functions will make sure that is the case.

        One case - in __switch_to() - was switched to the non-preempt-safe
        internal version, since the scheduler itself has already disabled
        preemption.

    BKrev: 3f5448b5WRiQuyzAlbajs3qoQjSobw

Signed-off-by: Brian Gerst
Acked-by: Pekka Enberg
Cc: Suresh Siddha
LKML-Reference: <1283563039-3466-6-git-send-email-brgerst@gmail.com>
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/i387.h  | 15 ---------------
 arch/x86/kernel/process_64.c |  2 +-
 2 files changed, 1 insertion(+), 16 deletions(-)

(limited to 'arch/x86/include/asm/i387.h')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 88065e3a03c0..8b40a8379cc2 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -387,19 +387,6 @@ static inline void irq_ts_restore(int TS_state)
                 stts();
 }
 
-#ifdef CONFIG_X86_64
-
-static inline void save_init_fpu(struct task_struct *tsk)
-{
-        __save_init_fpu(tsk);
-        stts();
-}
-
-#define unlazy_fpu      __unlazy_fpu
-#define clear_fpu       __clear_fpu
-
-#else /* CONFIG_X86_32 */
-
 /*
  * These disable preemption on their own and are safe
  */
@@ -425,8 +412,6 @@ static inline void clear_fpu(struct task_struct *tsk)
         preempt_enable();
 }
 
-#endif /* CONFIG_X86_64 */
-
 /*
  * i387 state interaction
  */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3d9ea531ddd1..b3d7a3a04f38 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -424,7 +424,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         load_TLS(next, cpu);
 
         /* Must be after DS reload */
-        unlazy_fpu(prev_p);
+        __unlazy_fpu(prev_p);
 
         /* Make sure cpu is ready for new context */
         if (preload_fpu)
--
cgit v1.2.3

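The convention this patch settles on for both 32-bit and 64-bit: the
double-underscore helpers (__unlazy_fpu(), __clear_fpu(), __save_init_fpu())
assume preemption is already disabled, while the exported wrappers, now
shared by both configurations, take care of it themselves; __switch_to()
can keep using the raw variant because the scheduler already runs with
preemption off.  The wrappers look essentially like this (sketch of the
shared i387.h helpers, not a verbatim quote):

    /*
     * These disable preemption on their own and are safe to call from
     * preemptible context; keeping the TS_USEDFPU test and the CR0.TS
     * update inside one non-preemptible region is what closes the race
     * described in the quoted changelog.
     */
    static inline void unlazy_fpu(struct task_struct *tsk)
    {
            preempt_disable();
            __unlazy_fpu(tsk);
            preempt_enable();
    }

    static inline void clear_fpu(struct task_struct *tsk)
    {
            preempt_disable();
            __clear_fpu(tsk);
            preempt_enable();
    }
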
From 820241356d6aa9a895fc10def15794a5a5bfcd98 Mon Sep 17 00:00:00 2001
From: Brian Gerst
Date: Fri, 3 Sep 2010 21:17:14 -0400
Subject: x86-64, fpu: Simplify constraints for fxsave/fxrstor

Use the "R" constraint (legacy register) instead of listing all the
possible registers.  Clean up the comments as well.

Signed-off-by: Brian Gerst
Acked-by: Pekka Enberg
Cc: Suresh Siddha
LKML-Reference: <1283563039-3466-8-git-send-email-brgerst@gmail.com>
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/i387.h | 44 +++++++++++++++++---------------------------
 1 file changed, 17 insertions(+), 27 deletions(-)

(limited to 'arch/x86/include/asm/i387.h')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 8b40a8379cc2..768fcb25900a 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -81,6 +81,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 {
         int err;
 
+        /* See comment in fxsave() below. */
         asm volatile("1: rex64/fxrstor (%[fx])\n\t"
                      "2:\n"
                      ".section .fixup,\"ax\"\n"
@@ -89,11 +90,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
                      ".previous\n"
                      _ASM_EXTABLE(1b, 3b)
                      : [err] "=r" (err)
-#if 0 /* See comment in fxsave() below. */
-                     : [fx] "r" (fx), "m" (*fx), "0" (0));
-#else
-                     : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
-#endif
+                     : [fx] "R" (fx), "m" (*fx), "0" (0));
 
         return err;
 }
@@ -140,6 +137,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
         if (unlikely(err))
                 return -EFAULT;
 
+        /* See comment in fxsave() below. */
         asm volatile("1: rex64/fxsave (%[fx])\n\t"
                      "2:\n"
                      ".section .fixup,\"ax\"\n"
@@ -148,11 +146,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
                      ".previous\n"
                      _ASM_EXTABLE(1b, 3b)
                      : [err] "=r" (err), "=m" (*fx)
-#if 0 /* See comment in fxsave() below. */
-                     : [fx] "r" (fx), "0" (0));
-#else
-                     : [fx] "cdaSDb" (fx), "0" (0));
-#endif
+                     : [fx] "R" (fx), "0" (0));
         if (unlikely(err) &&
             __clear_user(fx, sizeof(struct i387_fxsave_struct)))
                 err = -EFAULT;
@@ -165,26 +159,22 @@ static inline void fpu_fxsave(struct fpu *fpu)
         /* Using "rex64; fxsave %0" is broken because, if the memory operand
            uses any extended registers for addressing, a second REX prefix
            will be generated (to the assembler, rex64 followed by semicolon
-           is a separate instruction), and hence the 64-bitness is lost. */
-#if 0
-        /* Using "fxsaveq %0" would be the ideal choice, but is only supported
-           starting with gas 2.16. */
-        __asm__ __volatile__("fxsaveq %0"
-                             : "=m" (fpu->state->fxsave));
-#elif 0
-        /* Using, as a workaround, the properly prefixed form below isn't
+           is a separate instruction), and hence the 64-bitness is lost.
+           Using "fxsaveq %0" would be the ideal choice, but is only supported
+           starting with gas 2.16.
+           asm volatile("fxsaveq %0"
+                        : "=m" (fpu->state->fxsave));
+           Using, as a workaround, the properly prefixed form below isn't
            accepted by any binutils version so far released, complaining that
            the same type of prefix is used twice if an extended register is
-           needed for addressing (fix submitted to mainline 2005-11-21). */
-        __asm__ __volatile__("rex64/fxsave %0"
-                             : "=m" (fpu->state->fxsave));
-#else
-        /* This, however, we can work around by forcing the compiler to select
+           needed for addressing (fix submitted to mainline 2005-11-21).
+           asm volatile("rex64/fxsave %0"
+                        : "=m" (fpu->state->fxsave));
+           This, however, we can work around by forcing the compiler to select
            an addressing mode that doesn't require extended registers. */
-        __asm__ __volatile__("rex64/fxsave (%1)"
-                             : "=m" (fpu->state->fxsave)
-                             : "cdaSDb" (&fpu->state->fxsave));
-#endif
+        asm volatile("rex64/fxsave (%[fx])"
+                     : "=m" (fpu->state->fxsave)
+                     : [fx] "R" (&fpu->state->fxsave));
 }
 
 static inline void fpu_save_init(struct fpu *fpu)
--
cgit v1.2.3

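In GCC's x86 machine constraints, "R" means a legacy register: one of the
eight integer registers available on every i386-class CPU (a, b, c, d, si,
di, bp, sp), so r8-r15 can never be chosen.  That is what the hand-rolled
"cdaSDb" list was approximating, and it is exactly the property the
rex64-prefixed fxsave/fxrstor needs: the address register must not require
its own REX prefix.  A stand-alone illustration of the constraint
(hypothetical snippet, not kernel code):

    /*
     * Hypothetical example: force the compiler to address the buffer through
     * one of the eight legacy GPRs, so %[p] never needs a REX prefix.
     */
    static inline void prefetch_line(const void *p)
    {
            asm volatile("prefetcht0 (%[p])" : : [p] "R" (p));
    }

    int main(void)
    {
            static char buf[64];
            prefetch_line(buf);
            return 0;
    }
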
From 8eb91a577d7763d21628f6761045328784b1911c Mon Sep 17 00:00:00 2001
From: Brian Gerst
Date: Fri, 3 Sep 2010 21:17:16 -0400
Subject: x86, fpu: Remove unnecessary ifdefs from i387 code.

Remove ifdefs for code that the compiler can optimize away on 64-bit.

Signed-off-by: Brian Gerst
Acked-by: Pekka Enberg
Cc: Suresh Siddha
LKML-Reference: <1283563039-3466-10-git-send-email-brgerst@gmail.com>
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/i387.h | 12 ++++++------
 arch/x86/kernel/i387.c      |  4 ----
 2 files changed, 6 insertions(+), 10 deletions(-)

(limited to 'arch/x86/include/asm/i387.h')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 768fcb25900a..42b507e16d1d 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -55,6 +55,12 @@ extern int save_i387_xstate_ia32(void __user *buf);
 extern int restore_i387_xstate_ia32(void __user *buf);
 #endif
 
+#ifdef CONFIG_MATH_EMULATION
+extern void finit_soft_fpu(struct i387_soft_struct *soft);
+#else
+static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
+#endif
+
 #define X87_FSW_ES (1 << 7)     /* Exception Summary */
 
 static __always_inline __pure bool use_xsaveopt(void)
@@ -189,12 +195,6 @@ static inline void fpu_save_init(struct fpu *fpu)
 
 #else /* CONFIG_X86_32 */
 
-#ifdef CONFIG_MATH_EMULATION
-extern void finit_soft_fpu(struct i387_soft_struct *soft);
-#else
-static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
-#endif
-
 /* perform fxrstor iff the processor has extended states, otherwise frstor */
 static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 {
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index f3775f5ef4ab..e795e360bd50 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -79,10 +79,8 @@ static void __cpuinit init_thread_xstate(void)
 
         if (cpu_has_fxsr)
                 xstate_size = sizeof(struct i387_fxsave_struct);
-#ifdef CONFIG_X86_32
         else
                 xstate_size = sizeof(struct i387_fsave_struct);
-#endif
 }
 
 /*
@@ -119,12 +117,10 @@ void __cpuinit fpu_init(void)
 
 void fpu_finit(struct fpu *fpu)
 {
-#ifdef CONFIG_X86_32
         if (!HAVE_HWFP) {
                 finit_soft_fpu(&fpu->state->soft);
                 return;
         }
-#endif
 
         if (cpu_has_fxsr) {
                 struct i387_fxsave_struct *fx = &fpu->state->fxsave;
--
cgit v1.2.3

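Why the compiler can optimize the un-ifdef'ed branches away on 64-bit:
CONFIG_MATH_EMULATION is 32-bit only, so HAVE_HWFP is the constant 1 there,
and X86_FEATURE_FXSR is part of the 64-bit required-features mask, so
cpu_has_fxsr also folds to a compile-time 1.  Roughly (a sketch of the idea,
not the literal kernel definitions, which live in arch/x86/kernel/i387.c and
the cpufeature/required-features headers):

    #ifdef CONFIG_MATH_EMULATION            /* only selectable on 32-bit */
    # define HAVE_HWFP      (boot_cpu_data.hard_math)
    #else
    # define HAVE_HWFP      1               /* always true, in particular on 64-bit */
    #endif

    void fpu_finit_sketch(struct fpu *fpu)
    {
            if (!HAVE_HWFP) {               /* constant 0 on 64-bit: branch is dropped */
                    finit_soft_fpu(&fpu->state->soft);
                    return;
            }

            if (cpu_has_fxsr) {             /* constant 1 on 64-bit: FXSR is required */
                    /* ... fxsave-based init ... */
            } else {
                    /* ... fsave-based init, dead code on 64-bit ... */
            }
    }
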
From eec73f813ab0954253e5e2168119c4555f83f07d Mon Sep 17 00:00:00 2001
From: Brian Gerst
Date: Fri, 3 Sep 2010 21:17:17 -0400
Subject: x86, fpu: Remove PSHUFB_XMM5_* macros

The PSHUFB_XMM5_* macros are no longer used.

Signed-off-by: Brian Gerst
Acked-by: Pekka Enberg
Cc: Suresh Siddha
LKML-Reference: <1283563039-3466-11-git-send-email-brgerst@gmail.com>
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/i387.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch/x86/include/asm/i387.h')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 42b507e16d1d..907967e4fa8d 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -465,7 +465,4 @@ extern void fpu_finit(struct fpu *fpu);
 
 #endif /* __ASSEMBLY__ */
 
-#define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5
-#define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5
-
 #endif /* _ASM_X86_I387_H */
--
cgit v1.2.3

From 58a992b9cbaf449aeebd3575c3695a9eb5d95b5e Mon Sep 17 00:00:00 2001
From: Brian Gerst
Date: Fri, 3 Sep 2010 21:17:18 -0400
Subject: x86-32, fpu: Rewrite fpu_save_init()

Rewrite fpu_save_init() to prepare for merging with 64-bit.

Signed-off-by: Brian Gerst
Acked-by: Pekka Enberg
Cc: Suresh Siddha
LKML-Reference: <1283563039-3466-12-git-send-email-brgerst@gmail.com>
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/i387.h | 47 +++++++++++++++++++++------------------------
 1 file changed, 22 insertions(+), 25 deletions(-)

(limited to 'arch/x86/include/asm/i387.h')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 907967e4fa8d..b45abefb89f2 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -73,6 +73,11 @@ static __always_inline __pure bool use_xsave(void)
         return static_cpu_has(X86_FEATURE_XSAVE);
 }
 
+static __always_inline __pure bool use_fxsr(void)
+{
+        return static_cpu_has(X86_FEATURE_FXSR);
+}
+
 extern void __sanitize_i387_state(struct task_struct *);
 
 static inline void sanitize_i387_state(struct task_struct *tsk)
@@ -211,6 +216,12 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
         return 0;
 }
 
+static inline void fpu_fxsave(struct fpu *fpu)
+{
+        asm volatile("fxsave %[fx]"
+                     : [fx] "=m" (fpu->state->fxsave));
+}
+
 /* We need a safe address that is cheap to find and that is already in
    L1 during context switch. The best choices are unfortunately
    different for UP and SMP */
@@ -226,36 +237,24 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 static inline void fpu_save_init(struct fpu *fpu)
 {
         if (use_xsave()) {
-                struct xsave_struct *xstate = &fpu->state->xsave;
-                struct i387_fxsave_struct *fx = &fpu->state->fxsave;
-
                 fpu_xsave(fpu);
 
                 /*
                  * xsave header may indicate the init state of the FP.
                  */
-                if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
-                        goto end;
-
-                if (unlikely(fx->swd & X87_FSW_ES))
-                        asm volatile("fnclex");
-
-                /*
-                 * we can do a simple return here or be paranoid :)
-                 */
-                goto clear_state;
+                if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
+                        return;
+        } else if (use_fxsr()) {
+                fpu_fxsave(fpu);
+        } else {
+                asm volatile("fsave %[fx]; fwait"
+                             : [fx] "=m" (fpu->state->fsave));
+                return;
         }
 
-        /* Use more nops than strictly needed in case the compiler
-           varies code */
-        alternative_input(
-                "fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4,
-                "fxsave %[fx]\n"
-                "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
-                X86_FEATURE_FXSR,
-                [fx] "m" (fpu->state->fxsave),
-                [fsw] "m" (fpu->state->fxsave.swd) : "memory");
-clear_state:
+        if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
+                asm volatile("fnclex");
+
         /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
            is pending.  Clear the x87 state here by setting it to fixed
            values. safe_address is a random variable that should be in L1 */
@@ -265,8 +264,6 @@ clear_state:
                 "fildl %[addr]",        /* set F?P to defined value */
                 X86_FEATURE_FXSAVE_LEAK,
                 [addr] "m" (safe_address));
-end:
-        ;
 }
 
 #endif /* CONFIG_X86_64 */
--
cgit v1.2.3

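Worth noting about the rewrite: use_xsave() and use_fxsr() are built on
static_cpu_has(), whose test is patched in at boot via the alternatives
mechanism, so turning the old alternative_input() sequence into plain C
if/else does not put a runtime CPUID-style feature check on the
context-switch path.  The same pattern works for any feature bit; a
hypothetical example (not part of this series):

    /*
     * Hypothetical helper following the same boot-patched pattern;
     * X86_FEATURE_XMM4_2 (SSE4.2) is just an example bit.
     */
    static __always_inline __pure bool use_sse42(void)
    {
            return static_cpu_has(X86_FEATURE_XMM4_2);
    }
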
From b2b57fe053c9cf8b8af5a0e826a465996afed0ff Mon Sep 17 00:00:00 2001
From: Brian Gerst
Date: Fri, 3 Sep 2010 21:17:19 -0400
Subject: x86, fpu: Merge fpu_save_init()

Make 64-bit use the 32-bit version of fpu_save_init().  Remove unused
clear_fpu_state().

Signed-off-by: Brian Gerst
Acked-by: Pekka Enberg
Cc: Suresh Siddha
LKML-Reference: <1283563039-3466-13-git-send-email-brgerst@gmail.com>
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/i387.h | 48 ++++-----------------------------------------
 1 file changed, 4 insertions(+), 44 deletions(-)

(limited to 'arch/x86/include/asm/i387.h')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index b45abefb89f2..70626ed96cb5 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -105,36 +105,6 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
         return err;
 }
 
-/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
-   is pending. Clear the x87 state here by setting it to fixed
-   values. The kernel data segment can be sometimes 0 and sometimes
-   new user value. Both should be ok.
-   Use the PDA as safe address because it should be already in L1. */
-static inline void fpu_clear(struct fpu *fpu)
-{
-        struct xsave_struct *xstate = &fpu->state->xsave;
-        struct i387_fxsave_struct *fx = &fpu->state->fxsave;
-
-        /*
-         * xsave header may indicate the init state of the FP.
-         */
-        if (use_xsave() &&
-            !(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
-                return;
-
-        if (unlikely(fx->swd & X87_FSW_ES))
-                asm volatile("fnclex");
-        alternative_input(ASM_NOP8 ASM_NOP2,
-                          "     emms\n"         /* clear stack tags */
-                          "     fildl %%gs:0",  /* load to clear state */
-                          X86_FEATURE_FXSAVE_LEAK);
-}
-
-static inline void clear_fpu_state(struct task_struct *tsk)
-{
-        fpu_clear(&tsk->thread.fpu);
-}
-
 static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
 {
         int err;
@@ -188,16 +158,6 @@ static inline void fpu_fxsave(struct fpu *fpu)
                      : [fx] "R" (&fpu->state->fxsave));
 }
 
-static inline void fpu_save_init(struct fpu *fpu)
-{
-        if (use_xsave())
-                fpu_xsave(fpu);
-        else
-                fpu_fxsave(fpu);
-
-        fpu_clear(fpu);
-}
-
 #else /* CONFIG_X86_32 */
 
 /* perform fxrstor iff the processor has extended states, otherwise frstor */
@@ -222,6 +182,8 @@ static inline void fpu_fxsave(struct fpu *fpu)
                      : [fx] "=m" (fpu->state->fxsave));
 }
 
+#endif /* CONFIG_X86_64 */
+
 /* We need a safe address that is cheap to find and that is already in
    L1 during context switch. The best choices are unfortunately
    different for UP and SMP */
@@ -259,15 +221,13 @@ static inline void fpu_save_init(struct fpu *fpu)
            is pending.  Clear the x87 state here by setting it to fixed
            values. safe_address is a random variable that should be in L1 */
         alternative_input(
-                GENERIC_NOP8 GENERIC_NOP2,
+                ASM_NOP8 ASM_NOP2,
                 "emms\n\t"              /* clear stack tags */
-                "fildl %[addr]",        /* set F?P to defined value */
+                "fildl %P[addr]",       /* set F?P to defined value */
                 X86_FEATURE_FXSAVE_LEAK,
                 [addr] "m" (safe_address));
 }
 
-#endif /* CONFIG_X86_64 */
-
 static inline void __save_init_fpu(struct task_struct *tsk)
 {
         fpu_save_init(&tsk->thread.fpu);
--
cgit v1.2.3
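One detail of the final merge: the shared alternative_input() switches from
GENERIC_NOP8 GENERIC_NOP2 to ASM_NOP8 ASM_NOP2.  GENERIC_NOPn names the
plain i386 NOP sequences, while ASM_NOPn resolves to whichever NOP family
fits the build, which makes it the right spelling for code that is now
compiled for both 32-bit and 64-bit.  A simplified sketch of the selection
in <asm/nops.h> (assumed layout, not verbatim; the real header also handles
the K7 and P6 variants):

    #if defined(CONFIG_X86_64)
    # define ASM_NOP8       K8_NOP8         /* 64-bit builds use the K8 NOPs */
    #else
    # define ASM_NOP8       GENERIC_NOP8    /* plain i386 NOPs */
    #endif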