From 050966666047b5013fe44944cef9e9605bdf6cfe Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Fri, 2 Sep 2022 02:50:12 +0100
Subject: alpha: lazy FPU switching

On each context switch we save the FPU registers on the stack of the old
process and restore the FPU registers from the stack of the new one. That
allows us to avoid doing that each time we enter/leave kernel mode;
however, that can get suboptimal in some cases.

For one thing, we don't need to bother saving anything for kernel threads.
For another, if between entering and leaving the kernel a thread gives the
CPU up more than once, it will do useless work, saving the same values
every time, only to discard the saved copy as soon as it returns from
switch_to().

Alternative solution:

* move the array we save into from switch_stack to thread_info
* have a (thread-synchronous) flag set when we save them
* have another flag set when they should be restored on return to userland.
* do *NOT* save/restore them in do_switch_stack()/undo_switch_stack().
* restore on the exit to user mode if the restore flag has been set.
  Clear both flags.
* on context switch, on entry to fork/clone/vfork, before entry into
  do_signal() and on entry into a straced syscall, save the registers and
  set the 'saved' flag unless it has already been set.
* on context switch set the 'restore' flag as well.
* have copy_thread() set both flags for the child, so the registers will be
  restored once the child returns to userland.
* use the saved data in setup_sigcontext(); have restore_sigcontext() set
  both flags and copy from the sigframe to the save area.
* teach ptrace to look for FPU registers in thread_info instead of
  switch_stack.
* teach isolated accesses to FPU registers (rdfpcr, wrfpcr, etc.) to check
  the 'saved' flag (under preempt_disable()) and work with the save area
  if it's been set; if the 'saved' flag is found set upon write access,
  set the 'restore' flag as well.

Signed-off-by: Al Viro Signed-off-by: Matt Turner --- arch/alpha/include/asm/fpu.h | 61 ++++++++++++++++++++++-------------- arch/alpha/include/asm/thread_info.h | 16 ++++++++++ arch/alpha/include/uapi/asm/ptrace.h | 2 ++ 3 files changed, 55 insertions(+), 24 deletions(-) (limited to 'arch/alpha/include') diff --git a/arch/alpha/include/asm/fpu.h b/arch/alpha/include/asm/fpu.h index b9691405e56b..30b24135dd7a 100644 --- a/arch/alpha/include/asm/fpu.h +++ b/arch/alpha/include/asm/fpu.h @@ -15,21 +15,27 @@ rdfpcr(void) { unsigned long tmp, ret; + preempt_disable(); + if (current_thread_info()->status & TS_SAVED_FP) { + ret = current_thread_info()->fp[31]; + } else { #if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67) - __asm__ __volatile__ ( - "ftoit $f0,%0\n\t" - "mf_fpcr $f0\n\t" - "ftoit $f0,%1\n\t" - "itoft %0,$f0" - : "=r"(tmp), "=r"(ret)); + __asm__ __volatile__ ( + "ftoit $f0,%0\n\t" + "mf_fpcr $f0\n\t" + "ftoit $f0,%1\n\t" + "itoft %0,$f0" + : "=r"(tmp), "=r"(ret)); #else - __asm__ __volatile__ ( - "stt $f0,%0\n\t" - "mf_fpcr $f0\n\t" - "stt $f0,%1\n\t" - "ldt $f0,%0" - : "=m"(tmp), "=m"(ret)); + __asm__ __volatile__ ( + "stt $f0,%0\n\t" + "mf_fpcr $f0\n\t" + "stt $f0,%1\n\t" + "ldt $f0,%0" + : "=m"(tmp), "=m"(ret)); #endif + } + preempt_enable(); return ret; } @@ -39,21 +45,28 @@ wrfpcr(unsigned long val) { unsigned long tmp; + preempt_disable(); + if (current_thread_info()->status & TS_SAVED_FP) { + current_thread_info()->status |= TS_RESTORE_FP; + current_thread_info()->fp[31] = val; + } else { #if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67) - __asm__ __volatile__ ( - "ftoit $f0,%0\n\t" - "itoft %1,$f0\n\t" - "mt_fpcr $f0\n\t" - "itoft %0,$f0" - : "=&r"(tmp) : "r"(val)); + __asm__ __volatile__ ( + "ftoit $f0,%0\n\t" + "itoft %1,$f0\n\t" + "mt_fpcr $f0\n\t" + "itoft %0,$f0" + : "=&r"(tmp) : "r"(val)); #else - __asm__ __volatile__ ( - "stt $f0,%0\n\t" - "ldt $f0,%1\n\t" - "mt_fpcr $f0\n\t" - "ldt $f0,%0" - : "=m"(tmp) : "m"(val)); + 
__asm__ __volatile__ ( + "stt $f0,%0\n\t" + "ldt $f0,%1\n\t" + "mt_fpcr $f0\n\t" + "ldt $f0,%0" + : "=m"(tmp) : "m"(val)); #endif + } + preempt_enable(); } static inline unsigned long diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 354247dca161..4a4d00b37986 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -26,6 +26,7 @@ struct thread_info { int bpt_nsaved; unsigned long bpt_addr[2]; /* breakpoint handling */ unsigned int bpt_insn[2]; + unsigned long fp[32]; }; /* @@ -83,6 +84,9 @@ register unsigned long *current_stack_pointer __asm__ ("$30"); #define TS_UAC_NOFIX 0x0002 /* ! flags as they match */ #define TS_UAC_SIGBUS 0x0004 /* ! userspace part of 'osf_sysinfo' */ +#define TS_SAVED_FP 0x0008 +#define TS_RESTORE_FP 0x0010 + #define SET_UNALIGN_CTL(task,value) ({ \ __u32 status = task_thread_info(task)->status & ~UAC_BITMASK; \ if (value & PR_UNALIGN_NOPRINT) \ @@ -106,5 +110,17 @@ register unsigned long *current_stack_pointer __asm__ ("$30"); put_user(res, (int __user *)(value)); \ }) +#ifndef __ASSEMBLY__ +extern void __save_fpu(void); + +static inline void save_fpu(void) +{ + if (!(current_thread_info()->status & TS_SAVED_FP)) { + current_thread_info()->status |= TS_SAVED_FP; + __save_fpu(); + } +} +#endif + #endif /* __KERNEL__ */ #endif /* _ALPHA_THREAD_INFO_H */ diff --git a/arch/alpha/include/uapi/asm/ptrace.h b/arch/alpha/include/uapi/asm/ptrace.h index c29194181025..5ca45934fcbb 100644 --- a/arch/alpha/include/uapi/asm/ptrace.h +++ b/arch/alpha/include/uapi/asm/ptrace.h @@ -64,7 +64,9 @@ struct switch_stack { unsigned long r14; unsigned long r15; unsigned long r26; +#ifndef __KERNEL__ unsigned long fp[32]; /* fp[31] is fpcr */ +#endif }; -- cgit v1.2.3