diff options
author | Andy Polyakov <appro@openssl.org> | 2005-05-03 23:05:06 +0200 |
---|---|---|
committer | Andy Polyakov <appro@openssl.org> | 2005-05-03 23:05:06 +0200 |
commit | cee73df3bd6dfad96f686b766cc8900f432d23b4 (patch) | |
tree | 12730772b34946d023a10dd052c5990ec4f9c9a2 | |
parent | remove false positive (diff) | |
download | openssl-cee73df3bd6dfad96f686b766cc8900f432d23b4.tar.xz openssl-cee73df3bd6dfad96f686b766cc8900f432d23b4.zip |
Cpuid modules updates.
-rw-r--r-- | crypto/ia64cpuid.S | 116 | ||||
-rw-r--r-- | crypto/sparccpuid.S | 239 | ||||
-rw-r--r-- | crypto/x86cpuid.pl | 78 |
3 files changed, 431 insertions, 2 deletions
diff --git a/crypto/ia64cpuid.S b/crypto/ia64cpuid.S index a800527a58..04fbb3439e 100644 --- a/crypto/ia64cpuid.S +++ b/crypto/ia64cpuid.S @@ -4,6 +4,118 @@ .global OPENSSL_rdtsc# .proc OPENSSL_rdtsc# OPENSSL_rdtsc: - mov r8=ar.itc - br.ret b0 +{ .mib; mov r8=ar.itc + br.ret.sptk.many b0 };; .endp OPENSSL_rdtsc# + +.global OPENSSL_atomic_add# +.proc OPENSSL_atomic_add# +.align 32 +OPENSSL_atomic_add: +{ .mii; ld4 r2=[r32] + nop.i 0 + nop.i 0 };; +.Lspin: +{ .mii; mov ar.ccv=r2 + add r8=r2,r33 + mov r3=r2 };; +{ .mmi; mf + cmpxchg4.acq r2=[r32],r8,ar.ccv + nop.i 0 };; +{ .mib; cmp.ne p6,p0=r2,r3 + nop.i 0 +(p6) br.dpnt .Lspin };; +{ .mib; nop.m 0 + sxt4 r8=r8 + br.ret.sptk.many b0 };; +.endp OPENSSL_atomic_add# + +// Returns a structure comprising pointer to the top of stack of +// the caller and pointer beyond backing storage for the current +// register frame. The latter is required, because it might be +// insufficient to wipe backing storage for the current frame +// (as this procedure does), one might have to go further, toward +// higher addresses to reach for whole "retroactively" saved +// context... +.global OPENSSL_wipe_cpu# +.proc OPENSSL_wipe_cpu# +.align 32 +OPENSSL_wipe_cpu: + .prologue + .fframe 0 + .save ar.pfs,r2 + .save ar.lc,r3 +{ .mib; alloc r2=ar.pfs,0,96,0,96 + mov r3=ar.lc + brp.loop.imp .L_wipe_top,.L_wipe_end-16 + };; +{ .mii; mov r9=ar.bsp + mov r8=pr + mov ar.lc=96 };; + .body +{ .mii; add r9=96*8-8,r9 + mov ar.ec=1 };; + +// One can sweep double as fast, but then we can't quarantee +// that backing storage is wiped... +.L_wipe_top: +{ .mfi; st8 [r9]=r0,-8 + mov f127=f0 + mov r127=r0 } +{ .mfb; nop.m 0 + nop.f 0 + br.ctop.sptk .L_wipe_top };; +.L_wipe_end: + +{ .mfi; mov r11=r0 + mov f6=f0 + mov r14=r0 } +{ .mfi; mov r15=r0 + mov f7=f0 + mov r16=r0 } +{ .mfi; mov r17=r0 + mov f8=f0 + mov r18=r0 } +{ .mfi; mov r19=r0 + mov f9=f0 + mov r20=r0 } +{ .mfi; mov r21=r0 + mov f10=f0 + mov r22=r0 } +{ .mfi; mov r23=r0 + mov f11=f0 + mov r24=r0 } +{ .mfi; mov r25=r0 + mov f12=f0 + mov r26=r0 } +{ .mfi; mov r27=r0 + mov f13=f0 + mov r28=r0 } +{ .mfi; mov r29=r0 + mov f14=f0 + mov r30=r0 } +{ .mfi; mov r31=r0 + mov f15=f0 + nop.i 0 } +{ .mfi; mov f16=f0 } +{ .mfi; mov f17=f0 } +{ .mfi; mov f18=f0 } +{ .mfi; mov f19=f0 } +{ .mfi; mov f20=f0 } +{ .mfi; mov f21=f0 } +{ .mfi; mov f22=f0 } +{ .mfi; mov f23=f0 } +{ .mfi; mov f24=f0 } +{ .mfi; mov f25=f0 } +{ .mfi; mov f26=f0 } +{ .mfi; mov f27=f0 } +{ .mfi; mov f28=f0 } +{ .mfi; mov f29=f0 } +{ .mfi; mov f30=f0 } +{ .mfi; add r9=96*8+8,r9 + mov f31=f0 + mov pr=r8,0x1ffff } +{ .mib; mov r8=sp + mov ar.lc=r3 + br.ret.sptk b0 };; +.endp OPENSSL_wipe_cpu# diff --git a/crypto/sparccpuid.S b/crypto/sparccpuid.S new file mode 100644 index 0000000000..c17350fc89 --- /dev/null +++ b/crypto/sparccpuid.S @@ -0,0 +1,239 @@ +#if defined(__SUNPRO_C) && defined(__sparcv9) +# define ABI64 /* They've said -xarch=v9 at command line */ +#elif defined(__GNUC__) && defined(__arch64__) +# define ABI64 /* They've said -m64 at command line */ +#endif + +#ifdef ABI64 + .register %g2,#scratch + .register %g3,#scratch +# define FRAME -192 +# define BIAS 2047 +#else +# define FRAME -96 +# define BIAS 0 +#endif + +.text +.align 32 +.global OPENSSL_wipe_cpu +.type OPENSSL_wipe_cpu,#function +! Keep in mind that this does not excuse us from wiping the stack! +! This routine wipes registers, but not the backing store [which +! resides on the stack, toward lower addresses]. To facilitate for +! stack wiping I return pointer to the top of stack of the *caller*. +OPENSSL_wipe_cpu: + save %sp,FRAME,%sp + nop +#ifdef __sun +#include <sys/trap.h> + ta ST_CLEAN_WINDOWS +#else + call .walk.reg.wins +#endif + nop + call .PIC.zero.up + mov .zero-(.-4),%o0 + ldd [%o0],%f0 + + subcc %g0,1,%o0 + ! Following is V9 "rd %ccr,%o0" instruction. However! V8 + ! specification says that it ("rd %asr2,%o0" in V8 terms) does + ! not cause illegal_instruction trap. It therefore can be used + ! to determine if the CPU the code is executing on is V8- or + ! V9-compliant, as V9 returns a distinct value of 0x99, + ! "negative" and "borrow" bits set in both %icc and %xcc. + .word 0x91408000 !rd %ccr,%o0 + cmp %o0,0x99 + bne .v8 + nop + ! Even though we do not use %fp register bank, + ! we wipe it as memcpy might have used it... + .word 0xbfa00040 !fmovd %f0,%f62 + .word 0xbba00040 !... + .word 0xb7a00040 + .word 0xb3a00040 + .word 0xafa00040 + .word 0xaba00040 + .word 0xa7a00040 + .word 0xa3a00040 + .word 0x9fa00040 + .word 0x9ba00040 + .word 0x97a00040 + .word 0x93a00040 + .word 0x8fa00040 + .word 0x8ba00040 + .word 0x87a00040 + .word 0x83a00040 !fmovd %f0,%f32 +.v8: fmovs %f1,%f31 + clr %o0 + fmovs %f0,%f30 + clr %o1 + fmovs %f1,%f29 + clr %o2 + fmovs %f0,%f28 + clr %o3 + fmovs %f1,%f27 + clr %o4 + fmovs %f0,%f26 + clr %o5 + fmovs %f1,%f25 + clr %o7 + fmovs %f0,%f24 + clr %l0 + fmovs %f1,%f23 + clr %l1 + fmovs %f0,%f22 + clr %l2 + fmovs %f1,%f21 + clr %l3 + fmovs %f0,%f20 + clr %l4 + fmovs %f1,%f19 + clr %l5 + fmovs %f0,%f18 + clr %l6 + fmovs %f1,%f17 + clr %l7 + fmovs %f0,%f16 + clr %i0 + fmovs %f1,%f15 + clr %i1 + fmovs %f0,%f14 + clr %i2 + fmovs %f1,%f13 + clr %i3 + fmovs %f0,%f12 + clr %i4 + fmovs %f1,%f11 + clr %i5 + fmovs %f0,%f10 + clr %g1 + fmovs %f1,%f9 + clr %g2 + fmovs %f0,%f8 + clr %g3 + fmovs %f1,%f7 + clr %g4 + fmovs %f0,%f6 + clr %g5 + fmovs %f1,%f5 + fmovs %f0,%f4 + fmovs %f1,%f3 + fmovs %f0,%f2 + + add %fp,BIAS,%i0 ! return pointer to callerīs top of stack + + ret + restore + +.zero: .long 0x0,0x0 +.PIC.zero.up: + retl + add %o0,%o7,%o0 +#ifdef DEBUG +.global walk_reg_wins +.type walk_reg_wins,#function +walk_reg_wins: +#endif +.walk.reg.wins: + save %sp,FRAME,%sp + cmp %i7,%o7 + be 2f + clr %o0 + cmp %o7,0 ! compiler never cleans %o7... + be 1f ! could have been a leaf function... + clr %o1 + call .walk.reg.wins + nop +1: clr %o2 + clr %o3 + clr %o4 + clr %o5 + clr %o7 + clr %l0 + clr %l1 + clr %l2 + clr %l3 + clr %l4 + clr %l5 + clr %l6 + clr %l7 + add %o0,1,%i0 ! used for debugging +2: ret + restore +.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu + +.global OPENSSL_atomic_add +.type OPENSSL_atomic_add,#function +OPENSSL_atomic_add: +#ifndef ABI64 + subcc %g0,1,%o2 + .word 0x95408000 !rd %ccr,%o2, see comment above + cmp %o2,0x99 + be .v9 + nop + save %sp,FRAME,%sp + ba .enter + nop +#ifdef __sun +! Note that you don't have to link with libthread to call thr_yield, +! as libc provides a stub, which is overloaded the moment you link +! with *either* libpthread or libthread... +#define YIELD_CPU thr_yield +#else +! applies at least to Linux and FreeBSD... Feedback expected... +#define YIELD_CPU sched_yield +#endif +.spin: call YIELD_CPU + nop +.enter: ld [%i0],%i2 + cmp %i2,-4096 + be .spin + mov -1,%i2 + swap [%i0],%i2 + cmp %i2,-1 + be .spin + add %i2,%i1,%i2 + stbar + st %i2,[%i0] + sra %i2,%g0,%i0 + ret + restore +.v9: +#endif + ld [%o0],%o2 +1: add %o1,%o2,%o3 + .word 0xd7e2100a !cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3 + cmp %o2,%o3 + bne 1b + mov %o3,%o2 ! cas is always fetching to dest. register + add %o1,%o2,%o0 ! OpenSSL expects the new value + retl + sra %o0,%g0,%o0 ! we return signed int, remember? +.size OPENSSL_atomic_add,.-OPENSSL_atomic_add + +.global OPENSSL_rdtsc + subcc %g0,1,%o0 + .word 0x91408000 !rd %ccr,%o0 + cmp %o0,0x99 + bne .notsc + xor %o0,%o0,%o0 + save %sp,FRAME-16,%sp + mov 513,%o0 !SI_PLATFORM + add %sp,BIAS+16,%o1 + call sysinfo + mov 256,%o2 + + add %sp,BIAS-16,%o1 + ld [%o1],%l0 + ld [%o1+4],%l1 + ld [%o1+8],%l2 + mov %lo('SUNW'),%l3 + ret + restore +.notsc: + retl + nop +.type OPENSSL_rdtsc,#function +.size OPENSSL_rdtsc,.-OPENSSL_atomic_add diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl index 894c49c0a3..9ad9435ffd 100644 --- a/crypto/x86cpuid.pl +++ b/crypto/x86cpuid.pl @@ -72,6 +72,84 @@ require "x86asm.pl"; &ret (); &function_end_B("OPENSSL_instrument_halt"); +# Essentially there is only one use for this function. Under DJGPP: +# +# #include <go32.h> +# ... +# i=OPENSSL_far_spin(_dos_ds,0x46c); +# ... +# to obtain the number of spins till closest timer interrupt. + +&function_begin_B("OPENSSL_far_spin"); + &pushf (); + &pop ("eax") + &bt ("eax",9); + &jnc (&label("nospin")); # interrupts are disabled + + &mov ("eax",&DWP(4,"esp")); + &mov ("ecx",&DWP(8,"esp")); + &data_word (0x90d88e1e); # push %ds, mov %eax,%ds + &xor ("eax","eax"); + &mov ("edx",&DWP(0,"ecx")); + &jmp (&label("spin")); + + &align (16); +&set_label("spin"); + &inc ("eax"); + &cmp ("edx",&DWP(0,"ecx")); + &je (&label("spin")); + + &data_word (0x1f909090); # pop %ds + &ret (); + +&set_label("nospin"); + &xor ("eax","eax"); + &xor ("edx","edx"); + &ret (); +&function_end_B("OPENSSL_far_spin"); + +&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); + &xor ("eax","eax"); + &xor ("edx","edx"); + &picmeup("ecx","OPENSSL_ia32cap_P"); + &mov ("ecx",&DWP(0,"ecx")); + &bt (&DWP(0,"ecx"),1); + &jnc (&label("no_x87")); + &bt (&DWP(0,"ecx"),26); + &jnc (&label("no_sse2")); + &pxor ("xmm0","xmm0"); + &pxor ("xmm1","xmm1"); + &pxor ("xmm2","xmm2"); + &pxor ("xmm3","xmm3"); + &pxor ("xmm4","xmm4"); + &pxor ("xmm5","xmm5"); + &pxor ("xmm6","xmm6"); + &pxor ("xmm7","xmm7"); +&set_label("no_sse2"); + # just a bunch of fldz to zap the fp/mm bank... + &data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9); + &emms (); +&set_label("no_x87"); + &lea ("eax",&DWP(4,"esp")); + &ret (); +&function_end_B("OPENSSL_wipe_cpu"); + +&function_begin_B("OPENSSL_atomic_add"); + &mov ("edx",&DWP(4,"esp")); # fetch the pointer, 1st arg + &mov ("ecx",&DWP(8,"esp")); # fetch the increment, 2nd arg + &push ("ebx"); + &nop (); + &mov ("eax",&DWP(0,"edx")); +&set_label("spin"); + &lea ("ebx",&DWP(0,"eax","ecx")); + &nop (); + &data_word(0x1ab10ff0); # lock; cmpxchg %ebx,(%edx) # %eax is envolved and is always reloaded + &jne (&label("spin")); + &mov ("eax","ebx"); # OpenSSL expects the new value + &pop ("ebx"); + &ret (); +&function_end_B("OPENSSL_atomic_add"); + &initseg("OPENSSL_cpuid_setup"); &asm_finish(); |