summaryrefslogtreecommitdiffstats
path: root/crypto
diff options
context:
space:
mode:
authorAndy Polyakov <appro@openssl.org>2012-11-17 20:04:15 +0100
committerAndy Polyakov <appro@openssl.org>2012-11-17 20:04:15 +0100
commitc5cd28bd64fa2b02f29e74486539e4b2f6741114 (patch)
tree47c1f26d5a0ce2ba6f32652f17ff240902fd5afb /crypto
parentperlasm/sparcv9_modes.pl: addendum to commit#22966. (diff)
downloadopenssl-c5cd28bd64fa2b02f29e74486539e4b2f6741114.tar.xz
openssl-c5cd28bd64fa2b02f29e74486539e4b2f6741114.zip
Extend OPENSSL_ia32cap_P with an extra word to accommodate AVX2 capability.
Diffstat (limited to 'crypto')
-rw-r--r--crypto/cryptlib.c20
-rw-r--r--crypto/perlasm/x86asm.pl26
-rw-r--r--crypto/perlasm/x86gas.pl4
-rw-r--r--crypto/perlasm/x86masm.pl4
-rw-r--r--crypto/perlasm/x86nasm.pl4
-rw-r--r--crypto/x86_64cpuid.pl14
-rw-r--r--crypto/x86cpuid.pl13
7 files changed, 74 insertions, 11 deletions
diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c
index c85fe5aa3d..6defb7cc69 100644
--- a/crypto/cryptlib.c
+++ b/crypto/cryptlib.c
@@ -125,7 +125,7 @@ static double SSLeay_MSVC5_hack=0.0; /* and for VC1.5 */
defined(__INTEL__) || \
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
-extern unsigned int OPENSSL_ia32cap_P[2];
+extern unsigned int OPENSSL_ia32cap_P[4];
unsigned int *OPENSSL_ia32cap_loc(void) { return OPENSSL_ia32cap_P; }
#if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY)
@@ -137,7 +137,7 @@ typedef unsigned long long IA32CAP;
#endif
void OPENSSL_cpuid_setup(void)
{ static int trigger=0;
- IA32CAP OPENSSL_ia32_cpuid(void);
+ IA32CAP OPENSSL_ia32_cpuid(unsigned int *);
IA32CAP vec;
char *env;
@@ -151,10 +151,18 @@ void OPENSSL_cpuid_setup(void)
#else
if (!sscanf(env+off,"%lli",(long long *)&vec)) vec = strtoul(env+off,NULL,0);
#endif
- if (off) vec = OPENSSL_ia32_cpuid()&~vec;
+ if (off) vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P)&~vec;
+
+ OPENSSL_ia32cap_P[2] = 0;
+ if ((env=strchr(env,':'))) {
+ off = (env[1]=='~')?2:1;
+ vec = strtoul(env+off,NULL,0);
+ if (off>1) OPENSSL_ia32cap_P[2] &= ~vec;
+ else OPENSSL_ia32cap_P[2] = vec;
+ }
}
else
- vec = OPENSSL_ia32_cpuid();
+ vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P);
/*
* |(1<<10) sets a reserved bit to signal that variable
@@ -165,7 +173,7 @@ void OPENSSL_cpuid_setup(void)
OPENSSL_ia32cap_P[1] = (unsigned int)(vec>>32);
}
#else
-unsigned int OPENSSL_ia32cap_P[2];
+unsigned int OPENSSL_ia32cap_P[4];
#endif
#else
@@ -173,7 +181,7 @@ unsigned int *OPENSSL_ia32cap_loc(void) { return NULL; }
#endif
int OPENSSL_NONPIC_relocated = 0;
#if !defined(OPENSSL_CPUID_SETUP) && !defined(OPENSSL_CPUID_OBJ)
-void OPENSSL_cpuid_setup(void) {}
+void OPENSSL_cpuid_setup(void) {} /* no-op stub for builds without CPUID support; takes no arguments, only OPENSSL_ia32_cpuid gained a parameter */
#endif
#if (defined(_WIN32) || defined(__CYGWIN__)) && defined(_WINDLL)
diff --git a/crypto/perlasm/x86asm.pl b/crypto/perlasm/x86asm.pl
index 3f190ae590..17abf92297 100644
--- a/crypto/perlasm/x86asm.pl
+++ b/crypto/perlasm/x86asm.pl
@@ -131,6 +131,32 @@ sub ::rdrand
{ &::generic("rdrand",@_); }
}
+sub rxb { # append one prefix byte to the caller's opcode array; presumably the inverted R/X/B register-extension bits of a VEX/XOP prefix -- confirm against the AMD64 manual
+ local *opcode=shift; # first argument is an array reference; alias @opcode to it for the duration of the call
+ my ($dst,$src1,$src2,$rxb)=@_; # three register numbers, then the initial value of the byte (its low bits)
+
+ $rxb|=0x7<<5; # start with bits 7..5 all set
+ $rxb&=~(0x04<<5) if($dst>=8); # clear bit 7 when $dst is 8 or above
+ $rxb&=~(0x01<<5) if($src1>=8); # clear bit 5 when $src1 is 8 or above
+ $rxb&=~(0x02<<5) if($src2>=8); # clear bit 6 when $src2 is 8 or above
+ push @opcode,$rxb; # lands in the caller's array through the alias
+}
+
+sub ::vprotd # emit vprotd: hand-encode the xmm,xmm,immediate form as raw bytes, otherwise defer to ::generic
+{ my $args=join(',',@_); # flatten the operands into one string for matching
+ if ($args =~ /xmm([0-7]),xmm([0-7]),([x0-9a-f]+)/) # two registers limited to xmm0-7, then an operand of hex digits and 'x'
+ { my @opcode=(0x8f); # first byte; presumably the AMD XOP escape -- confirm
+ rxb(\@opcode,$1,$2,-1,0x08); # appends the second byte; -1 as $src2 never satisfies >=8, so its bit stays set
+ push @opcode,0x78,0xc2;
+ push @opcode,0xc0|($2&7)|(($1&7)<<3); # ModR/M
+ my $c=$3; # copy the capture before the next match overwrites $3
+ push @opcode,$c=~/^0/?oct($c):$c; # a leading 0 goes through oct() (0x.., 0b.., octal); anything else is used as written
+ &::data_byte(@opcode); # hand the byte list to ::data_byte (defined elsewhere)
+ }
+ else
+ { &::generic("vprotd",@_); } # any other operand form is passed through as a plain mnemonic
+}
+
# label management
$lbdecor="L"; # local label decoration, set by package
$label="000";
diff --git a/crypto/perlasm/x86gas.pl b/crypto/perlasm/x86gas.pl
index e02ee84258..5c2498118f 100644
--- a/crypto/perlasm/x86gas.pl
+++ b/crypto/perlasm/x86gas.pl
@@ -70,6 +70,8 @@ sub ::DWP
{ my($addr,$reg1,$reg2,$idx)=@_;
my $ret="";
+ if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; }
+
$addr =~ s/^\s+//;
# prepend global references with optional underscore
$addr =~ s/^([^\+\-0-9][^\+\-]*)/&::islabel($1) or "$nmdecor$1"/ige;
@@ -157,7 +159,7 @@ sub ::file_end
}
}
if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) {
- my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,8";
+ my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,16";
if ($::macosx) { push (@out,"$tmp,2\n"); }
elsif ($::elf) { push (@out,"$tmp,4\n"); }
else { push (@out,"$tmp\n"); }
diff --git a/crypto/perlasm/x86masm.pl b/crypto/perlasm/x86masm.pl
index f937d07c87..1741342c3a 100644
--- a/crypto/perlasm/x86masm.pl
+++ b/crypto/perlasm/x86masm.pl
@@ -39,6 +39,8 @@ sub get_mem
{ my($size,$addr,$reg1,$reg2,$idx)=@_;
my($post,$ret);
+ if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; }
+
$ret .= "$size PTR " if ($size ne "");
$addr =~ s/^\s+//;
@@ -133,7 +135,7 @@ ___
if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
{ my $comm=<<___;
.bss SEGMENT 'BSS'
-COMM ${nmdecor}OPENSSL_ia32cap_P:QWORD
+COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD:4
.bss ENDS
___
# comment out OPENSSL_ia32cap_P declarations
diff --git a/crypto/perlasm/x86nasm.pl b/crypto/perlasm/x86nasm.pl
index ca2511c9eb..5d92f6092a 100644
--- a/crypto/perlasm/x86nasm.pl
+++ b/crypto/perlasm/x86nasm.pl
@@ -36,6 +36,8 @@ sub get_mem
{ my($size,$addr,$reg1,$reg2,$idx)=@_;
my($post,$ret);
+ if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; }
+
if ($size ne "")
{ $ret .= "$size";
$ret .= " PTR" if ($::mwerks);
@@ -117,7 +119,7 @@ sub ::file_end
{ if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
{ my $comm=<<___;
${drdecor}segment .bss
-${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 8
+${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 16
___
# comment out OPENSSL_ia32cap_P declarations
grep {s/(^extern\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out;
diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl
index 58c7bab1b6..3a1adeeccc 100644
--- a/crypto/x86_64cpuid.pl
+++ b/crypto/x86_64cpuid.pl
@@ -23,7 +23,7 @@ print<<___;
call OPENSSL_cpuid_setup
.hidden OPENSSL_ia32cap_P
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
.text
@@ -52,12 +52,13 @@ OPENSSL_rdtsc:
.size OPENSSL_rdtsc,.-OPENSSL_rdtsc
.globl OPENSSL_ia32_cpuid
-.type OPENSSL_ia32_cpuid,\@abi-omnipotent
+.type OPENSSL_ia32_cpuid,\@function,1
.align 16
OPENSSL_ia32_cpuid:
mov %rbx,%r8 # save %rbx
xor %eax,%eax
+ mov %eax,8(%rdi) # clear 3rd word
cpuid
mov %eax,%r11d # max value for standard query level
@@ -125,6 +126,14 @@ OPENSSL_ia32_cpuid:
shr \$14,%r10d
and \$0xfff,%r10d # number of cores -1 per L1D
+ cmp \$7,%r11d
+ jb .Lnocacheinfo
+
+ mov \$7,%eax
+ xor %ecx,%ecx
+ cpuid
+ mov %ebx,8(%rdi)
+
.Lnocacheinfo:
mov \$1,%eax
cpuid
@@ -164,6 +173,7 @@ OPENSSL_ia32_cpuid:
.Lclear_avx:
mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11)
and %eax,%r9d # clear AVX, FMA and AMD XOP bits
+ andl \$0xffffffdf,8(%rdi) # clear AVX2, ~(1<<5)
.Ldone:
shl \$32,%r9
mov %r10d,%eax
diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl
index 597b7a51a7..3b6c469d08 100644
--- a/crypto/x86cpuid.pl
+++ b/crypto/x86cpuid.pl
@@ -22,6 +22,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&xor ("eax","eax");
&bt ("ecx",21);
&jnc (&label("nocpuid"));
+ &mov ("esi",&wparam(0));
+ &mov (&DWP(8,"esi"),"eax"); # clear 3rd word
&cpuid ();
&mov ("edi","eax"); # max value for standard query level
@@ -89,6 +91,15 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&shr ("edi",14);
&and ("edi",0xfff); # number of cores -1 per L1D
+ &cmp ("edi",7);
+ &jb (&label("nocacheinfo"));
+
+ &mov ("esi",&wparam(0));
+ &mov ("eax",7);
+ &xor ("ecx","ecx");
+ &cpuid ();
+ &mov (&DWP(8,"esi"),"ebx");
+
&set_label("nocacheinfo");
&mov ("eax",1);
&cpuid ();
@@ -133,6 +144,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&and ("esi",0xfeffffff); # clear FXSR
&set_label("clear_avx");
&and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits
+ &mov ("edi",&wparam(0));
+ &and (&DWP(8,"edi"),0xffffffdf); # clear AVX2
&set_label("done");
&mov ("eax","esi");
&mov ("edx","ebp");