openssl/crypto/x86_64cpuid.pl
Rich Salz e0a651945c Copyright consolidation: perl files
Add copyright to most .pl files
This does NOT cover any .pl file that has other copyright in it.
Most of those are Andy's but some are public domain.
Fix typo's in some existing files.

Reviewed-by: Richard Levitte <levitte@openssl.org>
2016-04-20 09:45:40 -04:00

407 lines
7.9 KiB
Prolog

#! /usr/bin/env perl
# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
open OUT,"| \"$^X\" $xlate $flavour $output";
*STDOUT=*OUT;
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
("%rdi","%rsi","%rdx","%rcx"); # Unix order
print<<___;
.extern OPENSSL_cpuid_setup
.hidden OPENSSL_cpuid_setup
.section .init
call OPENSSL_cpuid_setup
.hidden OPENSSL_ia32cap_P
.comm OPENSSL_ia32cap_P,16,4
.text
.globl OPENSSL_atomic_add
.type OPENSSL_atomic_add,\@abi-omnipotent
.align 16
OPENSSL_atomic_add:
movl ($arg1),%eax
.Lspin: leaq ($arg2,%rax),%r8
.byte 0xf0 # lock
cmpxchgl %r8d,($arg1)
jne .Lspin
movl %r8d,%eax
.byte 0x48,0x98 # cltq/cdqe
ret
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
.globl OPENSSL_rdtsc
.type OPENSSL_rdtsc,\@abi-omnipotent
.align 16
OPENSSL_rdtsc:
rdtsc
shl \$32,%rdx
or %rdx,%rax
ret
.size OPENSSL_rdtsc,.-OPENSSL_rdtsc
.globl OPENSSL_ia32_cpuid
.type OPENSSL_ia32_cpuid,\@function,1
.align 16
OPENSSL_ia32_cpuid:
mov %rbx,%r8 # save %rbx
xor %eax,%eax
mov %eax,8(%rdi) # clear 3rd word
cpuid
mov %eax,%r11d # max value for standard query level
xor %eax,%eax
cmp \$0x756e6547,%ebx # "Genu"
setne %al
mov %eax,%r9d
cmp \$0x49656e69,%edx # "ineI"
setne %al
or %eax,%r9d
cmp \$0x6c65746e,%ecx # "ntel"
setne %al
or %eax,%r9d # 0 indicates Intel CPU
jz .Lintel
cmp \$0x68747541,%ebx # "Auth"
setne %al
mov %eax,%r10d
cmp \$0x69746E65,%edx # "enti"
setne %al
or %eax,%r10d
cmp \$0x444D4163,%ecx # "cAMD"
setne %al
or %eax,%r10d # 0 indicates AMD CPU
jnz .Lintel
# AMD specific
mov \$0x80000000,%eax
cpuid
cmp \$0x80000001,%eax
jb .Lintel
mov %eax,%r10d
mov \$0x80000001,%eax
cpuid
or %ecx,%r9d
and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11
cmp \$0x80000008,%r10d
jb .Lintel
mov \$0x80000008,%eax
cpuid
movzb %cl,%r10 # number of cores - 1
inc %r10 # number of cores
mov \$1,%eax
cpuid
bt \$28,%edx # test hyper-threading bit
jnc .Lgeneric
shr \$16,%ebx # number of logical processors
cmp %r10b,%bl
ja .Lgeneric
and \$0xefffffff,%edx # ~(1<<28)
jmp .Lgeneric
.Lintel:
cmp \$4,%r11d
mov \$-1,%r10d
jb .Lnocacheinfo
mov \$4,%eax
mov \$0,%ecx # query L1D
cpuid
mov %eax,%r10d
shr \$14,%r10d
and \$0xfff,%r10d # number of cores -1 per L1D
cmp \$7,%r11d
jb .Lnocacheinfo
mov \$7,%eax
xor %ecx,%ecx
cpuid
mov %ebx,8(%rdi)
.Lnocacheinfo:
mov \$1,%eax
cpuid
and \$0xbfefffff,%edx # force reserved bits to 0
cmp \$0,%r9d
jne .Lnotintel
or \$0x40000000,%edx # set reserved bit#30 on Intel CPUs
and \$15,%ah
cmp \$15,%ah # examine Family ID
jne .Lnotintel
or \$0x00100000,%edx # set reserved bit#20 to engage RC4_CHAR
.Lnotintel:
bt \$28,%edx # test hyper-threading bit
jnc .Lgeneric
and \$0xefffffff,%edx # ~(1<<28)
cmp \$0,%r10d
je .Lgeneric
or \$0x10000000,%edx # 1<<28
shr \$16,%ebx
cmp \$1,%bl # see if cache is shared
ja .Lgeneric
and \$0xefffffff,%edx # ~(1<<28)
.Lgeneric:
and \$0x00000800,%r9d # isolate AMD XOP flag
and \$0xfffff7ff,%ecx
or %ecx,%r9d # merge AMD XOP flag
mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx
bt \$27,%r9d # check OSXSAVE bit
jnc .Lclear_avx
xor %ecx,%ecx # XCR0
.byte 0x0f,0x01,0xd0 # xgetbv
and \$6,%eax # isolate XMM and YMM state support
cmp \$6,%eax
je .Ldone
.Lclear_avx:
mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11)
and %eax,%r9d # clear AVX, FMA and AMD XOP bits
andl \$0xffffffdf,8(%rdi) # cleax AVX2, ~(1<<5)
.Ldone:
shl \$32,%r9
mov %r10d,%eax
mov %r8,%rbx # restore %rbx
or %r9,%rax
ret
.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
.globl OPENSSL_cleanse
.type OPENSSL_cleanse,\@abi-omnipotent
.align 16
OPENSSL_cleanse:
xor %rax,%rax
cmp \$15,$arg2
jae .Lot
cmp \$0,$arg2
je .Lret
.Little:
mov %al,($arg1)
sub \$1,$arg2
lea 1($arg1),$arg1
jnz .Little
.Lret:
ret
.align 16
.Lot:
test \$7,$arg1
jz .Laligned
mov %al,($arg1)
lea -1($arg2),$arg2
lea 1($arg1),$arg1
jmp .Lot
.Laligned:
mov %rax,($arg1)
lea -8($arg2),$arg2
test \$-8,$arg2
lea 8($arg1),$arg1
jnz .Laligned
cmp \$0,$arg2
jne .Little
ret
.size OPENSSL_cleanse,.-OPENSSL_cleanse
___
print<<___ if (!$win64);
.globl OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,\@abi-omnipotent
.align 16
OPENSSL_wipe_cpu:
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
pxor %xmm8,%xmm8
pxor %xmm9,%xmm9
pxor %xmm10,%xmm10
pxor %xmm11,%xmm11
pxor %xmm12,%xmm12
pxor %xmm13,%xmm13
pxor %xmm14,%xmm14
pxor %xmm15,%xmm15
xorq %rcx,%rcx
xorq %rdx,%rdx
xorq %rsi,%rsi
xorq %rdi,%rdi
xorq %r8,%r8
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
leaq 8(%rsp),%rax
ret
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
print<<___ if ($win64);
.globl OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,\@abi-omnipotent
.align 16
OPENSSL_wipe_cpu:
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
xorq %rcx,%rcx
xorq %rdx,%rdx
xorq %r8,%r8
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
leaq 8(%rsp),%rax
ret
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
{
my $out="%r10";
my $cnt="%rcx";
my $max="%r11";
my $lasttick="%r8d";
my $lastdiff="%r9d";
my $redzone=win64?8:-8;
print<<___;
.globl OPENSSL_instrument_bus
.type OPENSSL_instrument_bus,\@abi-omnipotent
.align 16
OPENSSL_instrument_bus:
mov $arg1,$out # tribute to Win64
mov $arg2,$cnt
mov $arg2,$max
rdtsc # collect 1st tick
mov %eax,$lasttick # lasttick = tick
mov \$0,$lastdiff # lastdiff = 0
clflush ($out)
.byte 0xf0 # lock
add $lastdiff,($out)
jmp .Loop
.align 16
.Loop: rdtsc
mov %eax,%edx
sub $lasttick,%eax
mov %edx,$lasttick
mov %eax,$lastdiff
clflush ($out)
.byte 0xf0 # lock
add %eax,($out)
lea 4($out),$out
sub \$1,$cnt
jnz .Loop
mov $max,%rax
ret
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
.globl OPENSSL_instrument_bus2
.type OPENSSL_instrument_bus2,\@abi-omnipotent
.align 16
OPENSSL_instrument_bus2:
mov $arg1,$out # tribute to Win64
mov $arg2,$cnt
mov $arg3,$max
mov $cnt,$redzone(%rsp)
rdtsc # collect 1st tick
mov %eax,$lasttick # lasttick = tick
mov \$0,$lastdiff # lastdiff = 0
clflush ($out)
.byte 0xf0 # lock
add $lastdiff,($out)
rdtsc # collect 1st diff
mov %eax,%edx
sub $lasttick,%eax # diff
mov %edx,$lasttick # lasttick = tick
mov %eax,$lastdiff # lastdiff = diff
.Loop2:
clflush ($out)
.byte 0xf0 # lock
add %eax,($out) # accumulate diff
sub \$1,$max
jz .Ldone2
rdtsc
mov %eax,%edx
sub $lasttick,%eax # diff
mov %edx,$lasttick # lasttick = tick
cmp $lastdiff,%eax
mov %eax,$lastdiff # lastdiff = diff
mov \$0,%edx
setne %dl
sub %rdx,$cnt # conditional --$cnt
lea ($out,%rdx,4),$out # conditional ++$out
jnz .Loop2
.Ldone2:
mov $redzone(%rsp),%rax
sub $cnt,%rax
ret
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
___
}
print<<___;
.globl OPENSSL_ia32_rdrand
.type OPENSSL_ia32_rdrand,\@abi-omnipotent
.align 16
OPENSSL_ia32_rdrand:
mov \$8,%ecx
.Loop_rdrand:
rdrand %rax
jc .Lbreak_rdrand
loop .Loop_rdrand
.Lbreak_rdrand:
cmp \$0,%rax
cmove %rcx,%rax
ret
.size OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
.globl OPENSSL_ia32_rdseed
.type OPENSSL_ia32_rdseed,\@abi-omnipotent
.align 16
OPENSSL_ia32_rdseed:
mov \$8,%ecx
.Loop_rdseed:
rdseed %rax
jc .Lbreak_rdseed
loop .Loop_rdseed
.Lbreak_rdseed:
cmp \$0,%rax
cmove %rcx,%rax
ret
.size OPENSSL_ia32_rdseed,.-OPENSSL_ia32_rdseed
___
close STDOUT; # flush