libcrypto: Switch back to the generated assembly in sys/crypto/openssl

Reviewed by:	markj
Differential Revision:	https://reviews.freebsd.org/D41569
This commit is contained in:
John Baldwin
2023-08-29 14:46:44 -07:00
parent c0855eaa3e
commit 47d997021f
145 changed files with 5 additions and 310557 deletions
+2 -2
View File
@@ -618,12 +618,12 @@ buildasm cleanasm:
PICFLAG+= -DOPENSSL_PIC
.if defined(ASM_${MACHINE_CPUARCH})
-.PATH: ${SRCTOP}/secure/lib/libcrypto/arch/${MACHINE_CPUARCH}
+.PATH: ${SRCTOP}/sys/crypto/openssl/${MACHINE_CPUARCH}
.if defined(ASM_amd64)
.PATH: ${LCRYPTO_SRC}/crypto/bn/asm
.endif
.elif defined(ASM_${MACHINE_ARCH})
-.PATH: ${SRCTOP}/secure/lib/libcrypto/arch/${MACHINE_ARCH}
+.PATH: ${SRCTOP}/sys/crypto/openssl/${MACHINE_ARCH}
.endif
.PATH: ${LCRYPTO_SRC}/crypto \
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1,130 +0,0 @@
/* Do not modify. This file is auto-generated from arm64cpuid.pl. */
#include "arm_arch.h"
.text
.arch armv8-a+crypto
.align 5
// _armv7_neon_probe: execute one Advanced SIMD register instruction and
// return. ORR of v15 with itself leaves the register contents unchanged, so
// the only observable effect is whether the instruction executes at all.
// NOTE(review): presumably called by the capability-detection code with an
// illegal-instruction handler installed -- confirm against the caller.
.globl _armv7_neon_probe
.type _armv7_neon_probe,%function
_armv7_neon_probe:
orr v15.16b, v15.16b, v15.16b
ret
.size _armv7_neon_probe,.-_armv7_neon_probe
// _armv7_tick: return the current value of a generic-timer counter in x0.
// Apple builds read the physical counter (CNTPCT_EL0); every other target
// reads the virtual counter (CNTVCT_EL0).
// Out:   x0 = counter value
.globl _armv7_tick
.type _armv7_tick,%function
_armv7_tick:
#ifdef __APPLE__
mrs x0, CNTPCT_EL0
#else
mrs x0, CNTVCT_EL0
#endif
ret
.size _armv7_tick,.-_armv7_tick
// _armv8_aes_probe: execute one AESE instruction and return.
// Clobbers v0 (it is the destination of AESE).
// NOTE(review): presumably used to detect the AES extension by whether the
// instruction traps -- confirm against the caller.
.globl _armv8_aes_probe
.type _armv8_aes_probe,%function
_armv8_aes_probe:
aese v0.16b, v0.16b
ret
.size _armv8_aes_probe,.-_armv8_aes_probe
// _armv8_sha1_probe: execute one SHA1H instruction and return.
// Clobbers s0 (destination of SHA1H).
// NOTE(review): presumably used to detect the SHA-1 extension by whether the
// instruction traps -- confirm against the caller.
.globl _armv8_sha1_probe
.type _armv8_sha1_probe,%function
_armv8_sha1_probe:
sha1h s0, s0
ret
.size _armv8_sha1_probe,.-_armv8_sha1_probe
// _armv8_sha256_probe: execute one SHA256SU0 instruction and return.
// Clobbers v0 (destination of SHA256SU0).
// NOTE(review): presumably used to detect the SHA-256 extension by whether
// the instruction traps -- confirm against the caller.
.globl _armv8_sha256_probe
.type _armv8_sha256_probe,%function
_armv8_sha256_probe:
sha256su0 v0.4s, v0.4s
ret
.size _armv8_sha256_probe,.-_armv8_sha256_probe
// _armv8_pmull_probe: execute one 64x64->128-bit PMULL instruction and
// return. Clobbers v0 (destination of PMULL).
// NOTE(review): presumably used to detect the polynomial-multiply extension
// by whether the instruction traps -- confirm against the caller.
.globl _armv8_pmull_probe
.type _armv8_pmull_probe,%function
_armv8_pmull_probe:
pmull v0.1q, v0.1d, v0.1d
ret
.size _armv8_pmull_probe,.-_armv8_pmull_probe
// _armv8_sha512_probe: execute one SHA512SU0 instruction and return.
// The instruction is emitted as a raw 32-bit word rather than by mnemonic.
// NOTE(review): presumably because the ".arch armv8-a+crypto" setting used by
// this file does not enable the SHA-512 mnemonics -- confirm.
// Clobbers v0 (destination register of the encoded instruction).
.globl _armv8_sha512_probe
.type _armv8_sha512_probe,%function
_armv8_sha512_probe:
.long 0xcec08000 // sha512su0 v0.2d,v0.2d
ret
.size _armv8_sha512_probe,.-_armv8_sha512_probe
// _armv8_cpuid_probe: return the contents of the MIDR_EL1 system register.
// Out:   x0 = MIDR_EL1
// NOTE(review): MIDR_EL1 is an EL1 register; this presumably relies on the
// kernel emulating or permitting the read from user mode -- confirm.
.globl _armv8_cpuid_probe
.type _armv8_cpuid_probe,%function
_armv8_cpuid_probe:
mrs x0, midr_el1
ret
.size _armv8_cpuid_probe,.-_armv8_cpuid_probe
//----------------------------------------------------------------------
// OPENSSL_cleanse: overwrite a memory region with zero bytes.
// In:    x0 = start of the region
//        x1 = length in bytes (0 is allowed)
// Out:   nothing; x0 is advanced past the region, x1 ends at 0
// Clobb: x0, x1, flags
//
// Regions of up to 15 bytes are cleared one byte at a time. Longer
// regions are cleared byte-wise until x0 is 8-byte aligned, then with
// 8-byte stores, and any remainder (at most 7 bytes) goes back through
// the byte loop.
//----------------------------------------------------------------------
.globl OPENSSL_cleanse
.type OPENSSL_cleanse,%function
.align 5
OPENSSL_cleanse:
	cbz	x1,.Lcleanse_done		// empty region: nothing to do
	cmp	x1,#16
	b.hs	.Lcleanse_long			// 16 bytes or more
	nop					// presumably pads the byte loop to a 16-byte boundary
.Lcleanse_bytes:
	strb	wzr,[x0],#1			// *ptr++ = 0
	subs	x1,x1,#1
	b.ne	.Lcleanse_bytes
.Lcleanse_done:
	ret

.align 4
.Lcleanse_long:
	tst	x0,#7
	b.eq	.Lcleanse_words			// pointer is 8-byte aligned
	strb	wzr,[x0],#1			// clear one byte towards alignment
	sub	x1,x1,#1
	b	.Lcleanse_long

.align 4
.Lcleanse_words:
	str	xzr,[x0],#8			// clear eight bytes per iteration
	sub	x1,x1,#8
	cmp	x1,#8
	b.hs	.Lcleanse_words			// at least one more full word
	cbnz	x1,.Lcleanse_bytes		// 1..7 bytes left over
	ret
.size OPENSSL_cleanse,.-OPENSSL_cleanse
//----------------------------------------------------------------------
// CRYPTO_memcmp: compare two byte regions without an early exit.
// In:    x0 = first region
//        x1 = second region
//        x2 = length in bytes (0 is allowed)
// Out:   x0 = 0 when every byte matches (or the length is 0), 1 otherwise
// Clobb: w3-w5, x8-x11 (16-byte path), x1, x2, flags
//
// Every byte pair is XORed and the results are ORed together, so the
// amount of work depends only on the length, never on where the first
// difference is. A length of exactly 16 takes a two-load fast path.
//----------------------------------------------------------------------
.globl CRYPTO_memcmp
.type CRYPTO_memcmp,%function
.align 4
CRYPTO_memcmp:
	mov	w3,wzr				// accumulated difference = 0
	cbz	x2,.Lmemcmp_result		// zero length compares equal
	cmp	x2,#16
	b.ne	.Lmemcmp_bytes

	// Exactly 16 bytes: compare as two 64-bit words per side.
	ldp	x8,x9,[x0]
	ldp	x10,x11,[x1]
	eor	x8,x8,x10
	eor	x9,x9,x11
	orr	x8,x8,x9			// non-zero iff any bit differs
	cmp	x8,#0
	cset	x0,ne				// x0 = (difference != 0)
	ret

.align 4
.Lmemcmp_bytes:
	ldrb	w4,[x0],#1
	ldrb	w5,[x1],#1
	subs	x2,x2,#1			// flags consumed by b.ne below
	eor	w4,w4,w5
	orr	w3,w3,w4			// fold this byte's difference in
	b.ne	.Lmemcmp_bytes

.Lmemcmp_result:
	neg	w0,w3				// w3 is 0..255; negation sets bit 31 iff w3 != 0
	lsr	w0,w0,#31			// move that bit down to bit 0
	ret
.size CRYPTO_memcmp,.-CRYPTO_memcmp
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1,553 +0,0 @@
/* Do not modify. This file is auto-generated from ghashv8-armx.pl. */
#include "arm_arch.h"
#if __ARM_MAX_ARCH__>=7
.arch armv8-a+crypto
.text
//----------------------------------------------------------------------
// gcm_init_v8: build the table of hash-key powers used by the GHASH
// routines in this file.
// In:    x0 = Htable (output, 6 x 16 bytes are written)
//        x1 = H (16-byte input)
// Out:   Htable[0] = twisted H
//        Htable[1] = packed Karatsuba pre-processed halves for H and H^2
//        Htable[2] = H^2
//        Htable[3] = H^3
//        Htable[4] = packed Karatsuba pre-processed halves for H^3 and H^4
//        Htable[5] = H^4
// Clobb: x0 (advanced by 48), v0-v7, v16-v22
//----------------------------------------------------------------------
.globl gcm_init_v8
.type gcm_init_v8,%function
.align 4
gcm_init_v8:
ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0
ext v3.16b,v17.16b,v17.16b,#8
ushr v18.2d,v19.2d,#63
dup v17.4s,v17.s[1]
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
ushr v18.2d,v3.2d,#63
sshr v17.4s,v17.4s,#31 //broadcast carry bit
and v18.16b,v18.16b,v16.16b
shl v3.2d,v3.2d,#1
ext v18.16b,v18.16b,v18.16b,#8
and v16.16b,v16.16b,v17.16b
orr v3.16b,v3.16b,v18.16b //H<<<=1
eor v20.16b,v3.16b,v16.16b //twisted H
st1 {v20.2d},[x0],#16 //store Htable[0]
//calculate H^2
// v19 keeps the 0xc2.0 reduction constant for every reduction below.
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
pmull v0.1q,v20.1d,v20.1d
eor v16.16b,v16.16b,v20.16b
pmull2 v2.1q,v20.2d,v20.2d
pmull v1.1q,v16.1d,v16.1d
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v22.16b,v0.16b,v18.16b
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
eor v17.16b,v17.16b,v22.16b
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v21.2d,v22.2d},[x0],#32 //store Htable[1..2]
//calculate H^3 and H^4
// The two products H*H^2 (v0/v1/v2) and H^2*H^2 (v5/v6/v7) are computed
// and reduced side by side, with their instructions interleaved.
pmull v0.1q,v20.1d, v22.1d
pmull v5.1q,v22.1d,v22.1d
pmull2 v2.1q,v20.2d, v22.2d
pmull2 v7.1q,v22.2d,v22.2d
pmull v1.1q,v16.1d,v17.1d
pmull v6.1q,v17.1d,v17.1d
ext v16.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
ext v17.16b,v5.16b,v7.16b,#8
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v16.16b
eor v4.16b,v5.16b,v7.16b
eor v6.16b,v6.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase
eor v6.16b,v6.16b,v4.16b
pmull v4.1q,v5.1d,v19.1d
ins v2.d[0],v1.d[1]
ins v7.d[0],v6.d[1]
ins v1.d[1],v0.d[0]
ins v6.d[1],v5.d[0]
eor v0.16b,v1.16b,v18.16b
eor v5.16b,v6.16b,v4.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
ext v4.16b,v5.16b,v5.16b,#8
pmull v0.1q,v0.1d,v19.1d
pmull v5.1q,v5.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v4.16b,v4.16b,v7.16b
eor v20.16b, v0.16b,v18.16b //H^3
eor v22.16b,v5.16b,v4.16b //H^4
ext v16.16b,v20.16b, v20.16b,#8 //Karatsuba pre-processing
ext v17.16b,v22.16b,v22.16b,#8
eor v16.16b,v16.16b,v20.16b
eor v17.16b,v17.16b,v22.16b
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v20.2d,v21.2d,v22.2d},[x0] //store Htable[3..5]
ret
.size gcm_init_v8,.-gcm_init_v8
//----------------------------------------------------------------------
// gcm_gmult_v8: multiply the 16-byte value Xi by the twisted hash key H
// and reduce, storing the result back over Xi.
// In:    x0 = Xi (16 bytes, read and then overwritten)
//        x1 = Htable (first 32 bytes are read: twisted H and the packed
//             Karatsuba pre-processed entry that follows it)
// Out:   [x0] = updated Xi
// Clobb: v0-v3, v17-v21
// On little-endian builds (no __ARMEB__) Xi is byte-reversed within each
// 64-bit half on load and again before the store.
//----------------------------------------------------------------------
.globl gcm_gmult_v8
.type gcm_gmult_v8,%function
.align 4
gcm_gmult_v8:
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
shl v19.2d,v19.2d,#57
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v3.16b,v17.16b,v17.16b,#8
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.size gcm_gmult_v8,.-gcm_gmult_v8
//----------------------------------------------------------------------
// gcm_ghash_v8: fold a run of input blocks into the GHASH accumulator Xi.
// In:    x0 = Xi (16 bytes, read and then overwritten)
//        x1 = Htable (advanced by 32 here; the first 48 bytes are read)
//        x2 = input
//        x3 = input length in bytes
// Out:   [x0] = updated Xi
// Clobb: x1-x3, x12, v0-v7, v16-v22, flags
// Lengths of 64 bytes or more are handed to .Lgcm_ghash_v8_4x by a plain
// branch (no return here). Shorter inputs are processed two blocks per
// iteration, with a single-block tail.
// NOTE(review): the length is assumed to be a non-zero multiple of 16 --
// confirm against the callers.
//----------------------------------------------------------------------
.globl gcm_ghash_v8
.type gcm_ghash_v8,%function
.align 4
gcm_ghash_v8:
cmp x3,#64
b.hs .Lgcm_ghash_v8_4x
ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that
//loaded value would have
//to be rotated in order to
//make it appear as in
//algorithm specification
subs x3,x3,#32 //see if x3 is 32 or larger
mov x12,#16 //x12 is used as post-
//increment for input pointer;
//as loop is modulo-scheduled
//x12 is zeroed just in time
//to preclude overstepping
//inp[len], which means that
//last block[s] are actually
//loaded twice, but last
//copy is not processed
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
movi v19.16b,#0xe1
ld1 {v22.2d},[x1]
// The flags set by the subs above are still live here and at the b.lo
// below; nothing in between writes the condition flags.
csel x12,xzr,x12,eq //is it time to zero x12?
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
rev64 v0.16b,v0.16b
#endif
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
b.lo .Lodd_tail_v8 //x3 was less than 32
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v7.16b,v17.16b,v17.16b,#8
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
pmull2 v6.1q,v20.2d,v7.2d
b .Loop_mod2x_v8
.align 4
// Two blocks per iteration: the even block (already XORed with Xi) is
// multiplied by H^2 and the odd block by H, and the two products are
// accumulated before a single reduction.
.Loop_mod2x_v8:
ext v18.16b,v3.16b,v3.16b,#8
subs x3,x3,#32 //is there more data?
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
csel x12,xzr,x12,lo //is it time to zero x12?
pmull v5.1q,v21.1d,v17.1d
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
eor v0.16b,v0.16b,v4.16b //accumulate
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
eor v2.16b,v2.16b,v6.16b
csel x12,xzr,x12,eq //is it time to zero x12?
eor v1.16b,v1.16b,v5.16b
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
#endif
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v7.16b,v17.16b,v17.16b,#8
ext v3.16b,v16.16b,v16.16b,#8
eor v0.16b,v1.16b,v18.16b
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v3.16b,v3.16b,v18.16b
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
eor v3.16b,v3.16b,v0.16b
pmull2 v6.1q,v20.2d,v7.2d
// Flags here are still those of the subs at the top of the loop body.
b.hs .Loop_mod2x_v8 //there was at least 32 more bytes
eor v2.16b,v2.16b,v18.16b
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
adds x3,x3,#32 //re-construct x3
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
b.eq .Ldone_v8 //is x3 zero?
// One block left: multiply (block ^ Xi) by H and reduce.
.Lodd_tail_v8:
ext v18.16b,v0.16b,v0.16b,#8
eor v3.16b,v3.16b,v0.16b //inp^=Xi
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
.Ldone_v8:
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.size gcm_ghash_v8,.-gcm_ghash_v8
//----------------------------------------------------------------------
// gcm_ghash_v8_4x: GHASH over four blocks per iteration. The symbol is
// not exported; within the visible code it is reached only by the branch
// at the top of gcm_ghash_v8, taken when the length is 64 bytes or more.
// In:    x0 = Xi (16 bytes, read and then overwritten)
//        x1 = Htable (advanced by 48 here; 96 bytes are read in total:
//             H, H^2, H^3, H^4 and their packed Karatsuba entries)
//        x2 = input
//        x3 = input length in bytes (64 or more on entry)
// Out:   [x0] = updated Xi
// Clobb: x1-x3, v0-v7, v16-v31, flags
// Per iteration the products H*I[i+3], H^2*I[i+2] and H^3*I[i+1] are
// accumulated in v29/v30/v31 (low/middle/high parts), then
// H^4*(Xi+I[i]) is added and a single reduction is performed.
// NOTE(review): the length is assumed to be a multiple of 16 -- confirm
// against the callers.
//----------------------------------------------------------------------
.type gcm_ghash_v8_4x,%function
.align 4
gcm_ghash_v8_4x:
.Lgcm_ghash_v8_4x:
ld1 {v0.2d},[x0] //load [rotated] Xi
ld1 {v20.2d,v21.2d,v22.2d},[x1],#48 //load twisted H, ..., H^2
movi v19.16b,#0xe1
ld1 {v26.2d,v27.2d,v28.2d},[x1] //load twisted H^3, ..., H^4
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
rev64 v5.16b,v5.16b
rev64 v6.16b,v6.16b
rev64 v7.16b,v7.16b
rev64 v4.16b,v4.16b
#endif
ext v25.16b,v7.16b,v7.16b,#8
ext v24.16b,v6.16b,v6.16b,#8
ext v23.16b,v5.16b,v5.16b,#8
pmull v29.1q,v20.1d,v25.1d //H·Ii+3
eor v7.16b,v7.16b,v25.16b
pmull2 v31.1q,v20.2d,v25.2d
pmull v30.1q,v21.1d,v7.1d
pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2
eor v6.16b,v6.16b,v24.16b
pmull2 v24.1q,v22.2d,v24.2d
pmull2 v6.1q,v21.2d,v6.2d
eor v29.16b,v29.16b,v16.16b
eor v31.16b,v31.16b,v24.16b
eor v30.16b,v30.16b,v6.16b
pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1
eor v5.16b,v5.16b,v23.16b
pmull2 v23.1q,v26.2d,v23.2d
pmull v5.1q,v27.1d,v5.1d
eor v29.16b,v29.16b,v7.16b
eor v31.16b,v31.16b,v23.16b
eor v30.16b,v30.16b,v5.16b
// 64 bytes were just consumed; subtracting 128 tests whether a further
// full group of four blocks is available for the main loop.
subs x3,x3,#128
b.lo .Ltail4x
b .Loop4x
.align 4
// Main loop: finishes the previous group (H^4 product and reduction)
// while loading and pre-multiplying the next four blocks.
.Loop4x:
eor v16.16b,v4.16b,v0.16b
ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
ext v3.16b,v16.16b,v16.16b,#8
#ifndef __ARMEB__
rev64 v5.16b,v5.16b
rev64 v6.16b,v6.16b
rev64 v7.16b,v7.16b
rev64 v4.16b,v4.16b
#endif
pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v28.2d,v3.2d
ext v25.16b,v7.16b,v7.16b,#8
pmull2 v1.1q,v27.2d,v16.2d
eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
ext v24.16b,v6.16b,v6.16b,#8
eor v1.16b,v1.16b,v30.16b
ext v23.16b,v5.16b,v5.16b,#8
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
pmull v29.1q,v20.1d,v25.1d //H·Ii+3
eor v7.16b,v7.16b,v25.16b
eor v1.16b,v1.16b,v17.16b
pmull2 v31.1q,v20.2d,v25.2d
eor v1.16b,v1.16b,v18.16b
pmull v30.1q,v21.1d,v7.1d
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2
eor v6.16b,v6.16b,v24.16b
pmull2 v24.1q,v22.2d,v24.2d
eor v0.16b,v1.16b,v18.16b
pmull2 v6.1q,v21.2d,v6.2d
eor v29.16b,v29.16b,v16.16b
eor v31.16b,v31.16b,v24.16b
eor v30.16b,v30.16b,v6.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1
eor v5.16b,v5.16b,v23.16b
eor v18.16b,v18.16b,v2.16b
pmull2 v23.1q,v26.2d,v23.2d
pmull v5.1q,v27.1d,v5.1d
eor v0.16b,v0.16b,v18.16b
eor v29.16b,v29.16b,v7.16b
eor v31.16b,v31.16b,v23.16b
ext v0.16b,v0.16b,v0.16b,#8
eor v30.16b,v30.16b,v5.16b
subs x3,x3,#64
b.hs .Loop4x
// Finish the last full group of four, then dispatch on the number of
// bytes still unprocessed (x3 after the adds): 0, 16, 32 or 48.
.Ltail4x:
eor v16.16b,v4.16b,v0.16b
ext v3.16b,v16.16b,v16.16b,#8
pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v28.2d,v3.2d
pmull2 v1.1q,v27.2d,v16.2d
eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
eor v1.16b,v1.16b,v30.16b
adds x3,x3,#64
b.eq .Ldone4x
cmp x3,#32
b.lo .Lone
b.eq .Ltwo
// Three blocks remain: uses H^3, H^2 and H.
.Lthree:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v4.2d,v5.2d,v6.2d},[x2]
eor v1.16b,v1.16b,v18.16b
#ifndef __ARMEB__
rev64 v5.16b,v5.16b
rev64 v6.16b,v6.16b
rev64 v4.16b,v4.16b
#endif
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v24.16b,v6.16b,v6.16b,#8
ext v23.16b,v5.16b,v5.16b,#8
eor v0.16b,v1.16b,v18.16b
pmull v29.1q,v20.1d,v24.1d //H·Ii+2
eor v6.16b,v6.16b,v24.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
pmull2 v31.1q,v20.2d,v24.2d
pmull v30.1q,v21.1d,v6.1d
eor v0.16b,v0.16b,v18.16b
pmull v7.1q,v22.1d,v23.1d //H^2·Ii+1
eor v5.16b,v5.16b,v23.16b
ext v0.16b,v0.16b,v0.16b,#8
pmull2 v23.1q,v22.2d,v23.2d
eor v16.16b,v4.16b,v0.16b
pmull2 v5.1q,v21.2d,v5.2d
ext v3.16b,v16.16b,v16.16b,#8
eor v29.16b,v29.16b,v7.16b
eor v31.16b,v31.16b,v23.16b
eor v30.16b,v30.16b,v5.16b
pmull v0.1q,v26.1d,v3.1d //H^3·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v26.2d,v3.2d
pmull v1.1q,v27.1d,v16.1d
eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
eor v1.16b,v1.16b,v30.16b
b .Ldone4x
.align 4
// Two blocks remain: uses H^2 and H.
.Ltwo:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v4.2d,v5.2d},[x2]
eor v1.16b,v1.16b,v18.16b
#ifndef __ARMEB__
rev64 v5.16b,v5.16b
rev64 v4.16b,v4.16b
#endif
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v23.16b,v5.16b,v5.16b,#8
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
ext v0.16b,v0.16b,v0.16b,#8
pmull v29.1q,v20.1d,v23.1d //H·Ii+1
eor v5.16b,v5.16b,v23.16b
eor v16.16b,v4.16b,v0.16b
ext v3.16b,v16.16b,v16.16b,#8
pmull2 v31.1q,v20.2d,v23.2d
pmull v30.1q,v21.1d,v5.1d
pmull v0.1q,v22.1d,v3.1d //H^2·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v22.2d,v3.2d
pmull2 v1.1q,v21.2d,v16.2d
eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
eor v1.16b,v1.16b,v30.16b
b .Ldone4x
.align 4
// One block remains: uses H only.
.Lone:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v4.2d},[x2]
eor v1.16b,v1.16b,v18.16b
#ifndef __ARMEB__
rev64 v4.16b,v4.16b
#endif
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
ext v0.16b,v0.16b,v0.16b,#8
eor v16.16b,v4.16b,v0.16b
ext v3.16b,v16.16b,v16.16b,#8
pmull v0.1q,v20.1d,v3.1d
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v20.2d,v3.2d
pmull v1.1q,v21.1d,v16.1d
// Common exit: final Karatsuba post-processing and reduction of
// v0/v1/v2, then store Xi.
.Ldone4x:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
ext v0.16b,v0.16b,v0.16b,#8
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
st1 {v0.2d},[x0] //write out Xi
ret
.size gcm_ghash_v8_4x,.-gcm_ghash_v8_4x
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
File diff suppressed because it is too large Load Diff
@@ -1,864 +0,0 @@
/* Do not modify. This file is auto-generated from poly1305-armv8.pl. */
#include "arm_arch.h"
.text
// forward "declarations" are required for Apple
.hidden OPENSSL_armcap_P
.globl poly1305_init
.hidden poly1305_init
.globl poly1305_blocks
.hidden poly1305_blocks
.globl poly1305_emit
.hidden poly1305_emit
//----------------------------------------------------------------------
// poly1305_init: reset a Poly1305 context and, when a key is supplied,
// store the clamped key and select the block/emit routines.
// In:    x0 = context
//        x1 = key (the first 16 bytes are read), or 0 for "no key"
//        x2 = where to store the two selected routine addresses
// Out:   x0 = 0 when x1 was 0 (nothing is written to [x2]),
//             1 otherwise
// Context layout as used here:
//        [x0+0  .. +23]  hash value (zeroed)
//        [x0+24 .. +31]  is_base2_26 flag (zeroed)
//        [x0+32 .. +47]  clamped key r0, r1
// Clobb: x7-x9, x12, x13, x17, flags
// The NEON routines are selected when the ARMV7_NEON bit is set in
// OPENSSL_armcap_P, the scalar ones otherwise.
//----------------------------------------------------------------------
.type poly1305_init,%function
.align 5
poly1305_init:
cmp x1,xzr
stp xzr,xzr,[x0] // zero hash value
stp xzr,xzr,[x0,#16] // [along with is_base2_26]
// From here x0 is the return value: 0 if the key pointer is null.
csel x0,xzr,x0,eq
b.eq .Lno_key
adrp x17,OPENSSL_armcap_P
ldr w17,[x17,#:lo12:OPENSSL_armcap_P]
ldp x7,x8,[x1] // load key
// Build the clamp mask 0x0ffffffc0fffffff in two steps.
mov x9,#0xfffffffc0fffffff
movk x9,#0x0fff,lsl#48
#ifdef __ARMEB__
rev x7,x7 // flip bytes
rev x8,x8
#endif
and x7,x7,x9 // &=0ffffffc0fffffff
and x9,x9,#-4
and x8,x8,x9 // &=0ffffffc0ffffffc
stp x7,x8,[x0,#32] // save key value
// The flags from this tst stay live across the adr instructions and are
// consumed by the two csel below (eq = NEON bit clear = scalar code).
tst w17,#ARMV7_NEON
adr x12,.Lpoly1305_blocks
adr x7,.Lpoly1305_blocks_neon
adr x13,.Lpoly1305_emit
adr x8,.Lpoly1305_emit_neon
csel x12,x12,x7,eq
csel x13,x13,x8,eq
// ILP32 builds store the two addresses as 32-bit values.
#ifdef __ILP32__
stp w12,w13,[x2]
#else
stp x12,x13,[x2]
#endif
mov x0,#1
.Lno_key:
ret
.size poly1305_init,.-poly1305_init
//----------------------------------------------------------------------
// poly1305_blocks: scalar Poly1305 block function. For every 16-byte
// block: h = (h + block + padbit*2^128) * r, partially reduced.
// In:    x0 = context (hash at +0/+8/+16 in base 2^64, key r0/r1 at +32)
//        x1 = input
//        x2 = length in bytes (rounded down to a multiple of 16 here)
//        x3 = value added to the top hash limb for each block (pad bit)
// Out:   hash in the context updated; nothing is stored when the
//        rounded length is 0
// Clobb: x1, x2, x4-x14, flags
//----------------------------------------------------------------------
.type poly1305_blocks,%function
.align 5
poly1305_blocks:
.Lpoly1305_blocks:
ands x2,x2,#-16
b.eq .Lno_data
ldp x4,x5,[x0] // load hash value
ldp x7,x8,[x0,#32] // load key value
ldr x6,[x0,#16]
add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
b .Loop
.align 5
.Loop:
ldp x10,x11,[x1],#16 // load input
sub x2,x2,#16
#ifdef __ARMEB__
rev x10,x10
rev x11,x11
#endif
adds x4,x4,x10 // accumulate input
adcs x5,x5,x11
mul x12,x4,x7 // h0*r0
// mul/umulh do not write the flags, so the carry from the adcs above is
// still valid for this adc.
adc x6,x6,x3
umulh x13,x4,x7
mul x10,x5,x9 // h1*5*r1
umulh x11,x5,x9
adds x12,x12,x10
mul x10,x4,x8 // h0*r1
adc x13,x13,x11
umulh x14,x4,x8
adds x13,x13,x10
mul x10,x5,x7 // h1*r0
adc x14,x14,xzr
umulh x11,x5,x7
adds x13,x13,x10
mul x10,x6,x9 // h2*5*r1
adc x14,x14,x11
mul x11,x6,x7 // h2*r0
adds x13,x13,x10
adc x14,x14,x11
// Fold everything above bit 130 back in: with c = x14 >> 2, the two
// lines below add (c << 2) + c = 5*c to the low limb.
and x10,x14,#-4 // final reduction
and x6,x14,#3
add x10,x10,x14,lsr#2
adds x4,x12,x10
adcs x5,x13,xzr
adc x6,x6,xzr
cbnz x2,.Loop
stp x4,x5,[x0] // store hash value
str x6,[x0,#16]
.Lno_data:
ret
.size poly1305_blocks,.-poly1305_blocks
//----------------------------------------------------------------------
// poly1305_emit: produce the 16-byte tag from a base 2^64 hash value.
// In:    x0 = context (hash at +0/+8/+16)
//        x1 = output, 16 bytes are written
//        x2 = nonce, 16 bytes are read
// Out:   [x1] = low 128 bits of (fully reduced hash + nonce)
// Clobb: x4-x6, x10-x14, flags
// Full reduction is done by computing h+5: if that sum reaches bit 130
// (any bit of x14 above bit 1 set) then h >= 2^130-5 and h+5 is the
// value to use (only its low 128 bits are kept); otherwise h is kept.
//----------------------------------------------------------------------
.type poly1305_emit,%function
.align 5
poly1305_emit:
.Lpoly1305_emit:
ldp x4,x5,[x0] // load hash base 2^64
ldr x6,[x0,#16]
ldp x10,x11,[x2] // load nonce
adds x12,x4,#5 // compare to modulus
adcs x13,x5,xzr
adc x14,x6,xzr
tst x14,#-4 // see if it's carried/borrowed
csel x4,x4,x12,eq
csel x5,x5,x13,eq
#ifdef __ARMEB__
ror x10,x10,#32 // flip nonce words
ror x11,x11,#32
#endif
adds x4,x4,x10 // accumulate nonce
adc x5,x5,x11
#ifdef __ARMEB__
rev x4,x4 // flip output bytes
rev x5,x5
#endif
stp x4,x5,[x1] // write result
ret
.size poly1305_emit,.-poly1305_emit
//----------------------------------------------------------------------
// poly1305_mult: file-local helper, h = h * r with partial reduction.
// Uses a private register convention rather than the standard argument
// registers:
// In:    x4, x5, x6 = h0, h1, h2 (hash, base 2^64)
//        x7, x8     = r0, r1 (key)
//        x9         = s1 = r1 + (r1 >> 2), prepared by the caller
// Out:   x4, x5, x6 = updated h0, h1, h2
// Clobb: x10-x14, flags; x7-x9 are preserved
// The instruction sequence is the same as the multiply step inside the
// loop of poly1305_blocks.
//----------------------------------------------------------------------
.type poly1305_mult,%function
.align 5
poly1305_mult:
mul x12,x4,x7 // h0*r0
umulh x13,x4,x7
mul x10,x5,x9 // h1*5*r1
umulh x11,x5,x9
adds x12,x12,x10
mul x10,x4,x8 // h0*r1
adc x13,x13,x11
umulh x14,x4,x8
adds x13,x13,x10
mul x10,x5,x7 // h1*r0
adc x14,x14,xzr
umulh x11,x5,x7
adds x13,x13,x10
mul x10,x6,x9 // h2*5*r1
adc x14,x14,x11
mul x11,x6,x7 // h2*r0
adds x13,x13,x10
adc x14,x14,x11
// Fold the bits above bit 130 back into the low limb as 5*(x14 >> 2).
and x10,x14,#-4 // final reduction
and x6,x14,#3
add x10,x10,x14,lsr#2
adds x4,x12,x10
adcs x5,x13,xzr
adc x6,x6,xzr
ret
.size poly1305_mult,.-poly1305_mult
//----------------------------------------------------------------------
// poly1305_splat: file-local helper. Splits the base 2^64 value held in
// x6:x5:x4 into five base 2^26 limbs r0..r4 and writes one 32-bit lane
// of each of nine 16-byte table rows starting at x0:
//   row 0: r0   row 1: r1   row 2: 5*r1   row 3: r2   row 4: 5*r2
//   row 5: r3   row 6: 5*r3 row 7: r4     row 8: 5*r4
// In:    x0 = address of the lane in row 0; rows are 16 bytes apart
//        x4, x5, x6 = value to split (low, middle, high)
// Out:   table lanes written; x0 and x4-x6 unchanged
// Clobb: x12-x16 (on return w12..w15 hold 5*r1..5*r4, x16 holds r4)
//----------------------------------------------------------------------
.type poly1305_splat,%function
.align 5
poly1305_splat:
	// Phase 1: extract the five limbs.
	and	x12,x4,#0x03ffffff		// r0 = bits   0..25
	ubfx	x13,x4,#26,#26			// r1 = bits  26..51
	extr	x14,x5,x4,#52
	ubfx	x15,x5,#14,#26			// r3 = bits  78..103
	extr	x16,x6,x5,#40			// r4 = bits 104 and up
	and	x14,x14,#0x03ffffff		// r2 = bits  52..77

	// Phase 2: store r0..r4.
	str	w12,[x0,#16*0]			// r0
	str	w13,[x0,#16*1]			// r1
	str	w14,[x0,#16*3]			// r2
	str	w15,[x0,#16*5]			// r3
	str	w16,[x0,#16*7]			// r4

	// Phase 3: s[i] = 5*r[i] = r[i] + (r[i] << 2), for i = 1..4.
	add	w12,w13,w13,lsl#2		// s1
	add	w13,w14,w14,lsl#2		// s2
	add	w14,w15,w15,lsl#2		// s3
	add	w15,w16,w16,lsl#2		// s4

	// Phase 4: store s1..s4.
	str	w12,[x0,#16*2]			// s1
	str	w13,[x0,#16*4]			// s2
	str	w14,[x0,#16*6]			// s3
	str	w15,[x0,#16*8]			// s4
	ret
.size poly1305_splat,.-poly1305_splat
//----------------------------------------------------------------------
// poly1305_blocks_neon: Advanced SIMD Poly1305 block function.
// In:    x0 = context
//             +0  .. +23  hash (base 2^64, or five 32-bit base 2^26
//                         limbs when is_base2_26 is set)
//             +24         is_base2_26 flag
//             +32 .. +47  key r0, r1
//             +48 ..      table of r^4..r^1 limbs, filled on first use
//        x1 = input
//        x2 = length in bytes (rounded down to a multiple of 16 here)
//        x3 = pad bit, added at bit 128 of every block
// Out:   hash in the context updated
// Inputs shorter than 128 bytes with the hash still in base 2^64 are
// passed straight to the scalar .Lpoly1305_blocks (tail branch, before
// any stack frame is created).
// Stack: 80-byte frame; x29/x30 at [sp], d8-d15 at [sp+16 .. sp+79].
// x30 is reloaded from the frame right after the last bl on each path;
// the epilogue itself only restores x29.
// .Lzeros is defined outside this excerpt.
// NOTE(review): it is presumably a block of zero bytes used as a stand-in
// input once the real input is exhausted -- confirm at its definition.
//----------------------------------------------------------------------
.type poly1305_blocks_neon,%function
.align 5
poly1305_blocks_neon:
.Lpoly1305_blocks_neon:
ldr x17,[x0,#24]
cmp x2,#128
b.hs .Lblocks_neon
cbz x17,.Lpoly1305_blocks
.Lblocks_neon:
.inst 0xd503233f // paciasp
stp x29,x30,[sp,#-80]!
add x29,sp,#0
ands x2,x2,#-16
b.eq .Lno_data_neon
cbz x17,.Lbase2_64_neon
// Hash is already in base 2^26.
ldp w10,w11,[x0] // load hash value base 2^26
ldp w12,w13,[x0,#8]
ldr w14,[x0,#16]
tst x2,#31
b.eq .Leven_neon
// Odd number of blocks: convert the hash to base 2^64 and process one
// block with the scalar multiply so that an even number remains.
ldp x7,x8,[x0,#32] // load key value
add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64
lsr x5,x12,#12
adds x4,x4,x12,lsl#52
add x5,x5,x13,lsl#14
adc x5,x5,xzr
lsr x6,x14,#24
adds x5,x5,x14,lsl#40
adc x14,x6,xzr // can be partially reduced...
ldp x12,x13,[x1],#16 // load input
sub x2,x2,#16
add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
and x10,x14,#-4 // ... so reduce
and x6,x14,#3
add x10,x10,x14,lsr#2
adds x4,x4,x10
adcs x5,x5,xzr
adc x6,x6,xzr
#ifdef __ARMEB__
rev x12,x12
rev x13,x13
#endif
adds x4,x4,x12 // accumulate input
adcs x5,x5,x13
adc x6,x6,x3
bl poly1305_mult
// bl overwrote x30; restore the return address saved in the frame.
ldr x30,[sp,#8]
// With a zero pad bit the result is stored in base 2^64 and the function
// returns without entering the vector code.
cbz x3,.Lstore_base2_64_neon
and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
ubfx x11,x4,#26,#26
extr x12,x5,x4,#52
and x12,x12,#0x03ffffff
ubfx x13,x5,#14,#26
extr x14,x6,x5,#40
cbnz x2,.Leven_neon
stp w10,w11,[x0] // store hash value base 2^26
stp w12,w13,[x0,#8]
str w14,[x0,#16]
b .Lno_data_neon
.align 4
.Lstore_base2_64_neon:
stp x4,x5,[x0] // store hash value base 2^64
stp x6,xzr,[x0,#16] // note that is_base2_26 is zeroed
b .Lno_data_neon
.align 4
// Hash is still in base 2^64 (first call with 128 bytes or more).
.Lbase2_64_neon:
ldp x7,x8,[x0,#32] // load key value
ldp x4,x5,[x0] // load hash value base 2^64
ldr x6,[x0,#16]
tst x2,#31
b.eq .Linit_neon
ldp x12,x13,[x1],#16 // load input
sub x2,x2,#16
add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
#ifdef __ARMEB__
rev x12,x12
rev x13,x13
#endif
adds x4,x4,x12 // accumulate input
adcs x5,x5,x13
adc x6,x6,x3
bl poly1305_mult
.Linit_neon:
and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
ubfx x11,x4,#26,#26
extr x12,x5,x4,#52
and x12,x12,#0x03ffffff
ubfx x13,x5,#14,#26
extr x14,x6,x5,#40
stp d8,d9,[sp,#16] // meet ABI requirements
stp d10,d11,[sp,#32]
stp d12,d13,[sp,#48]
stp d14,d15,[sp,#64]
// Hash limbs h0..h4 go to v24..v28.
fmov d24,x10
fmov d25,x11
fmov d26,x12
fmov d27,x13
fmov d28,x14
////////////////////////////////// initialize r^n table
// x0 starts at the last 32-bit lane of the first table row (context+48,
// lane offset 12) and moves down one lane per power, so each 16-byte row
// ends up holding the limbs of r^4, r^3, r^2, r^1 in lanes 0..3.
mov x4,x7 // r^1
add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
mov x5,x8
mov x6,xzr
add x0,x0,#48+12
bl poly1305_splat
bl poly1305_mult // r^2
sub x0,x0,#4
bl poly1305_splat
bl poly1305_mult // r^3
sub x0,x0,#4
bl poly1305_splat
bl poly1305_mult // r^4
sub x0,x0,#4
bl poly1305_splat
// Last bl on this path is done; restore the saved return address.
ldr x30,[sp,#8]
// x16 = second input pointer (inp[2:3]); it is redirected to .Lzeros
// when fewer than 64 bytes remain.
add x16,x1,#32
adr x17,.Lzeros
subs x2,x2,#64
csel x16,x17,x16,lo
mov x4,#1
// x0 is context+48 here, so this writes the flag at context+24.
stur x4,[x0,#-24] // set is_base2_26
sub x0,x0,#48 // restore original x0
b .Ldo_neon
.align 4
// Hash already in base 2^26 (limbs in x10..x14) and an even number of
// blocks left.
.Leven_neon:
add x16,x1,#32
adr x17,.Lzeros
subs x2,x2,#64
csel x16,x17,x16,lo
stp d8,d9,[sp,#16] // meet ABI requirements
stp d10,d11,[sp,#32]
stp d12,d13,[sp,#48]
stp d14,d15,[sp,#64]
fmov d24,x10
fmov d25,x11
fmov d26,x12
fmov d27,x13
fmov d28,x14
// Common vector entry. Nothing between the "subs x2,x2,#64" above and
// the "b.ls .Lskip_loop" below writes the condition flags.
.Ldo_neon:
ldp x8,x12,[x16],#16 // inp[2:3] (or zero)
ldp x9,x13,[x16],#48
// Move the pad bit to bit 24 so it can be added directly to the top
// limb (which is formed as "x >> 40" of the high input word).
lsl x3,x3,#24
add x15,x0,#48
#ifdef __ARMEB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
// Split two input blocks into base 2^26 limbs; limb i of both blocks is
// packed into the two 32-bit halves of one register (v14..v18).
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
and x5,x9,#0x03ffffff
ubfx x6,x8,#26,#26
ubfx x7,x9,#26,#26
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
extr x8,x12,x8,#52
extr x9,x13,x9,#52
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
fmov d14,x4
and x8,x8,#0x03ffffff
and x9,x9,#0x03ffffff
ubfx x10,x12,#14,#26
ubfx x11,x13,#14,#26
add x12,x3,x12,lsr#40
add x13,x3,x13,lsr#40
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
fmov d15,x6
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
fmov d16,x8
fmov d17,x10
fmov d18,x12
ldp x8,x12,[x1],#16 // inp[0:1]
ldp x9,x13,[x1],#48
// Load the nine table rows written by poly1305_splat: v0..v8 = r0, r1,
// 5*r1, r2, 5*r2, r3, 5*r3, r4, 5*r4.
ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x15],#64
ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x15],#64
ld1 {v8.4s},[x15]
#ifdef __ARMEB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
// Same limb split for inp[0:1]; results go to v9..v13.
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
and x5,x9,#0x03ffffff
ubfx x6,x8,#26,#26
ubfx x7,x9,#26,#26
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
extr x8,x12,x8,#52
extr x9,x13,x9,#52
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
fmov d9,x4
and x8,x8,#0x03ffffff
and x9,x9,#0x03ffffff
ubfx x10,x12,#14,#26
ubfx x11,x13,#14,#26
add x12,x3,x12,lsr#40
add x13,x3,x13,lsr#40
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
fmov d10,x6
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
// v31 = 0x03ffffff in each 64-bit lane (all-ones shifted right by 38).
movi v31.2d,#-1
fmov d11,x8
fmov d12,x10
fmov d13,x12
ushr v31.2d,v31.2d,#38
b.ls .Lskip_loop
.align 4
.Loop_neon:
////////////////////////////////////////////////////////////////
// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
// ___________________/
// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
// ___________________/ ____________________/
//
// Note that we start with inp[2:3]*r^2. This is because it
// doesn't depend on reduction in previous iteration.
////////////////////////////////////////////////////////////////
// d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0
// d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4
// d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3
// d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2
// d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
// The flags set by this subs are consumed by the csel two lines below
// and by the "b.hi .Loop_neon" at the bottom of the loop; the scalar
// limb-splitting code interleaved with the vector multiplies prepares
// the next 64 bytes of input.
subs x2,x2,#64
umull v23.2d,v14.2s,v7.s[2]
csel x16,x17,x16,lo
umull v22.2d,v14.2s,v5.s[2]
umull v21.2d,v14.2s,v3.s[2]
ldp x8,x12,[x16],#16 // inp[2:3] (or zero)
umull v20.2d,v14.2s,v1.s[2]
ldp x9,x13,[x16],#48
umull v19.2d,v14.2s,v0.s[2]
#ifdef __ARMEB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
umlal v23.2d,v15.2s,v5.s[2]
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
umlal v22.2d,v15.2s,v3.s[2]
and x5,x9,#0x03ffffff
umlal v21.2d,v15.2s,v1.s[2]
ubfx x6,x8,#26,#26
umlal v20.2d,v15.2s,v0.s[2]
ubfx x7,x9,#26,#26
umlal v19.2d,v15.2s,v8.s[2]
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
umlal v23.2d,v16.2s,v3.s[2]
extr x8,x12,x8,#52
umlal v22.2d,v16.2s,v1.s[2]
extr x9,x13,x9,#52
umlal v21.2d,v16.2s,v0.s[2]
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
umlal v20.2d,v16.2s,v8.s[2]
fmov d14,x4
umlal v19.2d,v16.2s,v6.s[2]
and x8,x8,#0x03ffffff
umlal v23.2d,v17.2s,v1.s[2]
and x9,x9,#0x03ffffff
umlal v22.2d,v17.2s,v0.s[2]
ubfx x10,x12,#14,#26
umlal v21.2d,v17.2s,v8.s[2]
ubfx x11,x13,#14,#26
umlal v20.2d,v17.2s,v6.s[2]
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
umlal v19.2d,v17.2s,v4.s[2]
fmov d15,x6
add v11.2s,v11.2s,v26.2s
add x12,x3,x12,lsr#40
umlal v23.2d,v18.2s,v0.s[2]
add x13,x3,x13,lsr#40
umlal v22.2d,v18.2s,v8.s[2]
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
umlal v21.2d,v18.2s,v6.s[2]
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
umlal v20.2d,v18.2s,v4.s[2]
fmov d16,x8
umlal v19.2d,v18.2s,v2.s[2]
fmov d17,x10
////////////////////////////////////////////////////////////////
// (hash+inp[0:1])*r^4 and accumulate
add v9.2s,v9.2s,v24.2s
fmov d18,x12
umlal v22.2d,v11.2s,v1.s[0]
ldp x8,x12,[x1],#16 // inp[0:1]
umlal v19.2d,v11.2s,v6.s[0]
ldp x9,x13,[x1],#48
umlal v23.2d,v11.2s,v3.s[0]
umlal v20.2d,v11.2s,v8.s[0]
umlal v21.2d,v11.2s,v0.s[0]
#ifdef __ARMEB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
add v10.2s,v10.2s,v25.2s
umlal v22.2d,v9.2s,v5.s[0]
umlal v23.2d,v9.2s,v7.s[0]
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
umlal v21.2d,v9.2s,v3.s[0]
and x5,x9,#0x03ffffff
umlal v19.2d,v9.2s,v0.s[0]
ubfx x6,x8,#26,#26
umlal v20.2d,v9.2s,v1.s[0]
ubfx x7,x9,#26,#26
add v12.2s,v12.2s,v27.2s
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
umlal v22.2d,v10.2s,v3.s[0]
extr x8,x12,x8,#52
umlal v23.2d,v10.2s,v5.s[0]
extr x9,x13,x9,#52
umlal v19.2d,v10.2s,v8.s[0]
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
umlal v21.2d,v10.2s,v1.s[0]
fmov d9,x4
umlal v20.2d,v10.2s,v0.s[0]
and x8,x8,#0x03ffffff
add v13.2s,v13.2s,v28.2s
and x9,x9,#0x03ffffff
umlal v22.2d,v12.2s,v0.s[0]
ubfx x10,x12,#14,#26
umlal v19.2d,v12.2s,v4.s[0]
ubfx x11,x13,#14,#26
umlal v23.2d,v12.2s,v1.s[0]
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
umlal v20.2d,v12.2s,v6.s[0]
fmov d10,x6
umlal v21.2d,v12.2s,v8.s[0]
add x12,x3,x12,lsr#40
umlal v22.2d,v13.2s,v8.s[0]
add x13,x3,x13,lsr#40
umlal v19.2d,v13.2s,v2.s[0]
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
umlal v23.2d,v13.2s,v0.s[0]
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
umlal v20.2d,v13.2s,v4.s[0]
fmov d11,x8
umlal v21.2d,v13.2s,v6.s[0]
fmov d12,x10
fmov d13,x12
/////////////////////////////////////////////////////////////////
// lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
// and P. Schwabe
//
// [see discussion in poly1305-armv4 module]
// The reduced limbs are narrowed back to 32 bits into v24..v28 (h0..h4).
ushr v29.2d,v22.2d,#26
xtn v27.2s,v22.2d
ushr v30.2d,v19.2d,#26
and v19.16b,v19.16b,v31.16b
add v23.2d,v23.2d,v29.2d // h3 -> h4
bic v27.2s,#0xfc,lsl#24 // &=0x03ffffff
add v20.2d,v20.2d,v30.2d // h0 -> h1
ushr v29.2d,v23.2d,#26
xtn v28.2s,v23.2d
ushr v30.2d,v20.2d,#26
xtn v25.2s,v20.2d
bic v28.2s,#0xfc,lsl#24
add v21.2d,v21.2d,v30.2d // h1 -> h2
add v19.2d,v19.2d,v29.2d
shl v29.2d,v29.2d,#2
shrn v30.2s,v21.2d,#26
xtn v26.2s,v21.2d
add v19.2d,v19.2d,v29.2d // h4 -> h0
bic v25.2s,#0xfc,lsl#24
add v27.2s,v27.2s,v30.2s // h2 -> h3
bic v26.2s,#0xfc,lsl#24
shrn v29.2s,v19.2d,#26
xtn v24.2s,v19.2d
ushr v30.2s,v27.2s,#26
bic v27.2s,#0xfc,lsl#24
bic v24.2s,#0xfc,lsl#24
add v25.2s,v25.2s,v29.2s // h0 -> h1
add v28.2s,v28.2s,v30.2s // h3 -> h4
b.hi .Loop_neon
.Lskip_loop:
dup v16.2d,v16.d[0]
add v11.2s,v11.2s,v26.2s
////////////////////////////////////////////////////////////////
// multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
// The flags set by this adds select both branches below: b.ne right
// after it and "b.eq .Lshort_tail" after the multiply block (the vector
// instructions in between do not write the flags).
adds x2,x2,#32
b.ne .Long_tail
dup v16.2d,v11.d[0]
add v14.2s,v9.2s,v24.2s
add v17.2s,v12.2s,v27.2s
add v15.2s,v10.2s,v25.2s
add v18.2s,v13.2s,v28.2s
.Long_tail:
dup v14.2d,v14.d[0]
umull2 v19.2d,v16.4s,v6.4s
umull2 v22.2d,v16.4s,v1.4s
umull2 v23.2d,v16.4s,v3.4s
umull2 v21.2d,v16.4s,v0.4s
umull2 v20.2d,v16.4s,v8.4s
dup v15.2d,v15.d[0]
umlal2 v19.2d,v14.4s,v0.4s
umlal2 v21.2d,v14.4s,v3.4s
umlal2 v22.2d,v14.4s,v5.4s
umlal2 v23.2d,v14.4s,v7.4s
umlal2 v20.2d,v14.4s,v1.4s
dup v17.2d,v17.d[0]
umlal2 v19.2d,v15.4s,v8.4s
umlal2 v22.2d,v15.4s,v3.4s
umlal2 v21.2d,v15.4s,v1.4s
umlal2 v23.2d,v15.4s,v5.4s
umlal2 v20.2d,v15.4s,v0.4s
dup v18.2d,v18.d[0]
umlal2 v22.2d,v17.4s,v0.4s
umlal2 v23.2d,v17.4s,v1.4s
umlal2 v19.2d,v17.4s,v4.4s
umlal2 v20.2d,v17.4s,v6.4s
umlal2 v21.2d,v17.4s,v8.4s
umlal2 v22.2d,v18.4s,v8.4s
umlal2 v19.2d,v18.4s,v2.4s
umlal2 v23.2d,v18.4s,v0.4s
umlal2 v20.2d,v18.4s,v4.4s
umlal2 v21.2d,v18.4s,v6.4s
b.eq .Lshort_tail
////////////////////////////////////////////////////////////////
// (hash+inp[0:1])*r^4:r^3 and accumulate
add v9.2s,v9.2s,v24.2s
umlal v22.2d,v11.2s,v1.2s
umlal v19.2d,v11.2s,v6.2s
umlal v23.2d,v11.2s,v3.2s
umlal v20.2d,v11.2s,v8.2s
umlal v21.2d,v11.2s,v0.2s
add v10.2s,v10.2s,v25.2s
umlal v22.2d,v9.2s,v5.2s
umlal v19.2d,v9.2s,v0.2s
umlal v23.2d,v9.2s,v7.2s
umlal v20.2d,v9.2s,v1.2s
umlal v21.2d,v9.2s,v3.2s
add v12.2s,v12.2s,v27.2s
umlal v22.2d,v10.2s,v3.2s
umlal v19.2d,v10.2s,v8.2s
umlal v23.2d,v10.2s,v5.2s
umlal v20.2d,v10.2s,v0.2s
umlal v21.2d,v10.2s,v1.2s
add v13.2s,v13.2s,v28.2s
umlal v22.2d,v12.2s,v0.2s
umlal v19.2d,v12.2s,v4.2s
umlal v23.2d,v12.2s,v1.2s
umlal v20.2d,v12.2s,v6.2s
umlal v21.2d,v12.2s,v8.2s
umlal v22.2d,v13.2s,v8.2s
umlal v19.2d,v13.2s,v2.2s
umlal v23.2d,v13.2s,v0.2s
umlal v20.2d,v13.2s,v4.2s
umlal v21.2d,v13.2s,v6.2s
.Lshort_tail:
////////////////////////////////////////////////////////////////
// horizontal add
// The callee-saved d8-d15 are reloaded here, interleaved with the adds,
// because v8-v15 are no longer needed past this point.
addp v22.2d,v22.2d,v22.2d
ldp d8,d9,[sp,#16] // meet ABI requirements
addp v19.2d,v19.2d,v19.2d
ldp d10,d11,[sp,#32]
addp v23.2d,v23.2d,v23.2d
ldp d12,d13,[sp,#48]
addp v20.2d,v20.2d,v20.2d
ldp d14,d15,[sp,#64]
addp v21.2d,v21.2d,v21.2d
////////////////////////////////////////////////////////////////
// lazy reduction, but without narrowing
ushr v29.2d,v22.2d,#26
and v22.16b,v22.16b,v31.16b
ushr v30.2d,v19.2d,#26
and v19.16b,v19.16b,v31.16b
add v23.2d,v23.2d,v29.2d // h3 -> h4
add v20.2d,v20.2d,v30.2d // h0 -> h1
ushr v29.2d,v23.2d,#26
and v23.16b,v23.16b,v31.16b
ushr v30.2d,v20.2d,#26
and v20.16b,v20.16b,v31.16b
add v21.2d,v21.2d,v30.2d // h1 -> h2
add v19.2d,v19.2d,v29.2d
shl v29.2d,v29.2d,#2
ushr v30.2d,v21.2d,#26
and v21.16b,v21.16b,v31.16b
add v19.2d,v19.2d,v29.2d // h4 -> h0
add v22.2d,v22.2d,v30.2d // h2 -> h3
ushr v29.2d,v19.2d,#26
and v19.16b,v19.16b,v31.16b
ushr v30.2d,v22.2d,#26
and v22.16b,v22.16b,v31.16b
add v20.2d,v20.2d,v29.2d // h0 -> h1
add v23.2d,v23.2d,v30.2d // h3 -> h4
////////////////////////////////////////////////////////////////
// write the result, can be partially reduced
// Five 32-bit limbs are stored at context+0 .. +19 (x0 is advanced by 16).
st4 {v19.s,v20.s,v21.s,v22.s}[0],[x0],#16
st1 {v23.s}[0],[x0]
// Epilogue: pop the 80-byte frame. Only x29 is reloaded; x30 either was
// never overwritten on this path or was reloaded after the last bl.
.Lno_data_neon:
ldr x29,[sp],#80
.inst 0xd50323bf // autiasp
ret
.size poly1305_blocks_neon,.-poly1305_blocks_neon
// poly1305_emit_neon -- produce the final 16-byte result from a hash that is
// held as five 26-bit limbs.
//
// Register use, as read from the code below:
//   x0  state: 32-bit hash limbs at offsets 0,4,8,12,16 and a 64-bit flag
//       at offset 24
//   x1  destination of the 16-byte result
//   x2  16-byte nonce
// When the flag at [x0,#24] is zero the routine branches to poly1305_emit
// (defined outside this excerpt) instead of converting limbs.
// Scratch: x4-x6, x10-x14, x17 and the condition flags.
.type poly1305_emit_neon,%function
.align 5
poly1305_emit_neon:
.Lpoly1305_emit_neon:
ldr x17,[x0,#24]
cbz x17,poly1305_emit
ldp w10,w11,[x0] // load hash value base 2^26
ldp w12,w13,[x0,#8]
ldr w14,[x0,#16]
// Repack the five limbs into x4 (low 64 bits), x5 (next 64 bits) and
// x6 (remaining high bits), propagating carries with adds/adc.
add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64
lsr x5,x12,#12
adds x4,x4,x12,lsl#52
add x5,x5,x13,lsl#14
adc x5,x5,xzr
lsr x6,x14,#24
adds x5,x5,x14,lsl#40
adc x6,x6,xzr // can be partially reduced...
ldp x10,x11,[x2] // load nonce
// x12 = (x6 & ~3) + (x6 >> 2) = 5 * (x6 >> 2): everything above bit 129 is
// folded back into the low limbs (the modulus is 2^130-5), and x6 keeps
// only its two low bits.
and x12,x6,#-4 // ... so reduce
add x12,x12,x6,lsr#2
and x6,x6,#3
adds x4,x4,x12
adcs x5,x5,xzr
adc x6,x6,xzr
// Compute h+5 in x12:x13:x14.  If that sum has any bit at or above bit 2 of
// the top word, h was >= the modulus and the low 128 bits of h+5 are selected;
// otherwise h itself is kept.
adds x12,x4,#5 // compare to modulus
adcs x13,x5,xzr
adc x14,x6,xzr
tst x14,#-4 // see if it's carried/borrowed
csel x4,x4,x12,eq
csel x5,x5,x13,eq
#ifdef __ARMEB__
ror x10,x10,#32 // flip nonce words
ror x11,x11,#32
#endif
adds x4,x4,x10 // accumulate nonce
adc x5,x5,x11
#ifdef __ARMEB__
rev x4,x4 // flip output bytes
rev x5,x5
#endif
stp x4,x5,[x1] // write result
ret
.size poly1305_emit_neon,.-poly1305_emit_neon
// 32 bytes of zeros on a 32-byte boundary (.align 5).  The users of .Lzeros
// are outside this excerpt.
.align 5
.Lzeros:
.long 0,0,0,0,0,0,0,0
// NUL-terminated ASCII: "Poly1305 for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.byte 80,111,108,121,49,51,48,53,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1,811 +0,0 @@
/* Do not modify. This file is auto-generated from aesni-gcm-x86_64.pl. */
.text
/*
 * _aesni_ctr32_ghash_6x -- internal helper called by aesni_gcm_decrypt and
 * aesni_gcm_encrypt.  Each pass of .Loop6x runs the AES rounds for six
 * counter blocks (%xmm9-%xmm14) while carry-less multiplies (vpclmulqdq)
 * fold six previously staged 16-byte blocks into the accumulator %xmm8.
 *
 * Register roles, as used below (all are set up by the callers):
 *   %rdi  source data; six blocks are XORed in per pass, then %rdi += 96
 *   %rsi  destination; %rsi += 96 per pass
 *   %rdx  number of 16-byte blocks left, decremented by 6 per pass
 *   %rcx  key schedule pointer biased by +128 (round key N is read from
 *         16*N-128(%rcx))
 *   %r8   counter block buffer; the next counter value is stored to (%r8)
 *   %r9   table read at 0-32(%r9) .. 112-32(%r9); presumably the
 *         precomputed hash key powers -- TODO confirm against the caller
 *   %r10  running count of bytes processed (+0x60 per pass)
 *   %r11  address of .Lbswap_mask; the constants that follow that label
 *         are reached as 16(%r11), 32(%r11), 48(%r11) and 64(%r11)
 *   %r14  pointer to the data that is byte-swapped (movbeq) into the stack
 *         staging area; %r15 is the limit it is compared against
 *   %ebx  last 32-bit word of the counter block, as loaded by the callers
 *   %ebp  32-bit word the callers load from offset 240 of the key schedule;
 *         it selects how many extra AES rounds are run
 *   %xmm1 counter block, %xmm15 current round key
 *
 * Stack slots are written as N+8(%rsp): the return address pushed by the
 * call occupies (%rsp), so N is the offset within the caller's frame.
 * On return %xmm9-%xmm14 hold the last six output blocks, which have not
 * been stored yet; the callers store them at -96(%rsi)..-16(%rsi).
 */
.type _aesni_ctr32_ghash_6x,@function
.align 32
_aesni_ctr32_ghash_6x:
.cfi_startproc
/* %xmm2 = 32(%r11): the byte-wise increment used by the vpaddb chain. */
vmovdqu 32(%r11),%xmm2
subq $6,%rdx
vpxor %xmm4,%xmm4,%xmm4
vmovdqu 0-128(%rcx),%xmm15
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpaddb %xmm2,%xmm11,%xmm12
vpaddb %xmm2,%xmm12,%xmm13
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm15,%xmm1,%xmm9
vmovdqu %xmm4,16+8(%rsp)
jmp .Loop6x
.align 32
.Loop6x:
/*
 * 100663296 = 6<<24: add 6 to the top byte of %ebx.  A carry out means the
 * byte-wise vpaddb increments would wrap within the next six counters, so
 * the counters are rebuilt with 32-bit adds in .Lhandle_ctr32.
 */
addl $100663296,%ebx
jc .Lhandle_ctr32
vmovdqu 0-32(%r9),%xmm3
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm15,%xmm10,%xmm10
vpxor %xmm15,%xmm11,%xmm11
.Lresume_ctr32:
vmovdqu %xmm1,(%r8)
vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5
vpxor %xmm15,%xmm12,%xmm12
vmovups 16-128(%rcx),%xmm2
vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6
/*
 * %r12 = 0x60 when %r15 >= %r14 (unsigned), else 0.  The flags from cmpq
 * survive until setnc because the vector instructions in between do not
 * modify them.  %r14 is then advanced by %r12.
 */
xorq %r12,%r12
cmpq %r14,%r15
vaesenc %xmm2,%xmm9,%xmm9
vmovdqu 48+8(%rsp),%xmm0
vpxor %xmm15,%xmm13,%xmm13
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1
vaesenc %xmm2,%xmm10,%xmm10
vpxor %xmm15,%xmm14,%xmm14
setnc %r12b
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vaesenc %xmm2,%xmm11,%xmm11
vmovdqu 16-32(%r9),%xmm3
negq %r12
vaesenc %xmm2,%xmm12,%xmm12
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5
vpxor %xmm4,%xmm8,%xmm8
vaesenc %xmm2,%xmm13,%xmm13
vpxor %xmm5,%xmm1,%xmm4
andq $0x60,%r12
vmovups 32-128(%rcx),%xmm15
vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1
vaesenc %xmm2,%xmm14,%xmm14
vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2
leaq (%r14,%r12,1),%r14
vaesenc %xmm15,%xmm9,%xmm9
vpxor 16+8(%rsp),%xmm8,%xmm8
vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3
vmovdqu 64+8(%rsp),%xmm0
vaesenc %xmm15,%xmm10,%xmm10
/*
 * From here on, 8-byte words at 0(%r14)..88(%r14) are loaded byte-swapped
 * (movbeq) and written to stack slots 32+8(%rsp)..120+8(%rsp), interleaved
 * with the AES rounds.
 */
movbeq 88(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 80(%r14),%r12
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,32+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,40+8(%rsp)
vmovdqu 48-32(%r9),%xmm5
vaesenc %xmm15,%xmm14,%xmm14
vmovups 48-128(%rcx),%xmm15
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm3,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3
vaesenc %xmm15,%xmm11,%xmm11
vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5
vmovdqu 80+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vpxor %xmm1,%xmm4,%xmm4
vmovdqu 64-32(%r9),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vmovups 64-128(%rcx),%xmm15
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3
vaesenc %xmm15,%xmm10,%xmm10
movbeq 72(%r14),%r13
vpxor %xmm5,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
movbeq 64(%r14),%r12
vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1
vmovdqu 96+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,48+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,56+8(%rsp)
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 96-32(%r9),%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vmovups 80-128(%rcx),%xmm15
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5
vaesenc %xmm15,%xmm10,%xmm10
movbeq 56(%r14),%r13
vpxor %xmm1,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1
vpxor 112+8(%rsp),%xmm8,%xmm8
vaesenc %xmm15,%xmm11,%xmm11
movbeq 48(%r14),%r12
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,64+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,72+8(%rsp)
vpxor %xmm3,%xmm4,%xmm4
vmovdqu 112-32(%r9),%xmm3
vaesenc %xmm15,%xmm14,%xmm14
vmovups 96-128(%rcx),%xmm15
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1
vaesenc %xmm15,%xmm10,%xmm10
movbeq 40(%r14),%r13
vpxor %xmm2,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
movbeq 32(%r14),%r12
vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,80+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,88+8(%rsp)
vpxor %xmm5,%xmm6,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor %xmm1,%xmm6,%xmm6
vmovups 112-128(%rcx),%xmm15
vpslldq $8,%xmm6,%xmm5
vpxor %xmm2,%xmm4,%xmm4
/* %xmm3 = 16(%r11), the constant used by the two reduction multiplies. */
vmovdqu 16(%r11),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm8,%xmm7,%xmm7
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm5,%xmm4,%xmm4
movbeq 24(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 16(%r14),%r12
vpalignr $8,%xmm4,%xmm4,%xmm0
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
movq %r13,96+8(%rsp)
vaesenc %xmm15,%xmm12,%xmm12
movq %r12,104+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
vmovups 128-128(%rcx),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vmovups 144-128(%rcx),%xmm15
vaesenc %xmm1,%xmm10,%xmm10
vpsrldq $8,%xmm6,%xmm6
vaesenc %xmm1,%xmm11,%xmm11
vpxor %xmm6,%xmm7,%xmm7
vaesenc %xmm1,%xmm12,%xmm12
vpxor %xmm0,%xmm4,%xmm4
movbeq 8(%r14),%r13
vaesenc %xmm1,%xmm13,%xmm13
movbeq 0(%r14),%r12
vaesenc %xmm1,%xmm14,%xmm14
vmovups 160-128(%rcx),%xmm1
/*
 * Dispatch on %ebp: below 11 goes straight to the tail; exactly 11 runs two
 * more rounds first (the je below reuses the flags of this cmpl, which the
 * vector instructions in between leave untouched); anything larger runs
 * four more rounds.
 */
cmpl $11,%ebp
jb .Lenc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 176-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 192-128(%rcx),%xmm1
je .Lenc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 208-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 224-128(%rcx),%xmm1
jmp .Lenc_tail
.align 32
/*
 * Slow path for a counter wrap: shuffle the counter with the mask at
 * (%r11), form the next six values with 32-bit adds (vpaddd with the
 * constants at 48(%r11) and 64(%r11)), shuffle each back, then rejoin the
 * main loop.
 */
.Lhandle_ctr32:
vmovdqu (%r11),%xmm0
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vmovdqu 0-32(%r9),%xmm3
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm15,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm15,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpshufb %xmm0,%xmm14,%xmm14
vpshufb %xmm0,%xmm1,%xmm1
jmp .Lresume_ctr32
.align 32
/*
 * Final rounds: the last round key (%xmm1) is XORed with the six input
 * blocks first, so each vaesenclast applies the last round and the input
 * XOR in one step.  The next six counter values are prepared at the same
 * time.
 */
.Lenc_tail:
vaesenc %xmm15,%xmm9,%xmm9
vmovdqu %xmm7,16+8(%rsp)
vpalignr $8,%xmm4,%xmm4,%xmm8
vaesenc %xmm15,%xmm10,%xmm10
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
vpxor 0(%rdi),%xmm1,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
vpxor 16(%rdi),%xmm1,%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vpxor 32(%rdi),%xmm1,%xmm5
vaesenc %xmm15,%xmm13,%xmm13
vpxor 48(%rdi),%xmm1,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor 64(%rdi),%xmm1,%xmm7
vpxor 80(%rdi),%xmm1,%xmm3
vmovdqu (%r8),%xmm1
vaesenclast %xmm2,%xmm9,%xmm9
vmovdqu 32(%r11),%xmm2
vaesenclast %xmm0,%xmm10,%xmm10
vpaddb %xmm2,%xmm1,%xmm0
movq %r13,112+8(%rsp)
leaq 96(%rdi),%rdi
vaesenclast %xmm5,%xmm11,%xmm11
vpaddb %xmm2,%xmm0,%xmm5
movq %r12,120+8(%rsp)
leaq 96(%rsi),%rsi
vmovdqu 0-128(%rcx),%xmm15
vaesenclast %xmm6,%xmm12,%xmm12
vpaddb %xmm2,%xmm5,%xmm6
vaesenclast %xmm7,%xmm13,%xmm13
vpaddb %xmm2,%xmm6,%xmm7
vaesenclast %xmm3,%xmm14,%xmm14
vpaddb %xmm2,%xmm7,%xmm3
addq $0x60,%r10
/* Borrow from %rdx means no further group of six blocks remains. */
subq $0x6,%rdx
jc .L6x_done
vmovups %xmm9,-96(%rsi)
vpxor %xmm15,%xmm1,%xmm9
vmovups %xmm10,-80(%rsi)
vmovdqa %xmm0,%xmm10
vmovups %xmm11,-64(%rsi)
vmovdqa %xmm5,%xmm11
vmovups %xmm12,-48(%rsi)
vmovdqa %xmm6,%xmm12
vmovups %xmm13,-32(%rsi)
vmovdqa %xmm7,%xmm13
vmovups %xmm14,-16(%rsi)
vmovdqa %xmm3,%xmm14
vmovdqu 32+8(%rsp),%xmm7
jmp .Loop6x
.L6x_done:
/* Fold the two outstanding partial values into the accumulator %xmm8. */
vpxor 16+8(%rsp),%xmm8,%xmm8
vpxor %xmm4,%xmm8,%xmm8
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc
.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
/*
 * aesni_gcm_decrypt -- bulk AES-GCM decryption in groups of six 16-byte
 * blocks; the per-group work is done by _aesni_ctr32_ghash_6x.
 *
 * Arguments (System V AMD64 register order, as consumed below):
 *   %rdi  input data
 *   %rsi  output buffer
 *   %rdx  length in bytes; fewer than 0x60 returns 0 without touching memory
 *   %rcx  AES key schedule (a 32-bit word at offset 240 is loaded into %ebp)
 *   %r8   16-byte counter block; the helper stores updated values to it
 *   %r9   16-byte hash value at offset 0, read on entry and written back on
 *         exit; the helper reads a table starting 32 bytes past it
 * Returns in %rax the byte count accumulated in %r10 by the helper.
 *
 * %rax holds the entry %rsp for the lifetime of the frame; the callee-saved
 * registers %rbx, %rbp, %r12-%r15 are pushed below it and reloaded from
 * -8(%rax)..-48(%rax) before returning.
 */
.globl aesni_gcm_decrypt
.type aesni_gcm_decrypt,@function
.align 32
aesni_gcm_decrypt:
.cfi_startproc
xorq %r10,%r10
cmpq $0x60,%rdx
jb .Lgcm_dec_abort
leaq (%rsp),%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq .Lbswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
vmovdqu (%r9),%xmm8
/* Align the scratch frame to 128 bytes. */
andq $-128,%rsp
vmovdqu (%r11),%xmm0
leaq 128(%rcx),%rcx
leaq 32+32(%r9),%r9
movl 240-128(%rcx),%ebp
vpshufb %xmm0,%xmm8,%xmm8
/*
 * Compare bits 7..11 of the frame address with the same bits of the key
 * schedule address.  If the frame sits 0..767 bytes above the key within a
 * 4 KiB window, move %rsp down by that distance.  Presumably this avoids
 * the frame and the key schedule aliasing in the cache -- NOTE(review):
 * rationale inferred from the label name, confirm.
 */
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc .Ldec_no_key_aliasing
cmpq $768,%r15
jnc .Ldec_no_key_aliasing
subq %r15,%rsp
.Ldec_no_key_aliasing:
/*
 * Stage the first six input blocks, shuffled with the mask in %xmm0: the
 * block at 80(%rdi) stays in %xmm7, the other five go to 48(%rsp)..112(%rsp).
 * %r14 = input pointer, %r15 = input + length - 192, %rdx = length / 16.
 */
vmovdqu 80(%rdi),%xmm7
leaq (%rdi),%r14
vmovdqu 64(%rdi),%xmm4
leaq -192(%rdi,%rdx,1),%r15
vmovdqu 48(%rdi),%xmm5
shrq $4,%rdx
xorq %r10,%r10
vmovdqu 32(%rdi),%xmm6
vpshufb %xmm0,%xmm7,%xmm7
vmovdqu 16(%rdi),%xmm2
vpshufb %xmm0,%xmm4,%xmm4
vmovdqu (%rdi),%xmm3
vpshufb %xmm0,%xmm5,%xmm5
vmovdqu %xmm4,48(%rsp)
vpshufb %xmm0,%xmm6,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm2,%xmm2
vmovdqu %xmm6,80(%rsp)
vpshufb %xmm0,%xmm3,%xmm3
vmovdqu %xmm2,96(%rsp)
vmovdqu %xmm3,112(%rsp)
call _aesni_ctr32_ghash_6x
/* The helper returns the last six output blocks in registers; store them. */
vmovups %xmm9,-96(%rsi)
vmovups %xmm10,-80(%rsi)
vmovups %xmm11,-64(%rsi)
vmovups %xmm12,-48(%rsi)
vmovups %xmm13,-32(%rsi)
vmovups %xmm14,-16(%rsi)
/* Shuffle the accumulator back and write it to the original %r9 address. */
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lgcm_dec_abort:
movq %r10,%rax
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
/*
 * _aesni_ctr32_6x -- internal helper for aesni_gcm_encrypt: run AES over
 * six counter blocks, XOR the result with 96 bytes at (%rdi), and store 96
 * bytes at (%rsi).  Both pointers are advanced by 96.
 *
 * Inputs, as used below:
 *   %rcx  key schedule pointer biased by +128
 *   %rbp  word the caller loaded from offset 240 of the key schedule;
 *         %rbp - 1 iterations of .Loop_ctr32 are run
 *   %r11  address of .Lbswap_mask (constants reached by offset)
 *   %xmm0 shuffle mask loaded by the caller from (%r11); used only on the
 *         counter-wrap path
 *   %xmm1 counter block, %ebx its last 32-bit word
 * Outputs: %xmm9-%xmm14 = the six blocks just stored; %xmm1 = next counter.
 * Scratch: %r12, %r13, %xmm2-%xmm6, %xmm8, %xmm15.
 */
.type _aesni_ctr32_6x,@function
.align 32
_aesni_ctr32_6x:
.cfi_startproc
vmovdqu 0-128(%rcx),%xmm4
vmovdqu 32(%r11),%xmm2
leaq -1(%rbp),%r13
vmovups 16-128(%rcx),%xmm15
leaq 32-128(%rcx),%r12
vpxor %xmm4,%xmm1,%xmm9
/*
 * 100663296 = 6<<24: add 6 to the top byte of %ebx; a carry means the
 * byte-wise vpaddb increments below would wrap, so use the 32-bit path.
 */
addl $100663296,%ebx
jc .Lhandle_ctr32_2
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddb %xmm2,%xmm11,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddb %xmm2,%xmm12,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp .Loop_ctr32
.align 16
/* One AES round on all six blocks per iteration; %r12 walks the round keys. */
.Loop_ctr32:
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vmovups (%r12),%xmm15
leaq 16(%r12),%r12
decl %r13d
jnz .Loop_ctr32
/*
 * %xmm3 = last round key.  It is XORed with each input block first so that
 * vaesenclast performs the final round and the data XOR together.
 */
vmovdqu (%r12),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor 0(%rdi),%xmm3,%xmm4
vaesenc %xmm15,%xmm10,%xmm10
vpxor 16(%rdi),%xmm3,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
vpxor 32(%rdi),%xmm3,%xmm6
vaesenc %xmm15,%xmm12,%xmm12
vpxor 48(%rdi),%xmm3,%xmm8
vaesenc %xmm15,%xmm13,%xmm13
vpxor 64(%rdi),%xmm3,%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vpxor 80(%rdi),%xmm3,%xmm3
leaq 96(%rdi),%rdi
vaesenclast %xmm4,%xmm9,%xmm9
vaesenclast %xmm5,%xmm10,%xmm10
vaesenclast %xmm6,%xmm11,%xmm11
vaesenclast %xmm8,%xmm12,%xmm12
vaesenclast %xmm2,%xmm13,%xmm13
vaesenclast %xmm3,%xmm14,%xmm14
vmovups %xmm9,0(%rsi)
vmovups %xmm10,16(%rsi)
vmovups %xmm11,32(%rsi)
vmovups %xmm12,48(%rsi)
vmovups %xmm13,64(%rsi)
vmovups %xmm14,80(%rsi)
leaq 96(%rsi),%rsi
/* 0xf3,0xc3 encodes "rep ret"; the code after it is reached only by jc. */
.byte 0xf3,0xc3
.align 32
/*
 * Counter-wrap path: shuffle the counter with %xmm0, build the next six
 * values with 32-bit adds, shuffle back, apply round key 0, rejoin the loop.
 */
.Lhandle_ctr32_2:
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpshufb %xmm0,%xmm14,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpshufb %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp .Loop_ctr32
.cfi_endproc
.size _aesni_ctr32_6x,.-_aesni_ctr32_6x
/*
 * aesni_gcm_encrypt -- bulk AES-GCM encryption in groups of six 16-byte
 * blocks.
 *
 * Arguments (System V AMD64 register order, as consumed below):
 *   %rdi  input data
 *   %rsi  output buffer
 *   %rdx  length in bytes; fewer than 288 returns 0 without touching memory
 *   %rcx  AES key schedule (a 32-bit word at offset 240 is loaded into %ebp)
 *   %r8   16-byte counter block; the helper stores updated values to it
 *   %r9   16-byte hash value at offset 0, read before the main loop and
 *         written back on exit; a table is read starting 32 bytes past it
 * Returns in %rax the byte count in %r10: 192 for the two initial groups
 * plus 0x60 for every pass of the helper's loop.
 *
 * Flow: two calls to _aesni_ctr32_6x produce the first twelve output blocks,
 * _aesni_ctr32_ghash_6x handles the bulk, and the straight-line code after
 * it folds the blocks that are still staged into %xmm8.
 *
 * %rax holds the entry %rsp for the lifetime of the frame; %rbx, %rbp and
 * %r12-%r15 are pushed below it and reloaded from -8(%rax)..-48(%rax).
 */
.globl aesni_gcm_encrypt
.type aesni_gcm_encrypt,@function
.align 32
aesni_gcm_encrypt:
.cfi_startproc
xorq %r10,%r10
cmpq $288,%rdx
jb .Lgcm_enc_abort
leaq (%rsp),%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq .Lbswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
leaq 128(%rcx),%rcx
vmovdqu (%r11),%xmm0
/* Align the scratch frame to 128 bytes. */
andq $-128,%rsp
movl 240-128(%rcx),%ebp
/*
 * Same frame-versus-key address check as in aesni_gcm_decrypt: if bits
 * 7..11 of the frame address are 0..767 bytes above those of the key
 * schedule, lower %rsp by that distance.  NOTE(review): rationale (cache
 * aliasing) is inferred from the label name -- confirm.
 */
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc .Lenc_no_key_aliasing
cmpq $768,%r15
jnc .Lenc_no_key_aliasing
subq %r15,%rsp
.Lenc_no_key_aliasing:
/* %r14 = output pointer, %r15 = output + length - 192, %rdx = length / 16. */
leaq (%rsi),%r14
leaq -192(%rsi,%rdx,1),%r15
shrq $4,%rdx
call _aesni_ctr32_6x
/*
 * Shuffle the six blocks just produced with the mask in %xmm0 and stage
 * five of them at 48(%rsp)..112(%rsp); the sixth stays in %xmm7.
 */
vpshufb %xmm0,%xmm9,%xmm8
vpshufb %xmm0,%xmm10,%xmm2
vmovdqu %xmm8,112(%rsp)
vpshufb %xmm0,%xmm11,%xmm4
vmovdqu %xmm2,96(%rsp)
vpshufb %xmm0,%xmm12,%xmm5
vmovdqu %xmm4,80(%rsp)
vpshufb %xmm0,%xmm13,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm14,%xmm7
vmovdqu %xmm6,48(%rsp)
call _aesni_ctr32_6x
vmovdqu (%r9),%xmm8
leaq 32+32(%r9),%r9
/* Twelve blocks (192 bytes) are already done. */
subq $12,%rdx
movq $192,%r10
vpshufb %xmm0,%xmm8,%xmm8
call _aesni_ctr32_ghash_6x
/*
 * Tail: store the six blocks returned in %xmm9-%xmm14, then multiply the
 * staged blocks at 32(%rsp)..112(%rsp) and the shuffled copies of
 * %xmm9-%xmm14 by entries of the table at %r9 and reduce with the constant
 * at 16(%r11).  NOTE(review): the pairing of blocks with table entries is
 * not annotated here; verify against the generator script before editing.
 */
vmovdqu 32(%rsp),%xmm7
vmovdqu (%r11),%xmm0
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm7,%xmm7,%xmm1
vmovdqu 32-32(%r9),%xmm15
vmovups %xmm9,-96(%rsi)
vpshufb %xmm0,%xmm9,%xmm9
vpxor %xmm7,%xmm1,%xmm1
vmovups %xmm10,-80(%rsi)
vpshufb %xmm0,%xmm10,%xmm10
vmovups %xmm11,-64(%rsi)
vpshufb %xmm0,%xmm11,%xmm11
vmovups %xmm12,-48(%rsi)
vpshufb %xmm0,%xmm12,%xmm12
vmovups %xmm13,-32(%rsi)
vpshufb %xmm0,%xmm13,%xmm13
vmovups %xmm14,-16(%rsi)
vpshufb %xmm0,%xmm14,%xmm14
vmovdqu %xmm9,16(%rsp)
vmovdqu 48(%rsp),%xmm6
vmovdqu 16-32(%r9),%xmm0
vpunpckhqdq %xmm6,%xmm6,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5
vpxor %xmm6,%xmm2,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vmovdqu 64(%rsp),%xmm9
vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm9,%xmm9,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6
vpxor %xmm9,%xmm5,%xmm5
vpxor %xmm7,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vmovdqu 80(%rsp),%xmm1
vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm4,%xmm7,%xmm7
vpunpckhqdq %xmm1,%xmm1,%xmm4
vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpxor %xmm6,%xmm9,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 96(%rsp),%xmm2
vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm7,%xmm6,%xmm6
vpunpckhqdq %xmm2,%xmm2,%xmm7
vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1
vpxor %xmm2,%xmm7,%xmm7
vpxor %xmm9,%xmm1,%xmm1
vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm5,%xmm4,%xmm4
vpxor 112(%rsp),%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5
vmovdqu 112-32(%r9),%xmm0
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpxor %xmm6,%xmm5,%xmm5
vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7
vpxor %xmm4,%xmm7,%xmm4
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm14,%xmm14,%xmm1
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8
vpxor %xmm14,%xmm1,%xmm1
vpxor %xmm5,%xmm6,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9
vmovdqu 32-32(%r9),%xmm15
vpxor %xmm2,%xmm8,%xmm7
vpxor %xmm4,%xmm9,%xmm6
vmovdqu 16-32(%r9),%xmm0
vpxor %xmm5,%xmm7,%xmm9
vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4
vpxor %xmm9,%xmm6,%xmm6
vpunpckhqdq %xmm13,%xmm13,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14
vpxor %xmm13,%xmm2,%xmm2
vpslldq $8,%xmm6,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vpxor %xmm9,%xmm5,%xmm8
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm6,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm12,%xmm12,%xmm9
vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13
vpxor %xmm12,%xmm9,%xmm9
vpxor %xmm14,%xmm13,%xmm13
vpalignr $8,%xmm8,%xmm8,%xmm14
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm11,%xmm11,%xmm1
vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12
vpxor %xmm11,%xmm1,%xmm1
vpxor %xmm13,%xmm12,%xmm12
vxorps 16(%rsp),%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm9,%xmm9
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm10,%xmm10,%xmm2
vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11
vpxor %xmm10,%xmm2,%xmm2
vpalignr $8,%xmm8,%xmm8,%xmm14
vpxor %xmm12,%xmm11,%xmm11
vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm9,%xmm1,%xmm1
vxorps %xmm7,%xmm14,%xmm14
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4
vmovdqu 112-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm11,%xmm10,%xmm10
vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7
vpxor %xmm4,%xmm5,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6
vpxor %xmm10,%xmm7,%xmm7
vpxor %xmm2,%xmm6,%xmm6
vpxor %xmm5,%xmm7,%xmm4
vpxor %xmm4,%xmm6,%xmm6
vpslldq $8,%xmm6,%xmm1
vmovdqu 16(%r11),%xmm3
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm1,%xmm5,%xmm8
vpxor %xmm6,%xmm7,%xmm7
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm2,%xmm8,%xmm8
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm7,%xmm2,%xmm2
vpxor %xmm2,%xmm8,%xmm8
/* Shuffle the accumulator back and write it to the original %r9 address. */
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lgcm_enc_abort:
movq %r10,%rax
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
/*
 * Constant pool for the AES-GCM routines.  The code loads the address of
 * .Lbswap_mask into %r11 and reaches the other entries by fixed offset, so
 * the order and 16-byte size of each entry must not change:
 *   0(%r11)  .Lbswap_mask  vpshufb mask that reverses the 16 bytes
 *   16(%r11) .Lpoly        operand of the reduction vpclmulqdq
 *   32(%r11) .Lone_msb     1 in byte 15, added with vpaddb
 *   48(%r11) .Ltwo_lsb     2 in byte 0, added with vpaddd
 *   64(%r11) .Lone_lsb     1 in byte 0, added with vpaddd
 */
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lpoly:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.Lone_msb:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Ltwo_lsb:
.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.Lone_lsb:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
/* NUL-terminated ASCII: "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
/*
 * ELF note describing this object.  Layout: name size (1f - 0f), descriptor
 * size (4f - 1f), note type 5, the name "GNU\0", then one property record:
 * type 0xc0000002, data size (3f - 2f), 32-bit value 3, padded to 8 bytes.
 * In the x86-64 psABI, type 5 is NT_GNU_PROPERTY_TYPE_0, 0xc0000002 is
 * GNU_PROPERTY_X86_FEATURE_1_AND, and value 3 sets its IBT and SHSTK bits.
 */
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1,546 +0,0 @@
/* Do not modify. This file is auto-generated from keccak1600-x86_64.pl. */
.text
/*
 * __KeccakF1600 -- internal permutation core; not ABI-conforming.
 *
 * Inputs, as used below:
 *   %rdi  current state, pointer biased by +100: lane i (64 bits) is at
 *         8*i-100(%rdi), so the 25 lanes span -100(%rdi)..92(%rdi)
 *   %rsi  second 200-byte buffer with the same +100 bias; each round reads
 *         the state through %rdi and writes the new state through %rsi
 *   %r15  pointer to the next round constant in iotas
 * Each pass of .Loop is one round and ends by exchanging %rsi and %rdi.
 * The loop stops when the low 8 bits of %r15 are zero; with the iotas table
 * placed 64 bytes past a 256-byte boundary and holding 24 quadwords that is
 * after 24 rounds, an even count, so the result is in the buffer %rdi
 * pointed to on entry.  %r15 is rewound by 192 before returning.
 * Clobbers %rax, %rbx, %rcx, %rdx, %rbp, %r8-%r14 and the flags without
 * saving them; callers are responsible for preserving callee-saved registers.
 */
.type __KeccakF1600,@function
.align 32
__KeccakF1600:
.cfi_startproc
/* Preload lanes 20..24 into %rax, %rbx, %rcx, %rdx, %rbp. */
movq 60(%rdi),%rax
movq 68(%rdi),%rbx
movq 76(%rdi),%rcx
movq 84(%rdi),%rdx
movq 92(%rdi),%rbp
jmp .Loop
.align 32
.Loop:
/*
 * Column parities: %rax, %rbx, %rcx, %rdx, %rbp accumulate the XOR of the
 * five lanes of columns 0..4.  Lanes 0, 6, 12, 18 are kept in %r8-%r11 and
 * the unmodified lane 24 in %r12 for the first output row.
 */
movq -100(%rdi),%r8
movq -52(%rdi),%r9
movq -4(%rdi),%r10
movq 44(%rdi),%r11
xorq -84(%rdi),%rcx
xorq -76(%rdi),%rdx
xorq %r8,%rax
xorq -92(%rdi),%rbx
xorq -44(%rdi),%rcx
xorq -60(%rdi),%rax
movq %rbp,%r12
xorq -68(%rdi),%rbp
xorq %r10,%rcx
xorq -20(%rdi),%rax
xorq -36(%rdi),%rdx
xorq %r9,%rbx
xorq -28(%rdi),%rbp
xorq 36(%rdi),%rcx
xorq 20(%rdi),%rax
xorq 4(%rdi),%rdx
xorq -12(%rdi),%rbx
xorq 12(%rdi),%rbp
/*
 * Combine each parity, rotated left by one, with another column's parity.
 * Afterwards %rbx, %rcx, %rdx, %rbp, %rax hold the values XORed into the
 * lanes of columns 0, 1, 2, 3, 4 respectively.
 */
movq %rcx,%r13
rolq $1,%rcx
xorq %rax,%rcx
xorq %r11,%rdx
rolq $1,%rax
xorq %rdx,%rax
xorq 28(%rdi),%rbx
rolq $1,%rdx
xorq %rbx,%rdx
xorq 52(%rdi),%rbp
rolq $1,%rbx
xorq %rbp,%rbx
rolq $1,%rbp
xorq %r13,%rbp
/*
 * Output row 0 (stored at -100..-68(%rsi)): adjust and rotate lanes
 * 0, 6, 12, 18, 24, mix them with and/or/not, and XOR the round constant
 * at (%r15) into the first lane.
 */
xorq %rcx,%r9
xorq %rdx,%r10
rolq $44,%r9
xorq %rbp,%r11
xorq %rax,%r12
rolq $43,%r10
xorq %rbx,%r8
movq %r9,%r13
rolq $21,%r11
orq %r10,%r9
xorq %r8,%r9
rolq $14,%r12
xorq (%r15),%r9
leaq 8(%r15),%r15
movq %r12,%r14
andq %r11,%r12
movq %r9,-100(%rsi)
xorq %r10,%r12
notq %r10
movq %r12,-84(%rsi)
orq %r11,%r10
movq 76(%rdi),%r12
xorq %r13,%r10
movq %r10,-92(%rsi)
andq %r8,%r13
movq -28(%rdi),%r9
xorq %r14,%r13
movq -20(%rdi),%r10
movq %r13,-68(%rsi)
orq %r8,%r14
movq -76(%rdi),%r8
xorq %r11,%r14
movq 28(%rdi),%r11
movq %r14,-76(%rsi)
/* Output row 1 (stored at -60..-28(%rsi)). */
xorq %rbp,%r8
xorq %rdx,%r12
rolq $28,%r8
xorq %rcx,%r11
xorq %rax,%r9
rolq $61,%r12
rolq $45,%r11
xorq %rbx,%r10
rolq $20,%r9
movq %r8,%r13
orq %r12,%r8
rolq $3,%r10
xorq %r11,%r8
movq %r8,-36(%rsi)
movq %r9,%r14
andq %r13,%r9
movq -92(%rdi),%r8
xorq %r12,%r9
notq %r12
movq %r9,-28(%rsi)
orq %r11,%r12
movq -44(%rdi),%r9
xorq %r10,%r12
movq %r12,-44(%rsi)
andq %r10,%r11
movq 60(%rdi),%r12
xorq %r14,%r11
movq %r11,-52(%rsi)
orq %r10,%r14
movq 4(%rdi),%r10
xorq %r13,%r14
movq 52(%rdi),%r11
movq %r14,-60(%rsi)
/* Output row 2 (stored at -20..12(%rsi)). */
xorq %rbp,%r10
xorq %rax,%r11
rolq $25,%r10
xorq %rdx,%r9
rolq $8,%r11
xorq %rbx,%r12
rolq $6,%r9
xorq %rcx,%r8
rolq $18,%r12
movq %r10,%r13
andq %r11,%r10
rolq $1,%r8
notq %r11
xorq %r9,%r10
movq %r10,-12(%rsi)
movq %r12,%r14
andq %r11,%r12
movq -12(%rdi),%r10
xorq %r13,%r12
movq %r12,-4(%rsi)
orq %r9,%r13
movq 84(%rdi),%r12
xorq %r8,%r13
movq %r13,-20(%rsi)
andq %r8,%r9
xorq %r14,%r9
movq %r9,12(%rsi)
orq %r8,%r14
movq -60(%rdi),%r9
xorq %r11,%r14
movq 36(%rdi),%r11
movq %r14,4(%rsi)
/* Output row 3 (stored at 20..52(%rsi)). */
movq -68(%rdi),%r8
xorq %rcx,%r10
xorq %rdx,%r11
rolq $10,%r10
xorq %rbx,%r9
rolq $15,%r11
xorq %rbp,%r12
rolq $36,%r9
xorq %rax,%r8
rolq $56,%r12
movq %r10,%r13
orq %r11,%r10
rolq $27,%r8
notq %r11
xorq %r9,%r10
movq %r10,28(%rsi)
movq %r12,%r14
orq %r11,%r12
xorq %r13,%r12
movq %r12,36(%rsi)
andq %r9,%r13
xorq %r8,%r13
movq %r13,20(%rsi)
orq %r8,%r9
xorq %r14,%r9
movq %r9,52(%rsi)
andq %r14,%r8
xorq %r11,%r8
movq %r8,44(%rsi)
/*
 * Output row 4: the adjustment values in %rax, %rbx, %rcx, %rdx, %rbp are
 * consumed in place.  %rsi and %rdi are exchanged part-way through, so the
 * stores at 60..92(%rdi) go to the buffer that was %rsi during this round.
 */
xorq -84(%rdi),%rdx
xorq -36(%rdi),%rbp
rolq $62,%rdx
xorq 68(%rdi),%rcx
rolq $55,%rbp
xorq 12(%rdi),%rax
rolq $2,%rcx
xorq 20(%rdi),%rbx
xchgq %rsi,%rdi
rolq $39,%rax
rolq $41,%rbx
movq %rdx,%r13
andq %rbp,%rdx
notq %rbp
xorq %rcx,%rdx
movq %rdx,92(%rdi)
movq %rax,%r14
andq %rbp,%rax
xorq %r13,%rax
movq %rax,60(%rdi)
orq %rcx,%r13
xorq %rbx,%r13
movq %r13,84(%rdi)
andq %rbx,%rcx
xorq %r14,%rcx
movq %rcx,76(%rdi)
orq %r14,%rbx
xorq %rbp,%rbx
movq %rbx,68(%rdi)
/* Leave lanes 20..24 of the new state in %rax..%rbp for the next round. */
movq %rdx,%rbp
movq %r13,%rdx
testq $255,%r15
jnz .Loop
/* Rewind %r15 to the start of the 24-entry table. */
leaq -192(%r15),%r15
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc
.size __KeccakF1600,.-__KeccakF1600
/*
 * KeccakF1600 -- file-local wrapper (no .globl) that applies the permutation
 * to the 200-byte state at %rdi.
 *
 * Saves %rbx, %rbp, %r12-%r15, reserves 200 bytes of stack as the second
 * buffer for __KeccakF1600, and biases both buffer pointers by +100.
 * Six lanes (offsets -92, -84, -36, -4, 36, 60 from the biased pointer,
 * i.e. lanes 1, 2, 8, 12, 17, 20) are complemented before the call and
 * complemented again after it; the core operates on that representation.
 * %rdi is restored to its entry value before returning.
 */
.type KeccakF1600,@function
.align 32
KeccakF1600:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
leaq 100(%rdi),%rdi
subq $200,%rsp
.cfi_adjust_cfa_offset 200
notq -92(%rdi)
notq -84(%rdi)
notq -36(%rdi)
notq -4(%rdi)
notq 36(%rdi)
notq 60(%rdi)
/* %r15 = round-constant table, %rsi = biased pointer to the stack buffer. */
leaq iotas(%rip),%r15
leaq 100(%rsp),%rsi
call __KeccakF1600
notq -92(%rdi)
notq -84(%rdi)
notq -36(%rdi)
notq -4(%rdi)
notq 36(%rdi)
notq 60(%rdi)
leaq -100(%rdi),%rdi
addq $200,%rsp
.cfi_adjust_cfa_offset -200
popq %r15
.cfi_adjust_cfa_offset -8
.cfi_restore %r15
popq %r14
.cfi_adjust_cfa_offset -8
.cfi_restore %r14
popq %r13
.cfi_adjust_cfa_offset -8
.cfi_restore %r13
popq %r12
.cfi_adjust_cfa_offset -8
.cfi_restore %r12
popq %rbp
.cfi_adjust_cfa_offset -8
.cfi_restore %rbp
popq %rbx
.cfi_adjust_cfa_offset -8
.cfi_restore %rbx
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc
.size KeccakF1600,.-KeccakF1600
/*
 * SHA3_absorb -- XOR whole input blocks into the state and permute after
 * each one.
 *
 * Arguments (System V AMD64 register order, as consumed below):
 *   %rdi  200-byte state
 *   %rsi  input data
 *   %rdx  input length in bytes
 *   %rcx  block size in bytes; it is divided by 8 to get the number of
 *         64-bit words XORed per block
 * Returns in %rax the number of input bytes left over, which is less than
 * the block size.
 *
 * Stack frame (232 bytes): 200 bytes serve as the second buffer for
 * __KeccakF1600; the input pointer, remaining length and block size are
 * kept at offsets 200, 208 and 216 across the call, addressed as
 * N-100(%rsi) because %rsi is the buffer pointer biased by +100.
 * The same six lanes as in KeccakF1600 are complemented on entry and exit.
 */
.globl SHA3_absorb
.type SHA3_absorb,@function
.align 32
SHA3_absorb:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
leaq 100(%rdi),%rdi
subq $232,%rsp
.cfi_adjust_cfa_offset 232
/* %r9 = input pointer; %rsi is reused for the stack buffer. */
movq %rsi,%r9
leaq 100(%rsp),%rsi
notq -92(%rdi)
notq -84(%rdi)
notq -36(%rdi)
notq -4(%rdi)
notq 36(%rdi)
notq 60(%rdi)
leaq iotas(%rip),%r15
movq %rcx,216-100(%rsi)
.Loop_absorb:
/* Stop once fewer than a full block remains (unsigned compare). */
cmpq %rcx,%rdx
jc .Ldone_absorb
shrq $3,%rcx
leaq -100(%rdi),%r8
/* XOR one block into the state, eight bytes at a time. */
.Lblock_absorb:
movq (%r9),%rax
leaq 8(%r9),%r9
xorq (%r8),%rax
leaq 8(%r8),%r8
subq $8,%rdx
movq %rax,-8(%r8)
subq $1,%rcx
jnz .Lblock_absorb
/* __KeccakF1600 clobbers %r9, %rdx and %rcx; spill and reload them. */
movq %r9,200-100(%rsi)
movq %rdx,208-100(%rsi)
call __KeccakF1600
movq 200-100(%rsi),%r9
movq 208-100(%rsi),%rdx
movq 216-100(%rsi),%rcx
jmp .Loop_absorb
.align 32
.Ldone_absorb:
movq %rdx,%rax
notq -92(%rdi)
notq -84(%rdi)
notq -36(%rdi)
notq -4(%rdi)
notq 36(%rdi)
notq 60(%rdi)
addq $232,%rsp
.cfi_adjust_cfa_offset -232
popq %r15
.cfi_adjust_cfa_offset -8
.cfi_restore %r15
popq %r14
.cfi_adjust_cfa_offset -8
.cfi_restore %r14
popq %r13
.cfi_adjust_cfa_offset -8
.cfi_restore %r13
popq %r12
.cfi_adjust_cfa_offset -8
.cfi_restore %r12
popq %rbp
.cfi_adjust_cfa_offset -8
.cfi_restore %rbp
popq %rbx
.cfi_adjust_cfa_offset -8
.cfi_restore %rbx
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc
.size SHA3_absorb,.-SHA3_absorb
/*
 * SHA3_squeeze -- copy output bytes out of the state, running the
 * permutation each time a full block has been emitted and more output is
 * still wanted.
 *
 * Arguments (System V AMD64 register order, as consumed below):
 *   %rdi  200-byte state
 *   %rsi  output buffer
 *   %rdx  number of output bytes wanted
 *   %rcx  block size in bytes; divided by 8 to get words per block
 * No return value is set.
 *
 * Register roles inside the loop:
 *   %r8   read cursor into the state
 *   %r12  write cursor into the output
 *   %r13  output bytes still to produce
 *   %r14  words per block, used to reload %rcx after each permutation
 * %r12-%r14 are callee-saved and therefore survive the call to KeccakF1600,
 * which also leaves %rdi unchanged.  Three pushes plus the return address
 * keep %rsp 16-byte aligned at that call.
 */
.globl SHA3_squeeze
.type SHA3_squeeze,@function
.align 32
SHA3_squeeze:
.cfi_startproc
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-16
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-24
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-32
shrq $3,%rcx
movq %rdi,%r8
movq %rsi,%r12
movq %rdx,%r13
movq %rcx,%r14
jmp .Loop_squeeze
.align 32
.Loop_squeeze:
/* Fewer than 8 bytes wanted: finish with a byte copy. */
cmpq $8,%r13
jb .Ltail_squeeze
movq (%r8),%rax
leaq 8(%r8),%r8
movq %rax,(%r12)
leaq 8(%r12),%r12
subq $8,%r13
jz .Ldone_squeeze
subq $1,%rcx
jnz .Loop_squeeze
/* Block exhausted with output still pending: permute and start over. */
call KeccakF1600
movq %rdi,%r8
movq %r14,%rcx
jmp .Loop_squeeze
.Ltail_squeeze:
movq %r8,%rsi
movq %r12,%rdi
movq %r13,%rcx
/* 0xf3,0xa4 encodes "rep movsb": copy the remaining %rcx bytes. */
.byte 0xf3,0xa4
.Ldone_squeeze:
popq %r14
.cfi_adjust_cfa_offset -8
.cfi_restore %r14
popq %r13
.cfi_adjust_cfa_offset -8
.cfi_restore %r13
popq %r12
.cfi_adjust_cfa_offset -8
/* Unwind info must name the register just popped (%r12, not %r13 again). */
.cfi_restore %r12
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc
.size SHA3_squeeze,.-SHA3_squeeze
/*
 * Round-constant table read by __KeccakF1600 through %r15.
 * Placement matters: 64 bytes of zero padding after a 256-byte boundary put
 * iotas at offset 64, and its 24 quadwords (192 bytes) end exactly on the
 * next 256-byte boundary.  __KeccakF1600 detects the end of the table with
 * "testq $255,%r15", so neither the alignment, the padding nor the entry
 * count may change independently.
 */
.align 256
.quad 0,0,0,0,0,0,0,0
.type iotas,@object
iotas:
.quad 0x0000000000000001
.quad 0x0000000000008082
.quad 0x800000000000808a
.quad 0x8000000080008000
.quad 0x000000000000808b
.quad 0x0000000080000001
.quad 0x8000000080008081
.quad 0x8000000000008009
.quad 0x000000000000008a
.quad 0x0000000000000088
.quad 0x0000000080008009
.quad 0x000000008000000a
.quad 0x000000008000808b
.quad 0x800000000000008b
.quad 0x8000000000008089
.quad 0x8000000000008003
.quad 0x8000000000008002
.quad 0x8000000000000080
.quad 0x000000000000800a
.quad 0x800000008000000a
.quad 0x8000000080008081
.quad 0x8000000000008080
.quad 0x0000000080000001
.quad 0x8000000080008008
.size iotas,.-iotas
/* NUL-terminated ASCII: "Keccak-1600 absorb and squeeze for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
.byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
/*
 * ELF note describing this object.  Layout: name size (1f - 0f), descriptor
 * size (4f - 1f), note type 5, the name "GNU\0", then one property record:
 * type 0xc0000002, data size (3f - 2f), 32-bit value 3, padded to 8 bytes.
 * In the x86-64 psABI, type 5 is NT_GNU_PROPERTY_TYPE_0, 0xc0000002 is
 * GNU_PROPERTY_X86_FEATURE_1_AND, and value 3 sets its IBT and SHSTK bits.
 */
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:
@@ -1,705 +0,0 @@
/* Do not modify. This file is auto-generated from md5-x86_64.pl. */
/*
 * ossl_md5_block_asm_data_order
 *
 * Register use on entry, as read by the code below (these are the first
 * three System V AMD64 integer argument registers):
 *   %rdi - pointer to four consecutive 32-bit state words (kept in %rbp)
 *   %rsi - pointer to the input data
 *   %rdx - number of 64-byte blocks (shifted left by 6 to get a byte count)
 *
 * Inside the block loop:
 *   %eax,%ebx,%ecx,%edx    - working state words A,B,C,D
 *   %r8d,%r9d,%r14d,%r15d  - copy of A,B,C,D taken at the start of the
 *                            block and added back at its end
 *   %r10d                  - message word for the upcoming step; it is
 *                            loaded one step ahead of its use
 *   %r11d,%r12d            - scratch for the round's boolean function
 *   %rsi                   - current block, %rdi - end of input
 *
 * Every step has the shape
 *   a = b + rol(a + f(b,c,d) + X[k] + constant, s)
 * with the constant and X[k] folded into a single leal.
 *
 * %rbp,%rbx,%r12,%r14,%r15 are saved on entry and restored on exit.  The
 * updated state is written back through %rbp; no value is returned.
 */
.text
.align 16
.globl ossl_md5_block_asm_data_order
.type ossl_md5_block_asm_data_order,@function
ossl_md5_block_asm_data_order:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-40
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-48
.Lprologue:
/* %rbp = state pointer, %rdi = input + 64 * block count, A..D loaded. */
movq %rdi,%rbp
shlq $6,%rdx
leaq (%rsi,%rdx,1),%rdi
movl 0(%rbp),%eax
movl 4(%rbp),%ebx
movl 8(%rbp),%ecx
movl 12(%rbp),%edx
/* Zero blocks: skip the loop and just store the state back unchanged. */
cmpq %rdi,%rsi
je .Lend
.Lloop:
/* Remember the state at block start for the final additions. */
movl %eax,%r8d
movl %ebx,%r9d
movl %ecx,%r14d
movl %edx,%r15d
/*
 * Round 1, 16 steps: f = ((c ^ d) & b) ^ d, message words X[0..15] in
 * order, rotate counts 7,12,17,22.
 */
movl 0(%rsi),%r10d
movl %edx,%r11d
xorl %ecx,%r11d
leal -680876936(%rax,%r10,1),%eax
andl %ebx,%r11d
movl 4(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -389564586(%rdx,%r10,1),%edx
andl %eax,%r11d
movl 8(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal 606105819(%rcx,%r10,1),%ecx
andl %edx,%r11d
movl 12(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -1044525330(%rbx,%r10,1),%ebx
andl %ecx,%r11d
movl 16(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal -176418897(%rax,%r10,1),%eax
andl %ebx,%r11d
movl 20(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal 1200080426(%rdx,%r10,1),%edx
andl %eax,%r11d
movl 24(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -1473231341(%rcx,%r10,1),%ecx
andl %edx,%r11d
movl 28(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -45705983(%rbx,%r10,1),%ebx
andl %ecx,%r11d
movl 32(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal 1770035416(%rax,%r10,1),%eax
andl %ebx,%r11d
movl 36(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -1958414417(%rdx,%r10,1),%edx
andl %eax,%r11d
movl 40(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -42063(%rcx,%r10,1),%ecx
andl %edx,%r11d
movl 44(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -1990404162(%rbx,%r10,1),%ebx
andl %ecx,%r11d
movl 48(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal 1804603682(%rax,%r10,1),%eax
andl %ebx,%r11d
movl 52(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -40341101(%rdx,%r10,1),%edx
andl %eax,%r11d
movl 56(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -1502002290(%rcx,%r10,1),%ecx
andl %edx,%r11d
movl 60(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal 1236535329(%rbx,%r10,1),%ebx
andl %ecx,%r11d
movl 4(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
/*
 * Round 2, 16 steps: f = (b & d) | (c & ~d), computed as
 * %r12d = d & b and %r11d = ~d & c.  Message words are read in the
 * order 1,6,11,0,5,10,15,4,9,14,3,8,13,2,7,12; rotate counts 5,9,14,20.
 */
movl %edx,%r11d
movl %edx,%r12d
notl %r11d
andl %ebx,%r12d
leal -165796510(%rax,%r10,1),%eax
andl %ecx,%r11d
movl 24(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
andl %eax,%r12d
leal -1069501632(%rdx,%r10,1),%edx
andl %ebx,%r11d
movl 44(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
andl %edx,%r12d
leal 643717713(%rcx,%r10,1),%ecx
andl %eax,%r11d
movl 0(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
andl %ecx,%r12d
leal -373897302(%rbx,%r10,1),%ebx
andl %edx,%r11d
movl 20(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
andl %ebx,%r12d
leal -701558691(%rax,%r10,1),%eax
andl %ecx,%r11d
movl 40(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
andl %eax,%r12d
leal 38016083(%rdx,%r10,1),%edx
andl %ebx,%r11d
movl 60(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
andl %edx,%r12d
leal -660478335(%rcx,%r10,1),%ecx
andl %eax,%r11d
movl 16(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
andl %ecx,%r12d
leal -405537848(%rbx,%r10,1),%ebx
andl %edx,%r11d
movl 36(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
andl %ebx,%r12d
leal 568446438(%rax,%r10,1),%eax
andl %ecx,%r11d
movl 56(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
andl %eax,%r12d
leal -1019803690(%rdx,%r10,1),%edx
andl %ebx,%r11d
movl 12(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
andl %edx,%r12d
leal -187363961(%rcx,%r10,1),%ecx
andl %eax,%r11d
movl 32(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
andl %ecx,%r12d
leal 1163531501(%rbx,%r10,1),%ebx
andl %edx,%r11d
movl 52(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
andl %ebx,%r12d
leal -1444681467(%rax,%r10,1),%eax
andl %ecx,%r11d
movl 8(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
andl %eax,%r12d
leal -51403784(%rdx,%r10,1),%edx
andl %ebx,%r11d
movl 28(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
andl %edx,%r12d
leal 1735328473(%rcx,%r10,1),%ecx
andl %eax,%r11d
movl 48(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
andl %ecx,%r12d
leal -1926607734(%rbx,%r10,1),%ebx
andl %edx,%r11d
movl 20(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
/*
 * Round 3, 16 steps: f = b ^ c ^ d.  Message words are read in the
 * order 5,8,11,14,1,4,7,10,13,0,3,6,9,12,15,2; rotate counts 4,11,16,23.
 */
movl %ecx,%r11d
leal -378558(%rax,%r10,1),%eax
xorl %edx,%r11d
movl 32(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%eax
movl %ebx,%r11d
roll $4,%eax
addl %ebx,%eax
leal -2022574463(%rdx,%r10,1),%edx
xorl %ecx,%r11d
movl 44(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal 1839030562(%rcx,%r10,1),%ecx
xorl %ebx,%r11d
movl 56(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%ecx
movl %edx,%r11d
roll $16,%ecx
addl %edx,%ecx
leal -35309556(%rbx,%r10,1),%ebx
xorl %eax,%r11d
movl 4(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal -1530992060(%rax,%r10,1),%eax
xorl %edx,%r11d
movl 16(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%eax
movl %ebx,%r11d
roll $4,%eax
addl %ebx,%eax
leal 1272893353(%rdx,%r10,1),%edx
xorl %ecx,%r11d
movl 28(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal -155497632(%rcx,%r10,1),%ecx
xorl %ebx,%r11d
movl 40(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%ecx
movl %edx,%r11d
roll $16,%ecx
addl %edx,%ecx
leal -1094730640(%rbx,%r10,1),%ebx
xorl %eax,%r11d
movl 52(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal 681279174(%rax,%r10,1),%eax
xorl %edx,%r11d
movl 0(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%eax
movl %ebx,%r11d
roll $4,%eax
addl %ebx,%eax
leal -358537222(%rdx,%r10,1),%edx
xorl %ecx,%r11d
movl 12(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal -722521979(%rcx,%r10,1),%ecx
xorl %ebx,%r11d
movl 24(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%ecx
movl %edx,%r11d
roll $16,%ecx
addl %edx,%ecx
leal 76029189(%rbx,%r10,1),%ebx
xorl %eax,%r11d
movl 36(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal -640364487(%rax,%r10,1),%eax
xorl %edx,%r11d
movl 48(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%eax
movl %ebx,%r11d
roll $4,%eax
addl %ebx,%eax
leal -421815835(%rdx,%r10,1),%edx
xorl %ecx,%r11d
movl 60(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal 530742520(%rcx,%r10,1),%ecx
xorl %ebx,%r11d
movl 8(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%ecx
movl %edx,%r11d
roll $16,%ecx
addl %edx,%ecx
leal -995338651(%rbx,%r10,1),%ebx
xorl %eax,%r11d
movl 0(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
/*
 * Round 4, 16 steps: f = c ^ (b | ~d), with ~d formed as
 * 0xffffffff ^ d.  Message words are read in the order
 * 0,7,14,5,12,3,10,1,8,15,6,13,4,11,2,9; rotate counts 6,10,15,21.
 * The X[0] load and the ~d setup issued during the last step are
 * pipeline leftovers: both registers are overwritten before being read.
 */
movl $0xffffffff,%r11d
xorl %edx,%r11d
leal -198630844(%rax,%r10,1),%eax
orl %ebx,%r11d
movl 28(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%eax
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal 1126891415(%rdx,%r10,1),%edx
orl %eax,%r11d
movl 56(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%edx
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1416354905(%rcx,%r10,1),%ecx
orl %edx,%r11d
movl 20(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%ecx
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -57434055(%rbx,%r10,1),%ebx
orl %ecx,%r11d
movl 48(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%ebx
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal 1700485571(%rax,%r10,1),%eax
orl %ebx,%r11d
movl 12(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%eax
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -1894986606(%rdx,%r10,1),%edx
orl %eax,%r11d
movl 40(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%edx
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1051523(%rcx,%r10,1),%ecx
orl %edx,%r11d
movl 4(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%ecx
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -2054922799(%rbx,%r10,1),%ebx
orl %ecx,%r11d
movl 32(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%ebx
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal 1873313359(%rax,%r10,1),%eax
orl %ebx,%r11d
movl 60(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%eax
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -30611744(%rdx,%r10,1),%edx
orl %eax,%r11d
movl 24(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%edx
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1560198380(%rcx,%r10,1),%ecx
orl %edx,%r11d
movl 52(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%ecx
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal 1309151649(%rbx,%r10,1),%ebx
orl %ecx,%r11d
movl 16(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%ebx
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal -145523070(%rax,%r10,1),%eax
orl %ebx,%r11d
movl 44(%rsi),%r10d
xorl %ecx,%r11d
addl %r11d,%eax
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -1120210379(%rdx,%r10,1),%edx
orl %eax,%r11d
movl 8(%rsi),%r10d
xorl %ebx,%r11d
addl %r11d,%edx
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal 718787259(%rcx,%r10,1),%ecx
orl %edx,%r11d
movl 36(%rsi),%r10d
xorl %eax,%r11d
addl %r11d,%ecx
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -343485551(%rbx,%r10,1),%ebx
orl %ecx,%r11d
movl 0(%rsi),%r10d
xorl %edx,%r11d
addl %r11d,%ebx
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
/* Add the block-start state back in and advance to the next block. */
addl %r8d,%eax
addl %r9d,%ebx
addl %r14d,%ecx
addl %r15d,%edx
addq $64,%rsi
cmpq %rdi,%rsi
jb .Lloop
.Lend:
/* Store the state, then reload the saved registers from the stack. */
movl %eax,0(%rbp)
movl %ebx,4(%rbp)
movl %ecx,8(%rbp)
movl %edx,12(%rbp)
movq (%rsp),%r15
.cfi_restore %r15
movq 8(%rsp),%r14
.cfi_restore %r14
movq 16(%rsp),%r12
.cfi_restore %r12
movq 24(%rsp),%rbx
.cfi_restore %rbx
movq 32(%rsp),%rbp
.cfi_restore %rbp
addq $40,%rsp
.cfi_adjust_cfa_offset -40
.Lepilogue:
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc
.size ossl_md5_block_asm_data_order,.-ossl_md5_block_asm_data_order
/*
 * ELF note placed in .note.gnu.property.  Fields as emitted below:
 *   .long 1f - 0f   size of the name field (the 4 bytes "GNU\0")
 *   .long 4f - 1f   size of the descriptor that follows the name
 *   .long 5         note type
 *   name bytes, then one property record:
 *     .long 0xc0000002   property type
 *     .long 3f - 2f      property data size (4 bytes)
 *     .long 3            property data
 *   with 8-byte padding after the name and after the data.
 * NOTE(review): per the GNU program-property ABI, note type 5 is
 * NT_GNU_PROPERTY_TYPE_0 and property 0xc0000002 with value 3 is
 * GNU_PROPERTY_X86_FEATURE_1_AND = IBT | SHSTK, i.e. the object is marked
 * CET-compatible - verify against the psABI before relying on this.
 */
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1,657 +0,0 @@
/* Do not modify. This file is auto-generated from rc4-x86_64.pl. */
/*
 * RC4
 *
 * Register use on entry, as read by the code below (the first four
 * System V AMD64 integer argument registers):
 *   %rdi - key structure: two 32-bit indices at offsets 0 and 4, then the
 *          permutation table at offset 8
 *   %rsi - number of bytes to process; returns immediately when zero
 *   %rdx - input pointer
 *   %rcx - output pointer
 *
 * After the prologue:
 *   %rdi - the table (structure + 8)
 *   %r10 - index x, %rcx - index y (updated with byte-wide arithmetic)
 *   %r11 - bytes remaining, %r12 - input cursor
 *   %r13 - output pointer; on the 32-bit-table paths it is converted to
 *          (out - in) so that (%r12,%r13,1) addresses the output byte
 *   %eax - table entry at x, preloaded for the next step
 *
 * The table format is chosen at run time: when the 32-bit word at
 * table+256 equals -1 the table holds one byte per entry (.LRC4_CHAR
 * paths); otherwise every entry is a 32-bit word.  RC4_set_key in this
 * file stores that -1 marker after building a byte table.  On the
 * 32-bit-table path, bit 30 of the first OPENSSL_ia32cap_P word selects
 * the 16-bytes-per-iteration SSE loop (.Lintel) over the 8-byte loop
 * (.Loop8).  NOTE(review): the label suggests bit 30 flags Intel CPUs -
 * confirm against the OPENSSL_ia32cap documentation.
 *
 * %rbx, %r12 and %r13 are saved and restored.  On exit the indices are
 * written back to the structure.
 */
.text
.globl RC4
.type RC4,@function
.align 16
RC4:
.cfi_startproc
/* 243,15,30,250 = f3 0f 1e fa, the encoding of endbr64. */
.byte 243,15,30,250
orq %rsi,%rsi
jne .Lentry
.byte 0xf3,0xc3
.Lentry:
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-24
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-32
.Lprologue:
movq %rsi,%r11
movq %rdx,%r12
movq %rcx,%r13
xorq %r10,%r10
xorq %rcx,%rcx
leaq 8(%rdi),%rdi
movb -8(%rdi),%r10b
movb -4(%rdi),%cl
cmpl $-1,256(%rdi)
je .LRC4_CHAR
/*
 * 32-bit-entry table.  x is pre-incremented; %rbx = -x is later masked
 * to give the number of single-byte warm-up steps, and %r13 becomes
 * (out - in).
 */
movl OPENSSL_ia32cap_P(%rip),%r8d
xorq %rbx,%rbx
incb %r10b
subq %r10,%rbx
subq %r12,%r13
movl (%rdi,%r10,4),%eax
/* Fewer than 16 bytes: byte-at-a-time loop only. */
testq $-16,%r11
jz .Lloop1
btl $30,%r8d
jc .Lintel
/*
 * Warm-up for .Loop8: (-x) & 7 single-byte steps, after which x is a
 * multiple of 8.  %rsi tracks x + 1.
 */
andq $7,%rbx
leaq 1(%r10),%rsi
jz .Loop8
subq %rbx,%r11
.Loop8_warmup:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r12,%r13,1)
leaq 1(%r12),%r12
decq %rbx
jnz .Loop8_warmup
leaq 1(%r10),%rsi
jmp .Loop8
.align 16
/*
 * Eight bytes per iteration.  Each step swaps table[x+i] with table[y]
 * and moves one key-stream byte into the low byte of %r8; %r8 is rotated
 * right by 8 around every insertion, so after the final rotate the eight
 * bytes sit in memory order and are XORed with 8 input bytes at once.
 * %eax and %ebx alternate as the preloaded table[x+i] value.
 */
.Loop8:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 0(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,0(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 4(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,4(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 8(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,8(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 12(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,12(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 16(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,16(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 20(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,20(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 24(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,24(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
/* %sil wraps modulo 256 here, so the next preload uses offset -4. */
addb $8,%sil
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl -4(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,28(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb $8,%r10b
rorq $8,%r8
subq $8,%r11
xorq (%r12),%r8
movq %r8,(%r12,%r13,1)
leaq 8(%r12),%r12
testq $-8,%r11
jnz .Loop8
cmpq $0,%r11
jne .Lloop1
jmp .Lexit
.align 16
/*
 * SSE path: needs at least 32 bytes, otherwise falls back to .Lloop1.
 * Warm-up runs (-x) & 15 single-byte steps so that x is a multiple of 16.
 */
.Lintel:
testq $-32,%r11
jz .Lloop1
andq $15,%rbx
jz .Loop16_is_hot
subq %rbx,%r11
.Loop16_warmup:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r12,%r13,1)
leaq 1(%r12),%r12
decq %rbx
jnz .Loop16_warmup
/* Clear the upper bits of %rcx, keeping only the byte-wide y. */
movq %rcx,%rbx
xorq %rcx,%rcx
movb %bl,%cl
/*
 * 16 bytes per iteration.  %rsi = &table[x].  Key-stream entries for
 * even byte positions are inserted as 16-bit words into %xmm0 and those
 * for odd positions into %xmm1; %xmm1 is later shifted left by 8 bits
 * and both are XORed into the 16 input bytes held in %xmm2.  The store
 * of one iteration's result is deferred into the start of the next
 * (.Loop16) or done after the loop exits.
 */
.Loop16_is_hot:
leaq (%rdi,%r10,4),%rsi
addb %al,%cl
movl (%rdi,%rcx,4),%edx
pxor %xmm0,%xmm0
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 4(%rsi),%ebx
movzbl %al,%eax
movl %edx,0(%rsi)
addb %bl,%cl
pinsrw $0,(%rdi,%rax,4),%xmm0
jmp .Loop16_enter
.align 16
.Loop16:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
pxor %xmm0,%xmm2
psllq $8,%xmm1
pxor %xmm0,%xmm0
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 4(%rsi),%ebx
movzbl %al,%eax
movl %edx,0(%rsi)
pxor %xmm1,%xmm2
addb %bl,%cl
pinsrw $0,(%rdi,%rax,4),%xmm0
movdqu %xmm2,(%r12,%r13,1)
leaq 16(%r12),%r12
.Loop16_enter:
movl (%rdi,%rcx,4),%edx
pxor %xmm1,%xmm1
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 8(%rsi),%eax
movzbl %bl,%ebx
movl %edx,4(%rsi)
addb %al,%cl
pinsrw $0,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 12(%rsi),%ebx
movzbl %al,%eax
movl %edx,8(%rsi)
addb %bl,%cl
pinsrw $1,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 16(%rsi),%eax
movzbl %bl,%ebx
movl %edx,12(%rsi)
addb %al,%cl
pinsrw $1,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 20(%rsi),%ebx
movzbl %al,%eax
movl %edx,16(%rsi)
addb %bl,%cl
pinsrw $2,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 24(%rsi),%eax
movzbl %bl,%ebx
movl %edx,20(%rsi)
addb %al,%cl
pinsrw $2,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 28(%rsi),%ebx
movzbl %al,%eax
movl %edx,24(%rsi)
addb %bl,%cl
pinsrw $3,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 32(%rsi),%eax
movzbl %bl,%ebx
movl %edx,28(%rsi)
addb %al,%cl
pinsrw $3,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 36(%rsi),%ebx
movzbl %al,%eax
movl %edx,32(%rsi)
addb %bl,%cl
pinsrw $4,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 40(%rsi),%eax
movzbl %bl,%ebx
movl %edx,36(%rsi)
addb %al,%cl
pinsrw $4,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 44(%rsi),%ebx
movzbl %al,%eax
movl %edx,40(%rsi)
addb %bl,%cl
pinsrw $5,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 48(%rsi),%eax
movzbl %bl,%ebx
movl %edx,44(%rsi)
addb %al,%cl
pinsrw $5,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 52(%rsi),%ebx
movzbl %al,%eax
movl %edx,48(%rsi)
addb %bl,%cl
pinsrw $6,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 56(%rsi),%eax
movzbl %bl,%ebx
movl %edx,52(%rsi)
addb %al,%cl
pinsrw $6,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 60(%rsi),%ebx
movzbl %al,%eax
movl %edx,56(%rsi)
addb %bl,%cl
pinsrw $7,(%rdi,%rax,4),%xmm0
/* Advance x by 16 (byte-wide) and fetch the 16 input bytes. */
addb $16,%r10b
movdqu (%r12),%xmm2
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movzbl %bl,%ebx
movl %edx,60(%rsi)
leaq (%rdi,%r10,4),%rsi
pinsrw $7,(%rdi,%rbx,4),%xmm1
movl (%rsi),%eax
movq %rcx,%rbx
xorq %rcx,%rcx
subq $16,%r11
movb %bl,%cl
testq $-16,%r11
jnz .Loop16
/* Flush the final 16-byte group, then handle any remaining bytes. */
psllq $8,%xmm1
pxor %xmm0,%xmm2
pxor %xmm1,%xmm2
movdqu %xmm2,(%r12,%r13,1)
leaq 16(%r12),%r12
cmpq $0,%r11
jne .Lloop1
jmp .Lexit
.align 16
/* One byte per iteration, 32-bit-entry table. */
.Lloop1:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r12,%r13,1)
leaq 1(%r12),%r12
decq %r11
jnz .Lloop1
jmp .Lexit
.align 16
/*
 * Byte-entry table.  On these paths %r13 is still the absolute output
 * pointer (the out - in conversion above was skipped).
 */
.LRC4_CHAR:
addb $1,%r10b
movzbl (%rdi,%r10,1),%eax
testq $-8,%r11
jz .Lcloop1
jmp .Lcloop8
.align 16
/*
 * Eight bytes per iteration: two 32-bit input words are loaded into
 * %r8d/%r9d, and each step XORs one key-stream byte into the low byte
 * and rotates right by 8.  The next table[x+1] value is preloaded before
 * the swap stores to table[y]; when y equals that next index the preload
 * is stale, so the .Lcmov* fix-ups substitute the value just stored.
 */
.Lcloop8:
movl (%r12),%r8d
movl 4(%r12),%r9d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov0
movq %rax,%rbx
.Lcmov0:
addb %al,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov1
movq %rbx,%rax
.Lcmov1:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov2
movq %rax,%rbx
.Lcmov2:
addb %al,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov3
movq %rbx,%rax
.Lcmov3:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov4
movq %rax,%rbx
.Lcmov4:
addb %al,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov5
movq %rbx,%rax
.Lcmov5:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov6
movq %rax,%rbx
.Lcmov6:
addb %al,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov7
movq %rbx,%rax
.Lcmov7:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
leaq -8(%r11),%r11
movl %r8d,(%r13)
leaq 8(%r12),%r12
movl %r9d,4(%r13)
leaq 8(%r13),%r13
testq $-8,%r11
jnz .Lcloop8
cmpq $0,%r11
jne .Lcloop1
jmp .Lexit
.align 16
/* One byte per iteration, byte-entry table. */
.Lcloop1:
addb %al,%cl
movzbl %cl,%ecx
movzbl (%rdi,%rcx,1),%edx
movb %al,(%rdi,%rcx,1)
movb %dl,(%rdi,%r10,1)
addb %al,%dl
addb $1,%r10b
movzbl %dl,%edx
movzbl %r10b,%r10d
movzbl (%rdi,%rdx,1),%edx
movzbl (%rdi,%r10,1),%eax
xorb (%r12),%dl
leaq 1(%r12),%r12
movb %dl,(%r13)
leaq 1(%r13),%r13
subq $1,%r11
jnz .Lcloop1
jmp .Lexit
.align 16
.Lexit:
/* Undo the pre-increment of x and store both indices back. */
subb $1,%r10b
movl %r10d,-8(%rdi)
movl %ecx,-4(%rdi)
movq (%rsp),%r13
.cfi_restore %r13
movq 8(%rsp),%r12
.cfi_restore %r12
movq 16(%rsp),%rbx
.cfi_restore %rbx
addq $24,%rsp
.cfi_adjust_cfa_offset -24
.Lepilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size RC4,.-RC4
/*
 * RC4_set_key
 *
 * Register use on entry, as read by the code below:
 *   %rdi - key structure: two 32-bit indices at offsets 0 and 4, then the
 *          permutation table at offset 8
 *   %rsi - key length in bytes
 *   %rdx - pointer to the key bytes
 *
 * %rdx is moved to the end of the key and %rsi is negated, so
 * (%rdx,%rsi,1) walks the key while %rsi counts up towards zero; when it
 * reaches zero it is reloaded from %rcx (the negated length) to wrap
 * around to the first key byte.
 *
 * Bit 20 of the first OPENSSL_ia32cap_P word selects the byte-entry table
 * (.Lc* loops), which also stores -1 at table+256 as a format marker;
 * otherwise a table of 32-bit entries is built (.Lw* loops).  Each
 * variant first fills the table with 0..255, then runs 256 swap steps
 * with j += table[i] + key byte (byte-wide).
 *
 * Both indices are zeroed on exit and %eax is left at 0.
 */
.globl RC4_set_key
.type RC4_set_key,@function
.align 16
RC4_set_key:
.cfi_startproc
/* 243,15,30,250 = f3 0f 1e fa, the encoding of endbr64. */
.byte 243,15,30,250
leaq 8(%rdi),%rdi
leaq (%rdx,%rsi,1),%rdx
negq %rsi
movq %rsi,%rcx
xorl %eax,%eax
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
movl OPENSSL_ia32cap_P(%rip),%r8d
btl $20,%r8d
jc .Lc1stloop
jmp .Lw1stloop
.align 16
/* table[i] = i for i = 0..255; the loop ends when %al wraps (carry set). */
.Lw1stloop:
movl %eax,(%rdi,%rax,4)
addb $1,%al
jnc .Lw1stloop
xorq %r9,%r9
xorq %r8,%r8
.align 16
/*
 * %r9 = i, %r8 = j.  The zero flag tested by cmovz comes from
 * "addq $1,%rsi"; the movl in between does not modify flags.
 */
.Lw2ndloop:
movl (%rdi,%r9,4),%r10d
addb (%rdx,%rsi,1),%r8b
addb %r10b,%r8b
addq $1,%rsi
movl (%rdi,%r8,4),%r11d
cmovzq %rcx,%rsi
movl %r10d,(%rdi,%r8,4)
movl %r11d,(%rdi,%r9,4)
addb $1,%r9b
jnc .Lw2ndloop
jmp .Lexit_key
.align 16
/* Same two loops for the byte-entry table. */
.Lc1stloop:
movb %al,(%rdi,%rax,1)
addb $1,%al
jnc .Lc1stloop
xorq %r9,%r9
xorq %r8,%r8
.align 16
.Lc2ndloop:
movb (%rdi,%r9,1),%r10b
addb (%rdx,%rsi,1),%r8b
addb %r10b,%r8b
addq $1,%rsi
movb (%rdi,%r8,1),%r11b
/* As above, jnz tests the flags left by "addq $1,%rsi". */
jnz .Lcnowrap
movq %rcx,%rsi
.Lcnowrap:
movb %r10b,(%rdi,%r8,1)
movb %r11b,(%rdi,%r9,1)
addb $1,%r9b
jnc .Lc2ndloop
/* Mark the table as byte-entry format. */
movl $-1,256(%rdi)
.align 16
.Lexit_key:
xorl %eax,%eax
movl %eax,-8(%rdi)
movl %eax,-4(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size RC4_set_key,.-RC4_set_key
/*
 * RC4_options
 *
 * Returns in %rax a pointer to one of the NUL-terminated strings stored
 * at .Lopts, chosen from the first OPENSSL_ia32cap_P word:
 *   bit 20 set                -> .Lopts + 12  ("rc4(8x,char)")
 *   bit 20 clear, bit 30 set  -> .Lopts + 25  ("rc4(16x,int)")
 *   neither                   -> .Lopts       ("rc4(8x,int)")
 * Takes no arguments; only volatile registers and flags are modified.
 */
.globl RC4_options
.type RC4_options,@function
.align 16
RC4_options:
.cfi_startproc
/* 243,15,30,250 = f3 0f 1e fa, the encoding of endbr64. */
.byte 243,15,30,250
movl OPENSSL_ia32cap_P(%rip),%ecx
leaq .Lopts(%rip),%rax
/* 0x00100000 = 1 << 20 */
testl $0x00100000,%ecx
jnz .Lopt_8x_char
/* 0x40000000 = 1 << 30 */
testl $0x40000000,%ecx
jz .Lopt_ret
/* Skip the first two strings (12 + 13 bytes). */
leaq 25(%rax),%rax
.byte 0xf3,0xc3
.Lopt_8x_char:
/* Skip the first string (12 bytes). */
leaq 12(%rax),%rax
.Lopt_ret:
.byte 0xf3,0xc3
.cfi_endproc
.align 64
.Lopts:
/* "rc4(8x,int)" */
.byte 114,99,52,40,56,120,44,105,110,116,41,0
/* "rc4(8x,char)" */
.byte 114,99,52,40,56,120,44,99,104,97,114,41,0
/* "rc4(16x,int)" */
.byte 114,99,52,40,49,54,120,44,105,110,116,41,0
/* "RC4 for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
.byte 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.size RC4_options,.-RC4_options
/*
 * ELF note placed in .note.gnu.property.  Fields as emitted below:
 *   .long 1f - 0f   size of the name field (the 4 bytes "GNU\0")
 *   .long 4f - 1f   size of the descriptor that follows the name
 *   .long 5         note type
 *   name bytes, then one property record:
 *     .long 0xc0000002   property type
 *     .long 3f - 2f      property data size (4 bytes)
 *     .long 3            property data
 *   with 8-byte padding after the name and after the data.
 * NOTE(review): per the GNU program-property ABI, note type 5 is
 * NT_GNU_PROPERTY_TYPE_0 and property 0xc0000002 with value 3 is
 * GNU_PROPERTY_X86_FEATURE_1_AND = IBT | SHSTK, i.e. the object is marked
 * CET-compatible - verify against the psABI before relying on this.
 */
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:
File diff suppressed because it is too large Load Diff
@@ -1,902 +0,0 @@
/* Do not modify. This file is auto-generated from rsaz-avx512.pl. */
/*
 * ossl_rsaz_avx512ifma_eligible
 *
 * Returns in %eax the mask 0x80230000 (bits 16, 17, 21 and 31) when all
 * four of those bits are set in the 32-bit word at OPENSSL_ia32cap_P+8,
 * and 0 otherwise.  Takes no arguments; %ecx and flags are clobbered.
 * NOTE(review): per the OPENSSL_ia32cap documentation that word mirrors
 * CPUID.(EAX=7,ECX=0):EBX, where these bits are AVX512F, AVX512DQ,
 * AVX512IFMA and AVX512VL - confirm before relying on the names.
 */
.globl ossl_rsaz_avx512ifma_eligible
.type ossl_rsaz_avx512ifma_eligible,@function
.align 32
ossl_rsaz_avx512ifma_eligible:
movl OPENSSL_ia32cap_P+8(%rip),%eax
/* Keep only the required feature bits. */
andl $0x80230000,%eax
/* Zero for the "not eligible" result; done before cmp since xor sets flags. */
xorl %ecx,%ecx
cmpl $0x80230000,%eax
/* Any required bit missing: return 0 instead of the partial mask. */
cmovnel %ecx,%eax
.byte 0xf3,0xc3
.size ossl_rsaz_avx512ifma_eligible, .-ossl_rsaz_avx512ifma_eligible
/*
 * ossl_rsaz_amm52x20_x1_256
 *
 * Register use on entry, as read by the code below:
 *   %rdi - result: five 32-byte vectors (20 64-bit lanes) are stored here
 *   %rsi - first operand: read as five 32-byte vectors, plus its word 0
 *   %rdx - second operand: walked one 64-bit word at a time (20 words)
 *   %rcx - third operand: read as five 32-byte vectors, plus its word 0
 *   %r8  - 64-bit scalar multiplied with the running low word each step
 * NOTE(review): the name and the 52-bit masking suggest an "almost
 * Montgomery multiplication" of 20 x 52-bit limbs (res, a, b, m, k0) -
 * confirm against the generating rsaz-avx512.pl.
 *
 * State during the main loop:
 *   %ymm1,%ymm16..%ymm19 - 20-lane accumulator, %ymm0 - all zero
 *   %r9   - scalar accumulator for the lowest lane
 *   %r11  - cursor into the second operand, %rax - mask 2^52 - 1
 *   %ymm3 - current word of the second operand, broadcast
 *   %ymm4 - per-step reduction factor, broadcast
 *
 * %rbx,%rbp,%r12..%r15 are saved and restored.  vzeroupper is issued
 * before returning.
 */
.text
.globl ossl_rsaz_amm52x20_x1_256
.type ossl_rsaz_amm52x20_x1_256,@function
.align 32
ossl_rsaz_amm52x20_x1_256:
.cfi_startproc
/* 243,15,30,250 = f3 0f 1e fa, the encoding of endbr64. */
.byte 243,15,30,250
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
.Lrsaz_amm52x20_x1_256_body:
/* Clear the accumulators and the scalar low word. */
vpxord %ymm0,%ymm0,%ymm0
vmovdqa64 %ymm0,%ymm1
vmovdqa64 %ymm0,%ymm16
vmovdqa64 %ymm0,%ymm17
vmovdqa64 %ymm0,%ymm18
vmovdqa64 %ymm0,%ymm19
xorl %r9d,%r9d
movq %rdx,%r11
movq $0xfffffffffffff,%rax
/* 5 iterations x 4 unrolled steps = 20 words of the second operand. */
movl $5,%ebx
.align 32
.Lloop5:
/* Step for the word at 0(%r11): broadcast it into %ymm3. */
movq 0(%r11),%r13
vpbroadcastq %r13,%ymm3
/* %r10:%r9 = %r9 + word * (first operand word 0); mulx uses %rdx implicitly. */
movq 0(%rsi),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
movq %r12,%r10
adcq $0,%r10
/* Reduction factor: (%r8 * %r9) masked to 52 bits, broadcast into %ymm4. */
movq %r8,%r13
imulq %r9,%r13
andq %rax,%r13
vpbroadcastq %r13,%ymm4
/* Add factor * (third operand word 0), then %r9 = (%r10:%r9) >> 52. */
movq 0(%rcx),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
adcq %r12,%r10
shrq $52,%r9
salq $12,%r10
orq %r10,%r9
/* Accumulate the low 52 bits of the lane-wise products. */
vpmadd52luq 0(%rsi),%ymm3,%ymm1
vpmadd52luq 32(%rsi),%ymm3,%ymm16
vpmadd52luq 64(%rsi),%ymm3,%ymm17
vpmadd52luq 96(%rsi),%ymm3,%ymm18
vpmadd52luq 128(%rsi),%ymm3,%ymm19
vpmadd52luq 0(%rcx),%ymm4,%ymm1
vpmadd52luq 32(%rcx),%ymm4,%ymm16
vpmadd52luq 64(%rcx),%ymm4,%ymm17
vpmadd52luq 96(%rcx),%ymm4,%ymm18
vpmadd52luq 128(%rcx),%ymm4,%ymm19
/* Shift the 20-lane accumulator down by one lane; zero enters at the top. */
valignq $1,%ymm1,%ymm16,%ymm1
valignq $1,%ymm16,%ymm17,%ymm16
valignq $1,%ymm17,%ymm18,%ymm17
valignq $1,%ymm18,%ymm19,%ymm18
valignq $1,%ymm19,%ymm0,%ymm19
/* Fold the lane that is now lowest into the scalar word. */
vmovq %xmm1,%r13
addq %r13,%r9
/* Accumulate the high 52 bits of the same products, after the shift. */
vpmadd52huq 0(%rsi),%ymm3,%ymm1
vpmadd52huq 32(%rsi),%ymm3,%ymm16
vpmadd52huq 64(%rsi),%ymm3,%ymm17
vpmadd52huq 96(%rsi),%ymm3,%ymm18
vpmadd52huq 128(%rsi),%ymm3,%ymm19
vpmadd52huq 0(%rcx),%ymm4,%ymm1
vpmadd52huq 32(%rcx),%ymm4,%ymm16
vpmadd52huq 64(%rcx),%ymm4,%ymm17
vpmadd52huq 96(%rcx),%ymm4,%ymm18
vpmadd52huq 128(%rcx),%ymm4,%ymm19
/* Same step for the word at 8(%r11). */
movq 8(%r11),%r13
vpbroadcastq %r13,%ymm3
movq 0(%rsi),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
movq %r12,%r10
adcq $0,%r10
movq %r8,%r13
imulq %r9,%r13
andq %rax,%r13
vpbroadcastq %r13,%ymm4
movq 0(%rcx),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
adcq %r12,%r10
shrq $52,%r9
salq $12,%r10
orq %r10,%r9
vpmadd52luq 0(%rsi),%ymm3,%ymm1
vpmadd52luq 32(%rsi),%ymm3,%ymm16
vpmadd52luq 64(%rsi),%ymm3,%ymm17
vpmadd52luq 96(%rsi),%ymm3,%ymm18
vpmadd52luq 128(%rsi),%ymm3,%ymm19
vpmadd52luq 0(%rcx),%ymm4,%ymm1
vpmadd52luq 32(%rcx),%ymm4,%ymm16
vpmadd52luq 64(%rcx),%ymm4,%ymm17
vpmadd52luq 96(%rcx),%ymm4,%ymm18
vpmadd52luq 128(%rcx),%ymm4,%ymm19
valignq $1,%ymm1,%ymm16,%ymm1
valignq $1,%ymm16,%ymm17,%ymm16
valignq $1,%ymm17,%ymm18,%ymm17
valignq $1,%ymm18,%ymm19,%ymm18
valignq $1,%ymm19,%ymm0,%ymm19
vmovq %xmm1,%r13
addq %r13,%r9
vpmadd52huq 0(%rsi),%ymm3,%ymm1
vpmadd52huq 32(%rsi),%ymm3,%ymm16
vpmadd52huq 64(%rsi),%ymm3,%ymm17
vpmadd52huq 96(%rsi),%ymm3,%ymm18
vpmadd52huq 128(%rsi),%ymm3,%ymm19
vpmadd52huq 0(%rcx),%ymm4,%ymm1
vpmadd52huq 32(%rcx),%ymm4,%ymm16
vpmadd52huq 64(%rcx),%ymm4,%ymm17
vpmadd52huq 96(%rcx),%ymm4,%ymm18
vpmadd52huq 128(%rcx),%ymm4,%ymm19
/* Same step for the word at 16(%r11). */
movq 16(%r11),%r13
vpbroadcastq %r13,%ymm3
movq 0(%rsi),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
movq %r12,%r10
adcq $0,%r10
movq %r8,%r13
imulq %r9,%r13
andq %rax,%r13
vpbroadcastq %r13,%ymm4
movq 0(%rcx),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
adcq %r12,%r10
shrq $52,%r9
salq $12,%r10
orq %r10,%r9
vpmadd52luq 0(%rsi),%ymm3,%ymm1
vpmadd52luq 32(%rsi),%ymm3,%ymm16
vpmadd52luq 64(%rsi),%ymm3,%ymm17
vpmadd52luq 96(%rsi),%ymm3,%ymm18
vpmadd52luq 128(%rsi),%ymm3,%ymm19
vpmadd52luq 0(%rcx),%ymm4,%ymm1
vpmadd52luq 32(%rcx),%ymm4,%ymm16
vpmadd52luq 64(%rcx),%ymm4,%ymm17
vpmadd52luq 96(%rcx),%ymm4,%ymm18
vpmadd52luq 128(%rcx),%ymm4,%ymm19
valignq $1,%ymm1,%ymm16,%ymm1
valignq $1,%ymm16,%ymm17,%ymm16
valignq $1,%ymm17,%ymm18,%ymm17
valignq $1,%ymm18,%ymm19,%ymm18
valignq $1,%ymm19,%ymm0,%ymm19
vmovq %xmm1,%r13
addq %r13,%r9
vpmadd52huq 0(%rsi),%ymm3,%ymm1
vpmadd52huq 32(%rsi),%ymm3,%ymm16
vpmadd52huq 64(%rsi),%ymm3,%ymm17
vpmadd52huq 96(%rsi),%ymm3,%ymm18
vpmadd52huq 128(%rsi),%ymm3,%ymm19
vpmadd52huq 0(%rcx),%ymm4,%ymm1
vpmadd52huq 32(%rcx),%ymm4,%ymm16
vpmadd52huq 64(%rcx),%ymm4,%ymm17
vpmadd52huq 96(%rcx),%ymm4,%ymm18
vpmadd52huq 128(%rcx),%ymm4,%ymm19
/* Same step for the word at 24(%r11). */
movq 24(%r11),%r13
vpbroadcastq %r13,%ymm3
movq 0(%rsi),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
movq %r12,%r10
adcq $0,%r10
movq %r8,%r13
imulq %r9,%r13
andq %rax,%r13
vpbroadcastq %r13,%ymm4
movq 0(%rcx),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
adcq %r12,%r10
shrq $52,%r9
salq $12,%r10
orq %r10,%r9
vpmadd52luq 0(%rsi),%ymm3,%ymm1
vpmadd52luq 32(%rsi),%ymm3,%ymm16
vpmadd52luq 64(%rsi),%ymm3,%ymm17
vpmadd52luq 96(%rsi),%ymm3,%ymm18
vpmadd52luq 128(%rsi),%ymm3,%ymm19
vpmadd52luq 0(%rcx),%ymm4,%ymm1
vpmadd52luq 32(%rcx),%ymm4,%ymm16
vpmadd52luq 64(%rcx),%ymm4,%ymm17
vpmadd52luq 96(%rcx),%ymm4,%ymm18
vpmadd52luq 128(%rcx),%ymm4,%ymm19
valignq $1,%ymm1,%ymm16,%ymm1
valignq $1,%ymm16,%ymm17,%ymm16
valignq $1,%ymm17,%ymm18,%ymm17
valignq $1,%ymm18,%ymm19,%ymm18
valignq $1,%ymm19,%ymm0,%ymm19
vmovq %xmm1,%r13
addq %r13,%r9
vpmadd52huq 0(%rsi),%ymm3,%ymm1
vpmadd52huq 32(%rsi),%ymm3,%ymm16
vpmadd52huq 64(%rsi),%ymm3,%ymm17
vpmadd52huq 96(%rsi),%ymm3,%ymm18
vpmadd52huq 128(%rsi),%ymm3,%ymm19
vpmadd52huq 0(%rcx),%ymm4,%ymm1
vpmadd52huq 32(%rcx),%ymm4,%ymm16
vpmadd52huq 64(%rcx),%ymm4,%ymm17
vpmadd52huq 96(%rcx),%ymm4,%ymm18
vpmadd52huq 128(%rcx),%ymm4,%ymm19
leaq 32(%r11),%r11
decl %ebx
jne .Lloop5
/*
 * Normalisation to 52-bit lanes.  %ymm4 is reloaded with the 52-bit
 * mask and the scalar word %r9 replaces lane 0 of %ymm1.
 */
vmovdqa64 .Lmask52x4(%rip),%ymm4
vpbroadcastq %r9,%ymm3
vpblendd $3,%ymm3,%ymm1,%ymm1
/* Per-lane carries (lane >> 52) ... */
vpsrlq $52,%ymm1,%ymm24
vpsrlq $52,%ymm16,%ymm25
vpsrlq $52,%ymm17,%ymm26
vpsrlq $52,%ymm18,%ymm27
vpsrlq $52,%ymm19,%ymm28
/* ... moved up by one lane across the five registers (zero enters lane 0). */
valignq $3,%ymm27,%ymm28,%ymm28
valignq $3,%ymm26,%ymm27,%ymm27
valignq $3,%ymm25,%ymm26,%ymm26
valignq $3,%ymm24,%ymm25,%ymm25
valignq $3,%ymm0,%ymm24,%ymm24
/* Keep the low 52 bits of every lane and add the incoming carries. */
vpandq %ymm4,%ymm1,%ymm1
vpandq %ymm4,%ymm16,%ymm16
vpandq %ymm4,%ymm17,%ymm17
vpandq %ymm4,%ymm18,%ymm18
vpandq %ymm4,%ymm19,%ymm19
vpaddq %ymm24,%ymm1,%ymm1
vpaddq %ymm25,%ymm16,%ymm16
vpaddq %ymm26,%ymm17,%ymm17
vpaddq %ymm27,%ymm18,%ymm18
vpaddq %ymm28,%ymm19,%ymm19
/* Predicate 1 (unsigned less-than): lanes where mask < lane, i.e. lane overflowed 52 bits. */
vpcmpuq $1,%ymm1,%ymm4,%k1
vpcmpuq $1,%ymm16,%ymm4,%k2
vpcmpuq $1,%ymm17,%ymm4,%k3
vpcmpuq $1,%ymm18,%ymm4,%k4
vpcmpuq $1,%ymm19,%ymm4,%k5
kmovb %k1,%r14d
kmovb %k2,%r13d
kmovb %k3,%r12d
kmovb %k4,%r11d
kmovb %k5,%r10d
/* Predicate 0 (equal): lanes exactly equal to the mask, which pass a carry on. */
vpcmpuq $0,%ymm1,%ymm4,%k1
vpcmpuq $0,%ymm16,%ymm4,%k2
vpcmpuq $0,%ymm17,%ymm4,%k3
vpcmpuq $0,%ymm18,%ymm4,%k4
vpcmpuq $0,%ymm19,%ymm4,%k5
kmovb %k1,%r9d
kmovb %k2,%r8d
kmovb %k3,%ebx
kmovb %k4,%ecx
kmovb %k5,%edx
/*
 * Carry propagation on the lane bitmaps: pack two 4-bit maps per byte,
 * shift the overflow map left by one lane (add/adc with itself), add the
 * equality map so carries ripple through saturated lanes, then XOR with
 * the equality map to obtain the lanes that must be incremented.
 */
shlb $4,%r13b
orb %r13b,%r14b
shlb $4,%r11b
orb %r11b,%r12b
addb %r14b,%r14b
adcb %r12b,%r12b
adcb %r10b,%r10b
shlb $4,%r8b
orb %r8b,%r9b
shlb $4,%cl
orb %cl,%bl
addb %r9b,%r14b
adcb %bl,%r12b
adcb %dl,%r10b
xorb %r9b,%r14b
xorb %bl,%r12b
xorb %dl,%r10b
/* Unpack the bitmaps back into k1..k5 (high nibble via shrb $4). */
kmovb %r14d,%k1
shrb $4,%r14b
kmovb %r14d,%k2
kmovb %r12d,%k3
shrb $4,%r12b
kmovb %r12d,%k4
kmovb %r10d,%k5
/* In selected lanes subtract the mask; with the AND below this is +1 modulo 2^52. */
vpsubq %ymm4,%ymm1,%ymm1{%k1}
vpsubq %ymm4,%ymm16,%ymm16{%k2}
vpsubq %ymm4,%ymm17,%ymm17{%k3}
vpsubq %ymm4,%ymm18,%ymm18{%k4}
vpsubq %ymm4,%ymm19,%ymm19{%k5}
vpandq %ymm4,%ymm1,%ymm1
vpandq %ymm4,%ymm16,%ymm16
vpandq %ymm4,%ymm17,%ymm17
vpandq %ymm4,%ymm18,%ymm18
vpandq %ymm4,%ymm19,%ymm19
/* Store the 20 result lanes. */
vmovdqu64 %ymm1,(%rdi)
vmovdqu64 %ymm16,32(%rdi)
vmovdqu64 %ymm17,64(%rdi)
vmovdqu64 %ymm18,96(%rdi)
vmovdqu64 %ymm19,128(%rdi)
vzeroupper
/* Reload the saved registers from the stack and release the 48 bytes. */
movq 0(%rsp),%r15
.cfi_restore %r15
movq 8(%rsp),%r14
.cfi_restore %r14
movq 16(%rsp),%r13
.cfi_restore %r13
movq 24(%rsp),%r12
.cfi_restore %r12
movq 32(%rsp),%rbp
.cfi_restore %rbp
movq 40(%rsp),%rbx
.cfi_restore %rbx
leaq 48(%rsp),%rsp
.cfi_adjust_cfa_offset -48
.Lrsaz_amm52x20_x1_256_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size ossl_rsaz_amm52x20_x1_256, .-ossl_rsaz_amm52x20_x1_256
/*
 * .Lmask52x4: four 64-bit lanes, each holding 2^52-1 (the low 52 bits set).
 * It is only ever read (loaded with an aligned 256-bit move, hence the
 * 32-byte alignment), so it is placed in read-only data instead of the
 * writable .data section.
 */
.section .rodata
.align 32
.Lmask52x4:
.quad 0xfffffffffffff
.quad 0xfffffffffffff
.quad 0xfffffffffffff
.quad 0xfffffffffffff
.text
/*
 * ossl_rsaz_amm52x20_x2_256
 *
 * Performs two independent 20-digit multiply-and-reduce operations in one
 * call.  Every digit occupies one 64-bit lane and carries 52 significant
 * bits (the code masks with 2^52-1 and uses the 52-bit multiply-add
 * instructions vpmadd52luq / vpmadd52huq).
 * NOTE(review): the name suggests "almost Montgomery multiplication" in
 * radix 2^52 - confirm against the perlasm generator.
 *
 * Register usage on entry, as read by the code below:
 *   rdi  output, 2 x 160 bytes (second result at +160)
 *   rsi  multiplicand digits, 2 x 160 bytes (second operand at +160)
 *   rdx  multiplier digits, 2 x 160 bytes; one digit of each operand is
 *        consumed per loop iteration
 *   rcx  digits multiplied by the per-iteration reduction factor,
 *        2 x 160 bytes (presumably the moduli - confirm with callers)
 *   r8   two 64-bit constants, one per operation, at offsets 0 and 8
 *
 * Writes rax, rcx, rdx, r8-r11, ymm0-ymm4, ymm16-ymm28, k1-k5 and flags.
 * rbx, rbp, r12-r15 are saved on entry and restored before returning.
 */
.globl ossl_rsaz_amm52x20_x2_256
.type ossl_rsaz_amm52x20_x2_256,@function
.align 32
ossl_rsaz_amm52x20_x2_256:
.cfi_startproc
/* F3 0F 1E FA is the encoding of endbr64, emitted as raw bytes. */
.byte 243,15,30,250
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
.Lrsaz_amm52x20_x2_256_body:
/*
 * ymm0 stays all-zero for the whole function.  Accumulator for operation 0
 * is ymm1,ymm16..ymm19; accumulator for operation 1 is ymm2,ymm20..ymm23
 * (5 registers x 4 lanes = 20 digits each).
 */
vpxord %ymm0,%ymm0,%ymm0
vmovdqa64 %ymm0,%ymm1
vmovdqa64 %ymm0,%ymm16
vmovdqa64 %ymm0,%ymm17
vmovdqa64 %ymm0,%ymm18
vmovdqa64 %ymm0,%ymm19
vmovdqa64 %ymm0,%ymm2
vmovdqa64 %ymm0,%ymm20
vmovdqa64 %ymm0,%ymm21
vmovdqa64 %ymm0,%ymm22
vmovdqa64 %ymm0,%ymm23
/*
 * r9 / r15: scalar copies of the lowest accumulator digit of operation 0 / 1.
 * r11: multiplier pointer (rdx itself is needed as the implicit mulx source).
 * rax: 52-bit mask.  ebx: loop counter, one iteration per multiplier digit.
 */
xorl %r9d,%r9d
xorl %r15d,%r15d
movq %rdx,%r11
movq $0xfffffffffffff,%rax
movl $20,%ebx
.align 32
.Lloop20:
/*
 * Operation 0, scalar part: r10:r9 = r9 + a[0] * b[i]; ymm3 = broadcast b[i].
 */
movq 0(%r11),%r13
vpbroadcastq %r13,%ymm3
movq 0(%rsi),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
movq %r12,%r10
adcq $0,%r10
/*
 * Reduction factor: r13 = (k0[0] * r9) mod 2^52, broadcast into ymm4, then
 * r10:r9 += (first digit at rcx) * r13.
 */
movq (%r8),%r13
imulq %r9,%r13
andq %rax,%r13
vpbroadcastq %r13,%ymm4
movq 0(%rcx),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
adcq %r12,%r10
/* r9 = (r10:r9) >> 52, i.e. drop the lowest 52-bit digit. */
shrq $52,%r9
salq $12,%r10
orq %r10,%r9
/* Accumulate the low 52 bits of every lane product. */
vpmadd52luq 0(%rsi),%ymm3,%ymm1
vpmadd52luq 32(%rsi),%ymm3,%ymm16
vpmadd52luq 64(%rsi),%ymm3,%ymm17
vpmadd52luq 96(%rsi),%ymm3,%ymm18
vpmadd52luq 128(%rsi),%ymm3,%ymm19
vpmadd52luq 0(%rcx),%ymm4,%ymm1
vpmadd52luq 32(%rcx),%ymm4,%ymm16
vpmadd52luq 64(%rcx),%ymm4,%ymm17
vpmadd52luq 96(%rcx),%ymm4,%ymm18
vpmadd52luq 128(%rcx),%ymm4,%ymm19
/*
 * Shift the 20-lane accumulator down by one lane; the top lane is filled
 * from the zero register ymm0.
 */
valignq $1,%ymm1,%ymm16,%ymm1
valignq $1,%ymm16,%ymm17,%ymm16
valignq $1,%ymm17,%ymm18,%ymm17
valignq $1,%ymm18,%ymm19,%ymm18
valignq $1,%ymm19,%ymm0,%ymm19
/* Fold the new lowest lane into the scalar running digit. */
vmovq %xmm1,%r13
addq %r13,%r9
/* Accumulate the high 52 bits of every lane product (after the shift). */
vpmadd52huq 0(%rsi),%ymm3,%ymm1
vpmadd52huq 32(%rsi),%ymm3,%ymm16
vpmadd52huq 64(%rsi),%ymm3,%ymm17
vpmadd52huq 96(%rsi),%ymm3,%ymm18
vpmadd52huq 128(%rsi),%ymm3,%ymm19
vpmadd52huq 0(%rcx),%ymm4,%ymm1
vpmadd52huq 32(%rcx),%ymm4,%ymm16
vpmadd52huq 64(%rcx),%ymm4,%ymm17
vpmadd52huq 96(%rcx),%ymm4,%ymm18
vpmadd52huq 128(%rcx),%ymm4,%ymm19
/*
 * Operation 1: identical sequence on the second operand set (all inputs
 * at +160, constant at 8(%r8), scalar digit in r15, accumulator in
 * ymm2,ymm20..ymm23).
 */
movq 160(%r11),%r13
vpbroadcastq %r13,%ymm3
movq 160(%rsi),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r15
movq %r12,%r10
adcq $0,%r10
movq 8(%r8),%r13
imulq %r15,%r13
andq %rax,%r13
vpbroadcastq %r13,%ymm4
movq 160(%rcx),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r15
adcq %r12,%r10
shrq $52,%r15
salq $12,%r10
orq %r10,%r15
vpmadd52luq 160(%rsi),%ymm3,%ymm2
vpmadd52luq 192(%rsi),%ymm3,%ymm20
vpmadd52luq 224(%rsi),%ymm3,%ymm21
vpmadd52luq 256(%rsi),%ymm3,%ymm22
vpmadd52luq 288(%rsi),%ymm3,%ymm23
vpmadd52luq 160(%rcx),%ymm4,%ymm2
vpmadd52luq 192(%rcx),%ymm4,%ymm20
vpmadd52luq 224(%rcx),%ymm4,%ymm21
vpmadd52luq 256(%rcx),%ymm4,%ymm22
vpmadd52luq 288(%rcx),%ymm4,%ymm23
valignq $1,%ymm2,%ymm20,%ymm2
valignq $1,%ymm20,%ymm21,%ymm20
valignq $1,%ymm21,%ymm22,%ymm21
valignq $1,%ymm22,%ymm23,%ymm22
valignq $1,%ymm23,%ymm0,%ymm23
vmovq %xmm2,%r13
addq %r13,%r15
vpmadd52huq 160(%rsi),%ymm3,%ymm2
vpmadd52huq 192(%rsi),%ymm3,%ymm20
vpmadd52huq 224(%rsi),%ymm3,%ymm21
vpmadd52huq 256(%rsi),%ymm3,%ymm22
vpmadd52huq 288(%rsi),%ymm3,%ymm23
vpmadd52huq 160(%rcx),%ymm4,%ymm2
vpmadd52huq 192(%rcx),%ymm4,%ymm20
vpmadd52huq 224(%rcx),%ymm4,%ymm21
vpmadd52huq 256(%rcx),%ymm4,%ymm22
vpmadd52huq 288(%rcx),%ymm4,%ymm23
/* Next multiplier digit. */
leaq 8(%r11),%r11
decl %ebx
jne .Lloop20
/*
 * Normalisation of operation 0: bring every lane back to 52 bits.
 * ymm4 = per-lane 52-bit mask.  Lane 0 of ymm1 is replaced by the scalar
 * running digit r9 (vpblendd $3 selects the two low dwords from ymm3).
 */
vmovdqa64 .Lmask52x4(%rip),%ymm4
vpbroadcastq %r9,%ymm3
vpblendd $3,%ymm3,%ymm1,%ymm1
/* ymm24..ymm28 = bits above bit 51 of each lane (the per-lane carries). */
vpsrlq $52,%ymm1,%ymm24
vpsrlq $52,%ymm16,%ymm25
vpsrlq $52,%ymm17,%ymm26
vpsrlq $52,%ymm18,%ymm27
vpsrlq $52,%ymm19,%ymm28
/* Move each carry up to the next lane; zero enters at lane 0. */
valignq $3,%ymm27,%ymm28,%ymm28
valignq $3,%ymm26,%ymm27,%ymm27
valignq $3,%ymm25,%ymm26,%ymm26
valignq $3,%ymm24,%ymm25,%ymm25
valignq $3,%ymm0,%ymm24,%ymm24
/* Keep the low 52 bits of each lane and add the incoming carries. */
vpandq %ymm4,%ymm1,%ymm1
vpandq %ymm4,%ymm16,%ymm16
vpandq %ymm4,%ymm17,%ymm17
vpandq %ymm4,%ymm18,%ymm18
vpandq %ymm4,%ymm19,%ymm19
vpaddq %ymm24,%ymm1,%ymm1
vpaddq %ymm25,%ymm16,%ymm16
vpaddq %ymm26,%ymm17,%ymm17
vpaddq %ymm27,%ymm18,%ymm18
vpaddq %ymm28,%ymm19,%ymm19
/*
 * Predicate 1 is unsigned less-than: mask bit set where mask < lane,
 * i.e. the lane overflowed 52 bits and generates a carry.
 */
vpcmpuq $1,%ymm1,%ymm4,%k1
vpcmpuq $1,%ymm16,%ymm4,%k2
vpcmpuq $1,%ymm17,%ymm4,%k3
vpcmpuq $1,%ymm18,%ymm4,%k4
vpcmpuq $1,%ymm19,%ymm4,%k5
kmovb %k1,%r14d
kmovb %k2,%r13d
kmovb %k3,%r12d
kmovb %k4,%r11d
kmovb %k5,%r10d
/*
 * Predicate 0 is equality: mask bit set where lane == mask, i.e. the lane
 * would pass an incoming carry on to the next lane.
 */
vpcmpuq $0,%ymm1,%ymm4,%k1
vpcmpuq $0,%ymm16,%ymm4,%k2
vpcmpuq $0,%ymm17,%ymm4,%k3
vpcmpuq $0,%ymm18,%ymm4,%k4
vpcmpuq $0,%ymm19,%ymm4,%k5
kmovb %k1,%r9d
kmovb %k2,%r8d
kmovb %k3,%ebx
kmovb %k4,%ecx
kmovb %k5,%edx
/*
 * Pack the five 4-bit "generate" masks into the byte chain
 * r14b (lanes 0-7), r12b (lanes 8-15), r10b (lanes 16-19), then shift the
 * chain left by one bit so each carry targets the next lane.
 */
shlb $4,%r13b
orb %r13b,%r14b
shlb $4,%r11b
orb %r11b,%r12b
addb %r14b,%r14b
adcb %r12b,%r12b
adcb %r10b,%r10b
/* Pack the "propagate" masks the same way into r9b, bl, dl. */
shlb $4,%r8b
orb %r8b,%r9b
shlb $4,%cl
orb %cl,%bl
/*
 * Adding the propagate mask lets carries ripple through runs of saturated
 * lanes; xor-ing it back out leaves a 1 for every lane that receives a
 * carry.
 */
addb %r9b,%r14b
adcb %bl,%r12b
adcb %dl,%r10b
xorb %r9b,%r14b
xorb %bl,%r12b
xorb %dl,%r10b
/* Split the chain back into one 4-bit write mask per register. */
kmovb %r14d,%k1
shrb $4,%r14b
kmovb %r14d,%k2
kmovb %r12d,%k3
shrb $4,%r12b
kmovb %r12d,%k4
kmovb %r10d,%k5
/*
 * Subtracting 2^52-1 and then masking to 52 bits is an increment modulo
 * 2^52, applied only to the lanes selected above.
 */
vpsubq %ymm4,%ymm1,%ymm1{%k1}
vpsubq %ymm4,%ymm16,%ymm16{%k2}
vpsubq %ymm4,%ymm17,%ymm17{%k3}
vpsubq %ymm4,%ymm18,%ymm18{%k4}
vpsubq %ymm4,%ymm19,%ymm19{%k5}
vpandq %ymm4,%ymm1,%ymm1
vpandq %ymm4,%ymm16,%ymm16
vpandq %ymm4,%ymm17,%ymm17
vpandq %ymm4,%ymm18,%ymm18
vpandq %ymm4,%ymm19,%ymm19
/*
 * Normalisation of operation 1: same sequence on ymm2,ymm20..ymm23 with the
 * scalar running digit taken from r15.
 */
vpbroadcastq %r15,%ymm3
vpblendd $3,%ymm3,%ymm2,%ymm2
vpsrlq $52,%ymm2,%ymm24
vpsrlq $52,%ymm20,%ymm25
vpsrlq $52,%ymm21,%ymm26
vpsrlq $52,%ymm22,%ymm27
vpsrlq $52,%ymm23,%ymm28
valignq $3,%ymm27,%ymm28,%ymm28
valignq $3,%ymm26,%ymm27,%ymm27
valignq $3,%ymm25,%ymm26,%ymm26
valignq $3,%ymm24,%ymm25,%ymm25
valignq $3,%ymm0,%ymm24,%ymm24
vpandq %ymm4,%ymm2,%ymm2
vpandq %ymm4,%ymm20,%ymm20
vpandq %ymm4,%ymm21,%ymm21
vpandq %ymm4,%ymm22,%ymm22
vpandq %ymm4,%ymm23,%ymm23
vpaddq %ymm24,%ymm2,%ymm2
vpaddq %ymm25,%ymm20,%ymm20
vpaddq %ymm26,%ymm21,%ymm21
vpaddq %ymm27,%ymm22,%ymm22
vpaddq %ymm28,%ymm23,%ymm23
vpcmpuq $1,%ymm2,%ymm4,%k1
vpcmpuq $1,%ymm20,%ymm4,%k2
vpcmpuq $1,%ymm21,%ymm4,%k3
vpcmpuq $1,%ymm22,%ymm4,%k4
vpcmpuq $1,%ymm23,%ymm4,%k5
kmovb %k1,%r14d
kmovb %k2,%r13d
kmovb %k3,%r12d
kmovb %k4,%r11d
kmovb %k5,%r10d
vpcmpuq $0,%ymm2,%ymm4,%k1
vpcmpuq $0,%ymm20,%ymm4,%k2
vpcmpuq $0,%ymm21,%ymm4,%k3
vpcmpuq $0,%ymm22,%ymm4,%k4
vpcmpuq $0,%ymm23,%ymm4,%k5
kmovb %k1,%r9d
kmovb %k2,%r8d
kmovb %k3,%ebx
kmovb %k4,%ecx
kmovb %k5,%edx
shlb $4,%r13b
orb %r13b,%r14b
shlb $4,%r11b
orb %r11b,%r12b
addb %r14b,%r14b
adcb %r12b,%r12b
adcb %r10b,%r10b
shlb $4,%r8b
orb %r8b,%r9b
shlb $4,%cl
orb %cl,%bl
addb %r9b,%r14b
adcb %bl,%r12b
adcb %dl,%r10b
xorb %r9b,%r14b
xorb %bl,%r12b
xorb %dl,%r10b
kmovb %r14d,%k1
shrb $4,%r14b
kmovb %r14d,%k2
kmovb %r12d,%k3
shrb $4,%r12b
kmovb %r12d,%k4
kmovb %r10d,%k5
vpsubq %ymm4,%ymm2,%ymm2{%k1}
vpsubq %ymm4,%ymm20,%ymm20{%k2}
vpsubq %ymm4,%ymm21,%ymm21{%k3}
vpsubq %ymm4,%ymm22,%ymm22{%k4}
vpsubq %ymm4,%ymm23,%ymm23{%k5}
vpandq %ymm4,%ymm2,%ymm2
vpandq %ymm4,%ymm20,%ymm20
vpandq %ymm4,%ymm21,%ymm21
vpandq %ymm4,%ymm22,%ymm22
vpandq %ymm4,%ymm23,%ymm23
/* Store both 20-digit results (unaligned stores). */
vmovdqu64 %ymm1,(%rdi)
vmovdqu64 %ymm16,32(%rdi)
vmovdqu64 %ymm17,64(%rdi)
vmovdqu64 %ymm18,96(%rdi)
vmovdqu64 %ymm19,128(%rdi)
vmovdqu64 %ymm2,160(%rdi)
vmovdqu64 %ymm20,192(%rdi)
vmovdqu64 %ymm21,224(%rdi)
vmovdqu64 %ymm22,256(%rdi)
vmovdqu64 %ymm23,288(%rdi)
/* Clear the upper halves of ymm0-ymm15 before returning to the caller. */
vzeroupper
/* Restore the six registers pushed in the prologue, then drop the frame. */
movq 0(%rsp),%r15
.cfi_restore %r15
movq 8(%rsp),%r14
.cfi_restore %r14
movq 16(%rsp),%r13
.cfi_restore %r13
movq 24(%rsp),%r12
.cfi_restore %r12
movq 32(%rsp),%rbp
.cfi_restore %rbp
movq 40(%rsp),%rbx
.cfi_restore %rbx
leaq 48(%rsp),%rsp
.cfi_adjust_cfa_offset -48
.Lrsaz_amm52x20_x2_256_epilogue:
/* F3 C3 = "rep ret", emitted as raw bytes. */
.byte 0xf3,0xc3
.cfi_endproc
.size ossl_rsaz_amm52x20_x2_256, .-ossl_rsaz_amm52x20_x2_256
.text
/*
 * ossl_extract_multiplier_2x20_win5
 *
 * Copies one 160-byte (20 x 64-bit) entry out of a 32-entry table whose
 * entries are 320 bytes apart.  Every entry is loaded on every call and
 * the wanted one is kept with a mask blend, so the sequence of memory
 * accesses does not depend on the index in rdx.
 *
 * In (SysV AMD64, as read by the code below):
 *   rdi  destination, 160 bytes
 *   rsi  table base
 *   rdx  index of the entry to keep (compared against a 0..31 counter)
 *   rcx  sub-index; rcx * 160 bytes is added to the table base, which
 *        selects the first or second 160-byte half of each 320-byte entry
 *        when rcx is 0 or 1
 *
 * Writes rax, rsi, k1, ymm0-ymm4, ymm16-ymm23 and flags.
 */
.align 32
.globl ossl_extract_multiplier_2x20_win5
.type ossl_extract_multiplier_2x20_win5,@function
ossl_extract_multiplier_2x20_win5:
.cfi_startproc
/* F3 0F 1E FA is the encoding of endbr64, emitted as raw bytes. */
.byte 243,15,30,250
/* rsi += rcx * 160  (rcx * 5, then << 5). */
leaq (%rcx,%rcx,4),%rax
salq $5,%rax
addq %rax,%rsi
/* ymm23 = per-lane 1, ymm22 = wanted index in every lane. */
vmovdqa64 .Lones(%rip),%ymm23
vpbroadcastq %rdx,%ymm22
/* rax = end pointer: 32 entries x 320 bytes past the adjusted base. */
leaq 10240(%rsi),%rax
/* ymm0-ymm4 = result accumulators (zero), ymm21 = running entry counter. */
vpxor %xmm4,%xmm4,%xmm4
vmovdqa64 %ymm4,%ymm3
vmovdqa64 %ymm4,%ymm2
vmovdqa64 %ymm4,%ymm1
vmovdqa64 %ymm4,%ymm0
vmovdqa64 %ymm4,%ymm21
.align 32
.Lloop:
/* k1 = all lanes set when the counter equals the wanted index. */
vpcmpq $0,%ymm21,%ymm22,%k1
addq $320,%rsi
vpaddq %ymm23,%ymm21,%ymm21
/* Load the current 160-byte half-entry unconditionally. */
vmovdqu64 -320(%rsi),%ymm16
vmovdqu64 -288(%rsi),%ymm17
vmovdqu64 -256(%rsi),%ymm18
vmovdqu64 -224(%rsi),%ymm19
vmovdqu64 -192(%rsi),%ymm20
/* Take the freshly loaded lanes where k1 is set, keep the old ones else. */
vpblendmq %ymm16,%ymm0,%ymm0{%k1}
vpblendmq %ymm17,%ymm1,%ymm1{%k1}
vpblendmq %ymm18,%ymm2,%ymm2{%k1}
vpblendmq %ymm19,%ymm3,%ymm3{%k1}
vpblendmq %ymm20,%ymm4,%ymm4{%k1}
cmpq %rsi,%rax
jne .Lloop
vmovdqu64 %ymm0,(%rdi)
vmovdqu64 %ymm1,32(%rdi)
vmovdqu64 %ymm2,64(%rdi)
vmovdqu64 %ymm3,96(%rdi)
vmovdqu64 %ymm4,128(%rdi)
/*
 * ymm0-ymm4 were written with 256-bit operations, so their upper halves are
 * dirty.  Clear them before returning, as the amm52 routines in this file
 * do, so that SSE code in the caller does not pay the AVX/SSE transition
 * penalty.  Only the caller-saved ymm0-ymm15 upper halves are affected.
 */
vzeroupper
/* F3 C3 = "rep ret", emitted as raw bytes. */
.byte 0xf3,0xc3
.cfi_endproc
.size ossl_extract_multiplier_2x20_win5, .-ossl_extract_multiplier_2x20_win5
/*
 * .Lones: the value 1 in each of four 64-bit lanes.  It is only ever read
 * (with an aligned 256-bit load, hence the 32-byte alignment), so it is
 * placed in read-only data instead of the writable .data section.
 */
.section .rodata
.align 32
.Lones:
.quad 1,1,1,1
/*
 * ELF note describing this object to the linker/loader.  Layout: name size,
 * descriptor size, note type, name, descriptor.  Per the x86-64 psABI, note
 * type 5 is NT_GNU_PROPERTY_TYPE_0 and property 0xc0000002 is
 * GNU_PROPERTY_X86_FEATURE_1_AND; its value 3 sets the IBT (bit 0) and
 * SHSTK (bit 1) feature bits.
 */
.section ".note.gnu.property", "a"
.p2align 3
/* Name size: distance between labels 0 and 1 (the 4 bytes "GNU\0"). */
.long 1f - 0f
/* Descriptor size: distance between labels 1 and 4. */
.long 4f - 1f
/* Note type. */
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
/* Property type, followed by the size of its data (labels 2 to 3). */
.long 0xc0000002
.long 3f - 2f
2:
/* Property data: feature bit mask. */
.long 3
3:
.p2align 3
4:
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1,880 +0,0 @@
/* Do not modify. This file is auto-generated from vpaes-x86_64.pl. */
.text
/*
 * _vpaes_encrypt_core: encrypt the single 16-byte block held in %xmm0.
 *
 * In:   %xmm0         input block
 *       %rdx          key schedule; a 32-bit round counter is read from
 *                     offset 240 and 16-byte round keys are read
 *                     sequentially from offset 0
 *       %xmm9-%xmm15  constants as loaded by _vpaes_preheat
 * Out:  %xmm0         result block
 * Writes %rax, %r9, %r10, %r11, %xmm1-%xmm5 and flags.
 *
 * The ".byte 102,15,56,0,N" sequences are hand-encoded PSHUFB instructions
 * (66 0F 38 00 /r).  N is the ModRM byte: bits 5:3 give the destination
 * xmm register, bits 2:0 the source.
 */
.type _vpaes_encrypt_core,@function
.align 16
_vpaes_encrypt_core:
.cfi_startproc
/* r9 = round-key cursor, r11 = rotating table offset, eax = round count. */
movq %rdx,%r9
movq $16,%r11
movl 240(%rdx),%eax
/*
 * Split each input byte into nibbles (xmm9 is the 0x0F byte mask):
 * xmm1 = high nibbles, xmm0 = low nibbles.  Each nibble vector indexes one
 * half of the .Lk_ipt table; the results are xor-ed with round key 0.
 */
movdqa %xmm9,%xmm1
movdqa .Lk_ipt(%rip),%xmm2
pandn %xmm0,%xmm1
movdqu (%r9),%xmm5
psrld $4,%xmm1
pand %xmm9,%xmm0
.byte 102,15,56,0,208
movdqa .Lk_ipt+16(%rip),%xmm0
.byte 102,15,56,0,193
pxor %xmm5,%xmm2
/*
 * This addq also provides the flags for the first "jnz .Lenc_loop": its
 * result is a non-zero pointer, and nothing between here and that branch
 * modifies flags (lea, jmp and the SSE instructions leave them alone).
 */
addq $16,%r9
pxor %xmm2,%xmm0
leaq .Lk_mc_backward(%rip),%r10
jmp .Lenc_entry
.align 16
.Lenc_loop:
/*
 * Middle round.  -64(%r11,%r10) addresses .Lk_mc_forward and
 * (%r11,%r10) addresses .Lk_mc_backward, each at row r11 (0, 16, 32 or 48).
 */
movdqa %xmm13,%xmm4
movdqa %xmm12,%xmm0
.byte 102,15,56,0,226
.byte 102,15,56,0,195
pxor %xmm5,%xmm4
movdqa %xmm15,%xmm5
pxor %xmm4,%xmm0
movdqa -64(%r11,%r10,1),%xmm1
.byte 102,15,56,0,234
movdqa (%r11,%r10,1),%xmm4
movdqa %xmm14,%xmm2
.byte 102,15,56,0,211
movdqa %xmm0,%xmm3
pxor %xmm5,%xmm2
.byte 102,15,56,0,193
addq $16,%r9
pxor %xmm2,%xmm0
.byte 102,15,56,0,220
/* Advance the table row and wrap it to 0..48. */
addq $16,%r11
pxor %xmm0,%xmm3
.byte 102,15,56,0,193
andq $0x30,%r11
/*
 * Round counter; the resulting ZF is consumed by "jnz .Lenc_loop" below,
 * with only flag-preserving instructions in between.
 */
subq $1,%rax
pxor %xmm3,%xmm0
.Lenc_entry:
/*
 * Nibble split followed by a chain of PSHUFB lookups in xmm10/xmm11.
 * Leaves its two results in xmm2 and xmm3 and the next round key in xmm5.
 */
movdqa %xmm9,%xmm1
movdqa %xmm11,%xmm5
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm9,%xmm0
.byte 102,15,56,0,232
movdqa %xmm10,%xmm3
pxor %xmm1,%xmm0
.byte 102,15,56,0,217
movdqa %xmm10,%xmm4
pxor %xmm5,%xmm3
.byte 102,15,56,0,224
movdqa %xmm10,%xmm2
pxor %xmm5,%xmm4
.byte 102,15,56,0,211
movdqa %xmm10,%xmm3
pxor %xmm0,%xmm2
.byte 102,15,56,0,220
movdqu (%r9),%xmm5
pxor %xmm1,%xmm3
jnz .Lenc_loop
/*
 * Last round: -96(%r10) / -80(%r10) are the two halves of .Lk_sbo, and
 * 64(%r11,%r10) is row r11 of .Lk_sr, used as the final byte permutation.
 */
movdqa -96(%r10),%xmm4
movdqa -80(%r10),%xmm0
.byte 102,15,56,0,226
pxor %xmm5,%xmm4
.byte 102,15,56,0,195
movdqa 64(%r11,%r10,1),%xmm1
pxor %xmm4,%xmm0
.byte 102,15,56,0,193
/* F3 C3 = "rep ret", emitted as raw bytes. */
.byte 0xf3,0xc3
.cfi_endproc
.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
/*
 * _vpaes_decrypt_core: decrypt the single 16-byte block held in %xmm0.
 *
 * In:   %xmm0         input block
 *       %rdx          key schedule; a 32-bit round counter is read from
 *                     offset 240 and 16-byte round keys are read
 *                     sequentially from offset 0
 *       %xmm9-%xmm11  constants as loaded by _vpaes_preheat
 * Out:  %xmm0         result block
 * Writes %rax, %r9, %r10, %r11, %xmm1-%xmm5 and flags.
 *
 * ".byte 102,15,56,0,N" is a hand-encoded PSHUFB (66 0F 38 00 /r) and
 * ".byte 102,15,58,15,N,I" a hand-encoded PALIGNR (66 0F 3A 0F /r ib).
 * N is the ModRM byte: bits 5:3 give the destination xmm register,
 * bits 2:0 the source; I is the byte shift count.
 */
.type _vpaes_decrypt_core,@function
.align 16
_vpaes_decrypt_core:
.cfi_startproc
movq %rdx,%r9
movl 240(%rdx),%eax
/*
 * Split the input into high nibbles (xmm1) and low nibbles (xmm0) using the
 * 0x0F mask in xmm9, look both up in the two halves of .Lk_dipt and xor in
 * round key 0.  Interleaved with that, r11 is computed as
 * ((rounds << 4) ^ 0x30) & 0x30.
 */
movdqa %xmm9,%xmm1
movdqa .Lk_dipt(%rip),%xmm2
pandn %xmm0,%xmm1
movq %rax,%r11
psrld $4,%xmm1
movdqu (%r9),%xmm5
shlq $4,%r11
pand %xmm9,%xmm0
.byte 102,15,56,0,208
movdqa .Lk_dipt+16(%rip),%xmm0
xorq $0x30,%r11
leaq .Lk_dsbd(%rip),%r10
.byte 102,15,56,0,193
andq $0x30,%r11
pxor %xmm5,%xmm2
/* xmm5 = last row of .Lk_mc_forward; rotated by 12 bytes every round. */
movdqa .Lk_mc_forward+48(%rip),%xmm5
pxor %xmm2,%xmm0
addq $16,%r9
/*
 * r11 becomes an absolute pointer relative to .Lk_dsbd.  This addq also
 * provides the flags for the first "jnz .Ldec_loop": the result is a
 * non-zero address and nothing before that branch modifies flags.
 */
addq %r10,%r11
jmp .Ldec_entry
.align 16
.Ldec_loop:
/*
 * Middle round.  With r10 = .Lk_dsbd the table pairs used are
 * -32/-16 = .Lk_dsb9, 0/16 = .Lk_dsbd, 32/48 = .Lk_dsbb, 64/80 = .Lk_dsbe.
 * Between pairs the running value in xmm0 is permuted by xmm5.
 */
movdqa -32(%r10),%xmm4
movdqa -16(%r10),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
movdqa 0(%r10),%xmm4
pxor %xmm1,%xmm0
movdqa 16(%r10),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,197
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
movdqa 32(%r10),%xmm4
pxor %xmm1,%xmm0
movdqa 48(%r10),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,197
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
movdqa 64(%r10),%xmm4
pxor %xmm1,%xmm0
movdqa 80(%r10),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,197
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
addq $16,%r9
/* palignr $12,%xmm5,%xmm5: rotate the permutation in xmm5. */
.byte 102,15,58,15,237,12
pxor %xmm1,%xmm0
/*
 * Round counter; the resulting ZF is consumed by "jnz .Ldec_loop" below,
 * with only flag-preserving instructions in between.
 */
subq $1,%rax
.Ldec_entry:
/*
 * Nibble split followed by a chain of PSHUFB lookups in xmm10/xmm11.
 * Leaves its two results in xmm2 and xmm3 and the next round key in xmm0.
 */
movdqa %xmm9,%xmm1
pandn %xmm0,%xmm1
movdqa %xmm11,%xmm2
psrld $4,%xmm1
pand %xmm9,%xmm0
.byte 102,15,56,0,208
movdqa %xmm10,%xmm3
pxor %xmm1,%xmm0
.byte 102,15,56,0,217
movdqa %xmm10,%xmm4
pxor %xmm2,%xmm3
.byte 102,15,56,0,224
pxor %xmm2,%xmm4
movdqa %xmm10,%xmm2
.byte 102,15,56,0,211
movdqa %xmm10,%xmm3
pxor %xmm0,%xmm2
.byte 102,15,56,0,220
movdqu (%r9),%xmm0
pxor %xmm1,%xmm3
jnz .Ldec_loop
/*
 * Last round: 96/112(%r10) are the two halves of .Lk_dsbo.  -352(%r11)
 * lands inside .Lk_sr (which starts 352 bytes before .Lk_dsbd), at the row
 * selected by the offset computed on entry; it is the final permutation.
 */
movdqa 96(%r10),%xmm4
.byte 102,15,56,0,226
pxor %xmm0,%xmm4
movdqa 112(%r10),%xmm0
movdqa -352(%r11),%xmm2
.byte 102,15,56,0,195
pxor %xmm4,%xmm0
.byte 102,15,56,0,194
/* F3 C3 = "rep ret", emitted as raw bytes. */
.byte 0xf3,0xc3
.cfi_endproc
.size _vpaes_decrypt_core,.-_vpaes_decrypt_core
/*
 * _vpaes_schedule_core: expand a user key into a round-key schedule.
 *
 * In:   %rdi  user key bytes
 *       %esi  key length in bits (compared against 192 to pick the
 *             128/192/256 path); reused as the loop counter afterwards
 *       %rdx  where the first round key is written; moved by +16 or -16
 *             per stored round key by _vpaes_schedule_mangle
 *       %rcx  0 when building an encryption schedule, non-zero for
 *             decryption
 *       %r8   byte offset (multiple of 16) of the row of .Lk_sr to use
 * Out:  round keys stored through %rdx; %xmm0-%xmm7 are zeroed on exit.
 * Writes %rsi, %rdx, %r8, %r10, %r11, %xmm0-%xmm15 and flags.
 *
 * ".byte 102,15,56,0,N" is a hand-encoded PSHUFB (66 0F 38 00 /r) and
 * ".byte 102,15,58,15,N,I" a hand-encoded PALIGNR (66 0F 3A 0F /r ib).
 */
.type _vpaes_schedule_core,@function
.align 16
_vpaes_schedule_core:
.cfi_startproc
/* Load the shared constants into xmm9-xmm15, and .Lk_rcon into xmm8. */
call _vpaes_preheat
movdqa .Lk_rcon(%rip),%xmm8
/* First 16 key bytes: xmm3 keeps the raw copy, xmm0/xmm7 the transformed. */
movdqu (%rdi),%xmm0
movdqa %xmm0,%xmm3
leaq .Lk_ipt(%rip),%r11
call _vpaes_schedule_transform
movdqa %xmm0,%xmm7
leaq .Lk_sr(%rip),%r10
testq %rcx,%rcx
jnz .Lschedule_am_decrypting
/* Encrypting: store the transformed key as the first round key. */
movdqu %xmm0,(%rdx)
jmp .Lschedule_go
.Lschedule_am_decrypting:
/*
 * Decrypting: store the raw key permuted by row r8 of .Lk_sr, then flip
 * the row selector.
 */
movdqa (%r8,%r10,1),%xmm1
.byte 102,15,56,0,217
movdqu %xmm3,(%rdx)
xorq $0x30,%r8
.Lschedule_go:
cmpl $192,%esi
ja .Lschedule_256
je .Lschedule_192
.Lschedule_128:
/* Key sizes below 192 bits: 10 rounds, one stored key per round. */
movl $10,%esi
.Loop_schedule_128:
call _vpaes_schedule_round
decq %rsi
jz .Lschedule_mangle_last
call _vpaes_schedule_mangle
jmp .Loop_schedule_128
.align 16
.Lschedule_192:
/*
 * 192-bit keys: key bytes 8..23 are transformed into xmm6, whose low
 * quadword is then cleared (movhlps from the zeroed xmm4).  Four loop
 * iterations; each stores three round keys except the last, which leaves
 * after its second round.
 */
movdqu 8(%rdi),%xmm0
call _vpaes_schedule_transform
movdqa %xmm0,%xmm6
pxor %xmm4,%xmm4
movhlps %xmm4,%xmm6
movl $4,%esi
.Loop_schedule_192:
call _vpaes_schedule_round
/* palignr $8,%xmm6,%xmm0 */
.byte 102,15,58,15,198,8
call _vpaes_schedule_mangle
call _vpaes_schedule_192_smear
call _vpaes_schedule_mangle
call _vpaes_schedule_round
decq %rsi
jz .Lschedule_mangle_last
call _vpaes_schedule_mangle
call _vpaes_schedule_192_smear
jmp .Loop_schedule_192
.align 16
.Lschedule_256:
/*
 * Keys longer than 192 bits: key bytes 16..31 are transformed into xmm0.
 * Seven loop iterations alternate a full round with a "low" round that
 * runs on the other key half (xmm7 is swapped with xmm6 around the call
 * and restored from xmm5 afterwards).
 */
movdqu 16(%rdi),%xmm0
call _vpaes_schedule_transform
movl $7,%esi
.Loop_schedule_256:
call _vpaes_schedule_mangle
movdqa %xmm0,%xmm6
call _vpaes_schedule_round
decq %rsi
jz .Lschedule_mangle_last
call _vpaes_schedule_mangle
pshufd $0xFF,%xmm0,%xmm0
movdqa %xmm7,%xmm5
movdqa %xmm6,%xmm7
call _vpaes_schedule_low_round
movdqa %xmm5,%xmm7
jmp .Loop_schedule_256
.align 16
.Lschedule_mangle_last:
/*
 * Final round key.  Decryption uses the .Lk_deskew tables and steps rdx
 * back by 16.  Encryption first permutes xmm0 by row r8 of .Lk_sr, uses the
 * .Lk_opt tables instead, and steps rdx forward by 16 (+32, then the
 * shared -16).
 */
leaq .Lk_deskew(%rip),%r11
testq %rcx,%rcx
jnz .Lschedule_mangle_last_dec
movdqa (%r8,%r10,1),%xmm1
.byte 102,15,56,0,193
leaq .Lk_opt(%rip),%r11
addq $32,%rdx
.Lschedule_mangle_last_dec:
addq $-16,%rdx
pxor .Lk_s63(%rip),%xmm0
call _vpaes_schedule_transform
movdqu %xmm0,(%rdx)
/*
 * Zero xmm0-xmm7 before returning (presumably so that no key-derived
 * values are left behind in registers).
 */
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
/* F3 C3 = "rep ret", emitted as raw bytes. */
.byte 0xf3,0xc3
.cfi_endproc
.size _vpaes_schedule_core,.-_vpaes_schedule_core
/*
 * _vpaes_schedule_192_smear: helper of the 192-bit key schedule.
 *
 * In:   %xmm6, %xmm7
 * Out:  %xmm0 = xmm6 ^ (dwords 0,0,0,2 of xmm6) ^ (dwords 2,3,3,3 of xmm7)
 *       %xmm6 = the same value with its low quadword cleared
 * Writes %xmm1 (left zero).  Flags and general registers are untouched.
 */
.type _vpaes_schedule_192_smear,@function
.align 16
_vpaes_schedule_192_smear:
.cfi_startproc
/* Shuffle 0x80 picks source dwords 0,0,0,2; 0xFE picks 2,3,3,3. */
pshufd $0x80,%xmm6,%xmm1
pshufd $0xFE,%xmm7,%xmm0
pxor %xmm1,%xmm6
pxor %xmm1,%xmm1
pxor %xmm0,%xmm6
movdqa %xmm6,%xmm0
/* movhlps from the zeroed xmm1 clears the low quadword of xmm6. */
movhlps %xmm1,%xmm6
/* F3 C3 = "rep ret", emitted as raw bytes. */
.byte 0xf3,0xc3
.cfi_endproc
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
/*
 * _vpaes_schedule_round / _vpaes_schedule_low_round: one key-schedule step.
 *
 * _vpaes_schedule_round first mixes the next byte of the constant held in
 * xmm8 into xmm7 and rotates the top dword of xmm0, then falls through into
 * _vpaes_schedule_low_round, which is also called directly (second entry
 * point, covered by the same .size).
 *
 * In:   %xmm0  input to the substitution step
 *       %xmm7  previous round key
 *       %xmm8  rotating round constant (full round only)
 *       %xmm9-%xmm13  constants as loaded by _vpaes_preheat
 * Out:  %xmm0 = %xmm7 = next round key
 * Writes %xmm1-%xmm4 and, in the full round, %xmm8.
 *
 * ".byte 102,...,15,58,15,N,I" is a hand-encoded PALIGNR (66 [REX] 0F 3A 0F
 * /r ib); the 65 / 69 bytes are REX prefixes that select xmm8.
 * ".byte 102,15,56,0,N" is a hand-encoded PSHUFB.
 */
.type _vpaes_schedule_round,@function
.align 16
_vpaes_schedule_round:
.cfi_startproc
/*
 * palignr $15,%xmm8,%xmm1 with xmm1 = 0 extracts byte 15 of xmm8 into the
 * low byte of xmm1; palignr $15,%xmm8,%xmm8 then rotates xmm8 by one byte.
 */
pxor %xmm1,%xmm1
.byte 102,65,15,58,15,200,15
.byte 102,69,15,58,15,192,15
pxor %xmm1,%xmm7
/* Broadcast the top dword of xmm0, then rotate it by one byte. */
pshufd $0xFF,%xmm0,%xmm0
.byte 102,15,58,15,192,1
_vpaes_schedule_low_round:
/* xmm7 ^= xmm7 << 32; xmm7 ^= xmm7 << 64; then xor with .Lk_s63. */
movdqa %xmm7,%xmm1
pslldq $4,%xmm7
pxor %xmm1,%xmm7
movdqa %xmm7,%xmm1
pslldq $8,%xmm7
pxor %xmm1,%xmm7
pxor .Lk_s63(%rip),%xmm7
/*
 * Substitution on xmm0: nibble split with the 0x0F mask in xmm9, then a
 * chain of PSHUFB lookups in xmm10/xmm11, finished with xmm13/xmm12.
 */
movdqa %xmm9,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm9,%xmm0
movdqa %xmm11,%xmm2
.byte 102,15,56,0,208
pxor %xmm1,%xmm0
movdqa %xmm10,%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
movdqa %xmm10,%xmm4
.byte 102,15,56,0,224
pxor %xmm2,%xmm4
movdqa %xmm10,%xmm2
.byte 102,15,56,0,211
pxor %xmm0,%xmm2
movdqa %xmm10,%xmm3
.byte 102,15,56,0,220
pxor %xmm1,%xmm3
movdqa %xmm13,%xmm4
.byte 102,15,56,0,226
movdqa %xmm12,%xmm0
.byte 102,15,56,0,195
pxor %xmm4,%xmm0
/* Combine with the smeared previous key and keep a copy in xmm7. */
pxor %xmm7,%xmm0
movdqa %xmm0,%xmm7
/* F3 C3 = "rep ret", emitted as raw bytes. */
.byte 0xf3,0xc3
.cfi_endproc
.size _vpaes_schedule_round,.-_vpaes_schedule_round
/*
 * _vpaes_schedule_transform: byte-wise table transform of %xmm0.
 *
 * In:   %xmm0  input
 *       %xmm9  0x0F byte mask (as loaded by _vpaes_preheat)
 *       %r11   pointer to two consecutive 16-byte lookup tables
 * Out:  %xmm0 = table0[low nibble] ^ table1[high nibble] for each byte
 * Writes %xmm1 and %xmm2.  Flags and general registers are untouched.
 *
 * ".byte 102,15,56,0,N" is a hand-encoded PSHUFB (66 0F 38 00 /r).
 */
.type _vpaes_schedule_transform,@function
.align 16
_vpaes_schedule_transform:
.cfi_startproc
/* xmm1 = high nibbles, xmm0 = low nibbles. */
movdqa %xmm9,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm9,%xmm0
movdqa (%r11),%xmm2
/* pshufb %xmm0,%xmm2: first table indexed by the low nibbles. */
.byte 102,15,56,0,208
movdqa 16(%r11),%xmm0
/* pshufb %xmm1,%xmm0: second table indexed by the high nibbles. */
.byte 102,15,56,0,193
pxor %xmm2,%xmm0
/* F3 C3 = "rep ret", emitted as raw bytes. */
.byte 0xf3,0xc3
.cfi_endproc
.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
/*
 * _vpaes_schedule_mangle: convert the round key in %xmm0 to its stored form
 * and write it to the schedule.
 *
 * In:   %xmm0  round key (left unchanged)
 *       %rcx   0 = encryption schedule, non-zero = decryption schedule
 *       %rdx   schedule cursor; advanced by +16 (encrypt) or -16 (decrypt)
 *              before the store
 *       %r8    byte offset of the current row of .Lk_sr
 *       %r10   address of .Lk_sr (set up by _vpaes_schedule_core)
 *       %xmm9  0x0F byte mask (decrypt path)
 * Out:  16 bytes stored at the new %rdx; %r8 = (%r8 - 16) & 0x30
 * Writes %xmm1-%xmm5, %r11 (decrypt path) and flags.
 *
 * ".byte 102,15,56,0,N" is a hand-encoded PSHUFB (66 0F 38 00 /r).
 */
.type _vpaes_schedule_mangle,@function
.align 16
_vpaes_schedule_mangle:
.cfi_startproc
movdqa %xmm0,%xmm4
movdqa .Lk_mc_forward(%rip),%xmm5
testq %rcx,%rcx
jnz .Lschedule_mangle_dec
/*
 * Encrypt: xor with .Lk_s63, then xmm3 = the xor of that value permuted by
 * xmm5 once, twice and three times.
 */
addq $16,%rdx
pxor .Lk_s63(%rip),%xmm4
.byte 102,15,56,0,229
movdqa %xmm4,%xmm3
.byte 102,15,56,0,229
pxor %xmm4,%xmm3
.byte 102,15,56,0,229
pxor %xmm4,%xmm3
jmp .Lschedule_mangle_both
.align 16
.Lschedule_mangle_dec:
/*
 * Decrypt: split into high nibbles (xmm1) and low nibbles (xmm4), then run
 * four table-pair lookups.  With r11 = .Lk_dksd the pairs are
 * 0/16 = .Lk_dksd, 32/48 = .Lk_dksb, 64/80 = .Lk_dkse, 96/112 = .Lk_dks9.
 * The running value is permuted by xmm5 after each of the first three.
 */
leaq .Lk_dksd(%rip),%r11
movdqa %xmm9,%xmm1
pandn %xmm4,%xmm1
psrld $4,%xmm1
pand %xmm9,%xmm4
movdqa 0(%r11),%xmm2
.byte 102,15,56,0,212
movdqa 16(%r11),%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
.byte 102,15,56,0,221
movdqa 32(%r11),%xmm2
.byte 102,15,56,0,212
pxor %xmm3,%xmm2
movdqa 48(%r11),%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
.byte 102,15,56,0,221
movdqa 64(%r11),%xmm2
.byte 102,15,56,0,212
pxor %xmm3,%xmm2
movdqa 80(%r11),%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
.byte 102,15,56,0,221
movdqa 96(%r11),%xmm2
.byte 102,15,56,0,212
pxor %xmm3,%xmm2
movdqa 112(%r11),%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
addq $-16,%rdx
.Lschedule_mangle_both:
/* Permute by the current .Lk_sr row, step the row backwards, store. */
movdqa (%r8,%r10,1),%xmm1
.byte 102,15,56,0,217
addq $-16,%r8
andq $0x30,%r8
movdqu %xmm3,(%rdx)
/* F3 C3 = "rep ret", emitted as raw bytes. */
.byte 0xf3,0xc3
.cfi_endproc
.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
/*
 * vpaes_set_encrypt_key: build an encryption key schedule.
 *
 * In (SysV AMD64, as read by the code below):
 *   rdi  user key bytes (consumed by _vpaes_schedule_core)
 *   esi  key length in bits
 *   rdx  key schedule to fill; the round counter is stored at offset 240
 * Out:  eax = 0 (always)
 */
.globl vpaes_set_encrypt_key
.type vpaes_set_encrypt_key,@function
.align 16
vpaes_set_encrypt_key:
.cfi_startproc
/* F3 0F 1E FA is the encoding of endbr64, emitted as raw bytes. */
.byte 243,15,30,250
/* Round counter = bits / 32 + 5, stored for the encrypt/decrypt cores. */
movl %esi,%eax
shrl $5,%eax
addl $5,%eax
movl %eax,240(%rdx)
/* ecx = 0 selects the encryption path; r8 = initial .Lk_sr row offset. */
movl $0,%ecx
movl $0x30,%r8d
call _vpaes_schedule_core
xorl %eax,%eax
/* F3 C3 = "rep ret", emitted as raw bytes. */
.byte 0xf3,0xc3
.cfi_endproc
.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
/*
 * vpaes_set_decrypt_key: build a decryption key schedule.
 *
 * In (SysV AMD64, as read by the code below):
 *   rdi  user key bytes (consumed by _vpaes_schedule_core)
 *   esi  key length in bits
 *   rdx  key schedule to fill; the round counter is stored at offset 240
 * Out:  eax = 0 (always)
 */
.globl vpaes_set_decrypt_key
.type vpaes_set_decrypt_key,@function
.align 16
vpaes_set_decrypt_key:
.cfi_startproc
/* F3 0F 1E FA is the encoding of endbr64, emitted as raw bytes. */
.byte 243,15,30,250
/* Round counter = bits / 32 + 5. */
movl %esi,%eax
shrl $5,%eax
addl $5,%eax
movl %eax,240(%rdx)
/*
 * Point rdx at schedule + 16 * rounds + 16: the decryption schedule is
 * written from the end towards the start (the core steps rdx by -16).
 */
shll $4,%eax
leaq 16(%rdx,%rax,1),%rdx
/* ecx = 1 selects the decryption path. */
movl $1,%ecx
/* r8 = ((bits >> 1) & 32) ^ 32: initial .Lk_sr row offset (0 or 32). */
movl %esi,%r8d
shrl $1,%r8d
andl $32,%r8d
xorl $32,%r8d
call _vpaes_schedule_core
xorl %eax,%eax
/* F3 C3 = "rep ret", emitted as raw bytes. */
.byte 0xf3,0xc3
.cfi_endproc
.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
/*
 * vpaes_encrypt: encrypt one 16-byte block.
 *
 * In (SysV AMD64, as read by the code below):
 *   rdi  input block (unaligned load)
 *   rsi  output block (unaligned store)
 *   rdx  key schedule, passed through to _vpaes_encrypt_core
 */
.globl vpaes_encrypt
.type vpaes_encrypt,@function
.align 16
vpaes_encrypt:
.cfi_startproc
/* F3 0F 1E FA is the encoding of endbr64, emitted as raw bytes. */
.byte 243,15,30,250
movdqu (%rdi),%xmm0
/* Load the constant registers the core expects, then run it on xmm0. */
call _vpaes_preheat
call _vpaes_encrypt_core
movdqu %xmm0,(%rsi)
/* F3 C3 = "rep ret", emitted as raw bytes. */
.byte 0xf3,0xc3
.cfi_endproc
.size vpaes_encrypt,.-vpaes_encrypt
/*
 * vpaes_decrypt: decrypt one 16-byte block.
 *
 * In (SysV AMD64, as read by the code below):
 *   rdi  input block (unaligned load)
 *   rsi  output block (unaligned store)
 *   rdx  key schedule, passed through to _vpaes_decrypt_core
 */
.globl vpaes_decrypt
.type vpaes_decrypt,@function
.align 16
vpaes_decrypt:
.cfi_startproc
/* F3 0F 1E FA is the encoding of endbr64, emitted as raw bytes. */
.byte 243,15,30,250
movdqu (%rdi),%xmm0
/* Load the constant registers the core expects, then run it on xmm0. */
call _vpaes_preheat
call _vpaes_decrypt_core
movdqu %xmm0,(%rsi)
/* F3 C3 = "rep ret", emitted as raw bytes. */
.byte 0xf3,0xc3
.cfi_endproc
.size vpaes_decrypt,.-vpaes_decrypt
/*
 * vpaes_cbc_encrypt: CBC-mode encryption or decryption of whole blocks.
 *
 * In (SysV AMD64, as read by the code below):
 *   rdi  input
 *   rsi  output
 *   rdx  length in bytes; nothing is done when it is below 16, and a
 *        trailing partial block is not processed
 *   rcx  key schedule
 *   r8   16-byte chaining value (IV); read on entry, updated on exit
 *   r9d  non-zero = encrypt, zero = decrypt
 *
 * Uses xmm6 for the chaining value and xmm7 for the saved ciphertext block,
 * in addition to everything the core routines write.
 */
.globl vpaes_cbc_encrypt
.type vpaes_cbc_encrypt,@function
.align 16
vpaes_cbc_encrypt:
.cfi_startproc
/* F3 0F 1E FA is the encoding of endbr64, emitted as raw bytes. */
.byte 243,15,30,250
/* The cores expect the key in rdx: rcx = length, rdx = key from here on. */
xchgq %rcx,%rdx
/* rcx = length - 16; a borrow means less than one block. */
subq $16,%rcx
jc .Lcbc_abort
movdqu (%r8),%xmm6
/* rsi = out - in, so (%rsi,%rdi,1) addresses the output while rdi walks. */
subq %rdi,%rsi
call _vpaes_preheat
cmpl $0,%r9d
je .Lcbc_dec_loop
jmp .Lcbc_enc_loop
.align 16
.Lcbc_enc_loop:
/* ciphertext = E(plaintext ^ chain); chain = ciphertext. */
movdqu (%rdi),%xmm0
pxor %xmm6,%xmm0
call _vpaes_encrypt_core
movdqa %xmm0,%xmm6
movdqu %xmm0,(%rsi,%rdi,1)
leaq 16(%rdi),%rdi
subq $16,%rcx
jnc .Lcbc_enc_loop
jmp .Lcbc_done
.align 16
.Lcbc_dec_loop:
/*
 * plaintext = D(ciphertext) ^ chain; chain = ciphertext.  The ciphertext is
 * saved in xmm7 before the core overwrites xmm0.
 */
movdqu (%rdi),%xmm0
movdqa %xmm0,%xmm7
call _vpaes_decrypt_core
pxor %xmm6,%xmm0
movdqa %xmm7,%xmm6
movdqu %xmm0,(%rsi,%rdi,1)
leaq 16(%rdi),%rdi
subq $16,%rcx
jnc .Lcbc_dec_loop
.Lcbc_done:
/* Write the final chaining value back for the next call. */
movdqu %xmm6,(%r8)
.Lcbc_abort:
/* F3 C3 = "rep ret", emitted as raw bytes. */
.byte 0xf3,0xc3
.cfi_endproc
.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
/*
 * _vpaes_preheat: load the constants shared by the vpaes core routines.
 *
 * Out:  %r10   = address of .Lk_s0F
 *       %xmm10 = .Lk_inv       %xmm11 = .Lk_inv+16
 *       %xmm9  = .Lk_s0F (0x0F in every byte)
 *       %xmm13 = .Lk_sb1       %xmm12 = .Lk_sb1+16
 *       %xmm15 = .Lk_sb2       %xmm14 = .Lk_sb2+16
 * The offsets below rely on the table order in _vpaes_consts
 * (.Lk_inv, .Lk_s0F, .Lk_ipt, .Lk_sb1, .Lk_sb2).
 */
.type _vpaes_preheat,@function
.align 16
_vpaes_preheat:
.cfi_startproc
leaq .Lk_s0F(%rip),%r10
movdqa -32(%r10),%xmm10
movdqa -16(%r10),%xmm11
movdqa 0(%r10),%xmm9
movdqa 48(%r10),%xmm13
movdqa 64(%r10),%xmm12
movdqa 80(%r10),%xmm15
movdqa 96(%r10),%xmm14
/* F3 C3 = "rep ret", emitted as raw bytes. */
.byte 0xf3,0xc3
.cfi_endproc
.size _vpaes_preheat,.-_vpaes_preheat
/*
 * _vpaes_consts: lookup tables used by the vpaes routines, 64-byte aligned.
 *
 * The ORDER AND SIZE of these tables is part of the code: several routines
 * reach a table through a fixed displacement from a neighbouring label
 * (for example -32/-16/48/64/80/96 from .Lk_s0F in _vpaes_preheat,
 * -96/-64/+64 from .Lk_mc_backward in _vpaes_encrypt_core, and -352 from
 * .Lk_dsbd in _vpaes_decrypt_core).  Do not reorder, insert or resize
 * entries.
 *
 * Most tables are a pair of 16-byte PSHUFB lookup vectors, the first
 * indexed by the low nibble and the second by the high nibble of each byte.
 */
.type _vpaes_consts,@object
.align 64
_vpaes_consts:
/* Loaded into xmm10 / xmm11 by _vpaes_preheat. */
.Lk_inv:
.quad 0x0E05060F0D080180, 0x040703090A0B0C02
.quad 0x01040A060F0B0780, 0x030D0E0C02050809
/* Low-nibble byte mask, loaded into xmm9 by _vpaes_preheat. */
.Lk_s0F:
.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
/* Input transform for encryption and for the key schedule. */
.Lk_ipt:
.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
/* Loaded into xmm13 / xmm12 by _vpaes_preheat. */
.Lk_sb1:
.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
/* Loaded into xmm15 / xmm14 by _vpaes_preheat. */
.Lk_sb2:
.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
/* Used in the last encryption round. */
.Lk_sbo:
.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
/* Four 16-byte permutations, selected by a 0/16/32/48 row offset. */
.Lk_mc_forward:
.quad 0x0407060500030201, 0x0C0F0E0D080B0A09
.quad 0x080B0A0904070605, 0x000302010C0F0E0D
.quad 0x0C0F0E0D080B0A09, 0x0407060500030201
.quad 0x000302010C0F0E0D, 0x080B0A0904070605
/* Four 16-byte permutations, selected by a 0/16/32/48 row offset. */
.Lk_mc_backward:
.quad 0x0605040702010003, 0x0E0D0C0F0A09080B
.quad 0x020100030E0D0C0F, 0x0A09080B06050407
.quad 0x0E0D0C0F0A09080B, 0x0605040702010003
.quad 0x0A09080B06050407, 0x020100030E0D0C0F
/* Four 16-byte permutations; row 0 is the identity permutation. */
.Lk_sr:
.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
.quad 0x030E09040F0A0500, 0x0B06010C07020D08
.quad 0x0F060D040B020900, 0x070E050C030A0108
.quad 0x0B0E0104070A0D00, 0x0306090C0F020508
/* Loaded into xmm8 by _vpaes_schedule_core and consumed a byte per round. */
.Lk_rcon:
.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
/* Constant xor-ed into round keys by the key-schedule routines. */
.Lk_s63:
.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
/* Output transform for the last round key (encryption schedule). */
.Lk_opt:
.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
/* Output transform for the last round key (decryption schedule). */
.Lk_deskew:
.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
/*
 * .Lk_dksd, .Lk_dksb, .Lk_dkse, .Lk_dks9: the four table pairs used, in
 * this order, by the decryption branch of _vpaes_schedule_mangle.
 */
.Lk_dksd:
.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
.Lk_dksb:
.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
.Lk_dkse:
.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
.Lk_dks9:
.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
/* Input transform for decryption. */
.Lk_dipt:
.quad 0x0F505B040B545F00, 0x154A411E114E451A
.quad 0x86E383E660056500, 0x12771772F491F194
/*
 * .Lk_dsb9, .Lk_dsbd, .Lk_dsbb, .Lk_dsbe: the four table pairs used, in
 * this order, by each middle round of _vpaes_decrypt_core; .Lk_dsbo is used
 * in its last round.
 */
.Lk_dsb9:
.quad 0x851C03539A86D600, 0xCAD51F504F994CC9
.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
.Lk_dsbd:
.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
.Lk_dsbb:
.quad 0xD022649296B44200, 0x602646F6B0F2D404
.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
.Lk_dsbe:
.quad 0x46F2929626D4D000, 0x2242600464B4F6B0
.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
.Lk_dsbo:
.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
/*
 * NUL-terminated ASCII identification string:
 * "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)"
 */
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
.align 64
.size _vpaes_consts,.-_vpaes_consts
/*
 * ELF note describing this object to the linker/loader.  Layout: name size,
 * descriptor size, note type, name, descriptor.  Per the x86-64 psABI, note
 * type 5 is NT_GNU_PROPERTY_TYPE_0 and property 0xc0000002 is
 * GNU_PROPERTY_X86_FEATURE_1_AND; its value 3 sets the IBT (bit 0) and
 * SHSTK (bit 1) feature bits.
 */
.section ".note.gnu.property", "a"
.p2align 3
/* Name size: distance between labels 0 and 1 (the 4 bytes "GNU\0"). */
.long 1f - 0f
/* Descriptor size: distance between labels 1 and 4. */
.long 4f - 1f
/* Note type. */
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
/* Property type, followed by the size of its data (labels 2 to 3). */
.long 0xc0000002
.long 3f - 2f
2:
/* Property data: feature bit mask. */
.long 3
3:
.p2align 3
4:
-901
View File
@@ -1,901 +0,0 @@
/* Do not modify. This file is auto-generated from wp-x86_64.pl. */
.text
.globl whirlpool_block
.type whirlpool_block,@function
.align 16
whirlpool_block:
.cfi_startproc
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
subq $128+40,%rsp
andq $-64,%rsp
leaq 128(%rsp),%r10
movq %rdi,0(%r10)
movq %rsi,8(%r10)
movq %rdx,16(%r10)
movq %rax,32(%r10)
.cfi_escape 0x0f,0x06,0x77,0xa0,0x01,0x06,0x23,0x08
.Lprologue:
movq %r10,%rbx
leaq .Ltable(%rip),%rbp
xorq %rcx,%rcx
xorq %rdx,%rdx
movq 0(%rdi),%r8
movq 8(%rdi),%r9
movq 16(%rdi),%r10
movq 24(%rdi),%r11
movq 32(%rdi),%r12
movq 40(%rdi),%r13
movq 48(%rdi),%r14
movq 56(%rdi),%r15
.Louterloop:
movq %r8,0(%rsp)
movq %r9,8(%rsp)
movq %r10,16(%rsp)
movq %r11,24(%rsp)
movq %r12,32(%rsp)
movq %r13,40(%rsp)
movq %r14,48(%rsp)
movq %r15,56(%rsp)
xorq 0(%rsi),%r8
xorq 8(%rsi),%r9
xorq 16(%rsi),%r10
xorq 24(%rsi),%r11
xorq 32(%rsi),%r12
xorq 40(%rsi),%r13
xorq 48(%rsi),%r14
xorq 56(%rsi),%r15
movq %r8,64+0(%rsp)
movq %r9,64+8(%rsp)
movq %r10,64+16(%rsp)
movq %r11,64+24(%rsp)
movq %r12,64+32(%rsp)
movq %r13,64+40(%rsp)
movq %r14,64+48(%rsp)
movq %r15,64+56(%rsp)
xorq %rsi,%rsi
movq %rsi,24(%rbx)
jmp .Lround
.align 16
.Lround:
movq 4096(%rbp,%rsi,8),%r8
movl 0(%rsp),%eax
movl 4(%rsp),%ebx
movzbl %al,%ecx
movzbl %ah,%edx
shrl $16,%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 0(%rbp,%rsi,8),%r8
movq 7(%rbp,%rdi,8),%r9
movl 0+8(%rsp),%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
movq 6(%rbp,%rsi,8),%r10
movq 5(%rbp,%rdi,8),%r11
shrl $16,%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
movq 4(%rbp,%rsi,8),%r12
movq 3(%rbp,%rdi,8),%r13
movl 0+8+4(%rsp),%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
movq 2(%rbp,%rsi,8),%r14
movq 1(%rbp,%rdi,8),%r15
shrl $16,%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 0(%rbp,%rsi,8),%r9
xorq 7(%rbp,%rdi,8),%r10
movl 8+8(%rsp),%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 6(%rbp,%rsi,8),%r11
xorq 5(%rbp,%rdi,8),%r12
shrl $16,%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 4(%rbp,%rsi,8),%r13
xorq 3(%rbp,%rdi,8),%r14
movl 8+8+4(%rsp),%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 2(%rbp,%rsi,8),%r15
xorq 1(%rbp,%rdi,8),%r8
shrl $16,%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 0(%rbp,%rsi,8),%r10
xorq 7(%rbp,%rdi,8),%r11
movl 16+8(%rsp),%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 6(%rbp,%rsi,8),%r12
xorq 5(%rbp,%rdi,8),%r13
shrl $16,%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 4(%rbp,%rsi,8),%r14
xorq 3(%rbp,%rdi,8),%r15
movl 16+8+4(%rsp),%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 2(%rbp,%rsi,8),%r8
xorq 1(%rbp,%rdi,8),%r9
shrl $16,%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 0(%rbp,%rsi,8),%r11
xorq 7(%rbp,%rdi,8),%r12
movl 24+8(%rsp),%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 6(%rbp,%rsi,8),%r13
xorq 5(%rbp,%rdi,8),%r14
shrl $16,%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 4(%rbp,%rsi,8),%r15
xorq 3(%rbp,%rdi,8),%r8
movl 24+8+4(%rsp),%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 2(%rbp,%rsi,8),%r9
xorq 1(%rbp,%rdi,8),%r10
shrl $16,%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 0(%rbp,%rsi,8),%r12
xorq 7(%rbp,%rdi,8),%r13
movl 32+8(%rsp),%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 6(%rbp,%rsi,8),%r14
xorq 5(%rbp,%rdi,8),%r15
shrl $16,%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 4(%rbp,%rsi,8),%r8
xorq 3(%rbp,%rdi,8),%r9
movl 32+8+4(%rsp),%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 2(%rbp,%rsi,8),%r10
xorq 1(%rbp,%rdi,8),%r11
shrl $16,%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 0(%rbp,%rsi,8),%r13
xorq 7(%rbp,%rdi,8),%r14
movl 40+8(%rsp),%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 6(%rbp,%rsi,8),%r15
xorq 5(%rbp,%rdi,8),%r8
shrl $16,%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 4(%rbp,%rsi,8),%r9
xorq 3(%rbp,%rdi,8),%r10
movl 40+8+4(%rsp),%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 2(%rbp,%rsi,8),%r11
xorq 1(%rbp,%rdi,8),%r12
shrl $16,%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 0(%rbp,%rsi,8),%r14
xorq 7(%rbp,%rdi,8),%r15
movl 48+8(%rsp),%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 6(%rbp,%rsi,8),%r8
xorq 5(%rbp,%rdi,8),%r9
shrl $16,%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 4(%rbp,%rsi,8),%r10
xorq 3(%rbp,%rdi,8),%r11
movl 48+8+4(%rsp),%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 2(%rbp,%rsi,8),%r12
xorq 1(%rbp,%rdi,8),%r13
shrl $16,%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 0(%rbp,%rsi,8),%r15
xorq 7(%rbp,%rdi,8),%r8
movl 56+8(%rsp),%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 6(%rbp,%rsi,8),%r9
xorq 5(%rbp,%rdi,8),%r10
shrl $16,%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 4(%rbp,%rsi,8),%r11
xorq 3(%rbp,%rdi,8),%r12
movl 56+8+4(%rsp),%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 2(%rbp,%rsi,8),%r13
xorq 1(%rbp,%rdi,8),%r14
movq %r8,0(%rsp)
movq %r9,8(%rsp)
movq %r10,16(%rsp)
movq %r11,24(%rsp)
movq %r12,32(%rsp)
movq %r13,40(%rsp)
movq %r14,48(%rsp)
movq %r15,56(%rsp)
shrl $16,%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 0(%rbp,%rsi,8),%r8
xorq 7(%rbp,%rdi,8),%r9
movl 64+0+8(%rsp),%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 6(%rbp,%rsi,8),%r10
xorq 5(%rbp,%rdi,8),%r11
shrl $16,%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 4(%rbp,%rsi,8),%r12
xorq 3(%rbp,%rdi,8),%r13
movl 64+0+8+4(%rsp),%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 2(%rbp,%rsi,8),%r14
xorq 1(%rbp,%rdi,8),%r15
shrl $16,%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 0(%rbp,%rsi,8),%r9
xorq 7(%rbp,%rdi,8),%r10
movl 64+8+8(%rsp),%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 6(%rbp,%rsi,8),%r11
xorq 5(%rbp,%rdi,8),%r12
shrl $16,%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 4(%rbp,%rsi,8),%r13
xorq 3(%rbp,%rdi,8),%r14
movl 64+8+8+4(%rsp),%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 2(%rbp,%rsi,8),%r15
xorq 1(%rbp,%rdi,8),%r8
shrl $16,%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 0(%rbp,%rsi,8),%r10
xorq 7(%rbp,%rdi,8),%r11
movl 64+16+8(%rsp),%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 6(%rbp,%rsi,8),%r12
xorq 5(%rbp,%rdi,8),%r13
shrl $16,%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 4(%rbp,%rsi,8),%r14
xorq 3(%rbp,%rdi,8),%r15
movl 64+16+8+4(%rsp),%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 2(%rbp,%rsi,8),%r8
xorq 1(%rbp,%rdi,8),%r9
shrl $16,%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 0(%rbp,%rsi,8),%r11
xorq 7(%rbp,%rdi,8),%r12
movl 64+24+8(%rsp),%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 6(%rbp,%rsi,8),%r13
xorq 5(%rbp,%rdi,8),%r14
shrl $16,%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 4(%rbp,%rsi,8),%r15
xorq 3(%rbp,%rdi,8),%r8
movl 64+24+8+4(%rsp),%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 2(%rbp,%rsi,8),%r9
xorq 1(%rbp,%rdi,8),%r10
shrl $16,%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 0(%rbp,%rsi,8),%r12
xorq 7(%rbp,%rdi,8),%r13
movl 64+32+8(%rsp),%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 6(%rbp,%rsi,8),%r14
xorq 5(%rbp,%rdi,8),%r15
shrl $16,%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 4(%rbp,%rsi,8),%r8
xorq 3(%rbp,%rdi,8),%r9
movl 64+32+8+4(%rsp),%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 2(%rbp,%rsi,8),%r10
xorq 1(%rbp,%rdi,8),%r11
shrl $16,%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 0(%rbp,%rsi,8),%r13
xorq 7(%rbp,%rdi,8),%r14
movl 64+40+8(%rsp),%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 6(%rbp,%rsi,8),%r15
xorq 5(%rbp,%rdi,8),%r8
shrl $16,%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 4(%rbp,%rsi,8),%r9
xorq 3(%rbp,%rdi,8),%r10
movl 64+40+8+4(%rsp),%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 2(%rbp,%rsi,8),%r11
xorq 1(%rbp,%rdi,8),%r12
shrl $16,%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 0(%rbp,%rsi,8),%r14
xorq 7(%rbp,%rdi,8),%r15
movl 64+48+8(%rsp),%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 6(%rbp,%rsi,8),%r8
xorq 5(%rbp,%rdi,8),%r9
shrl $16,%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 4(%rbp,%rsi,8),%r10
xorq 3(%rbp,%rdi,8),%r11
movl 64+48+8+4(%rsp),%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 2(%rbp,%rsi,8),%r12
xorq 1(%rbp,%rdi,8),%r13
shrl $16,%eax
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 0(%rbp,%rsi,8),%r15
xorq 7(%rbp,%rdi,8),%r8
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 6(%rbp,%rsi,8),%r9
xorq 5(%rbp,%rdi,8),%r10
shrl $16,%ebx
leaq (%rcx,%rcx,1),%rsi
movzbl %bl,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %bh,%edx
xorq 4(%rbp,%rsi,8),%r11
xorq 3(%rbp,%rdi,8),%r12
leaq (%rcx,%rcx,1),%rsi
movzbl %al,%ecx
leaq (%rdx,%rdx,1),%rdi
movzbl %ah,%edx
xorq 2(%rbp,%rsi,8),%r13
xorq 1(%rbp,%rdi,8),%r14
leaq 128(%rsp),%rbx
movq 24(%rbx),%rsi
addq $1,%rsi
cmpq $10,%rsi
je .Lroundsdone
movq %rsi,24(%rbx)
movq %r8,64+0(%rsp)
movq %r9,64+8(%rsp)
movq %r10,64+16(%rsp)
movq %r11,64+24(%rsp)
movq %r12,64+32(%rsp)
movq %r13,64+40(%rsp)
movq %r14,64+48(%rsp)
movq %r15,64+56(%rsp)
jmp .Lround
.align 16
.Lroundsdone:
movq 0(%rbx),%rdi
movq 8(%rbx),%rsi
movq 16(%rbx),%rax
xorq 0(%rsi),%r8
xorq 8(%rsi),%r9
xorq 16(%rsi),%r10
xorq 24(%rsi),%r11
xorq 32(%rsi),%r12
xorq 40(%rsi),%r13
xorq 48(%rsi),%r14
xorq 56(%rsi),%r15
xorq 0(%rdi),%r8
xorq 8(%rdi),%r9
xorq 16(%rdi),%r10
xorq 24(%rdi),%r11
xorq 32(%rdi),%r12
xorq 40(%rdi),%r13
xorq 48(%rdi),%r14
xorq 56(%rdi),%r15
movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)
leaq 64(%rsi),%rsi
subq $1,%rax
jz .Lalldone
movq %rsi,8(%rbx)
movq %rax,16(%rbx)
jmp .Louterloop
.Lalldone:
movq 32(%rbx),%rsi
.cfi_def_cfa %rsi,8
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size whirlpool_block,.-whirlpool_block
/*
 * .Ltable: constant lookup data placed after whirlpool_block.
 *
 * Layout: 256 entries of 16 bytes each, followed by 10 rows of 8 bytes.
 * In every 16-byte entry the second 8 bytes repeat the first 8, so an
 * 8-byte load that starts at byte offset k (0..7) inside an entry reads
 * the same 8-byte pattern byte-rotated by k positions.  The round code
 * addresses the table as k(%rbp,%rsi,8) with %rsi = 2*index, i.e. an
 * entry stride of 16 bytes plus offset k.
 *
 * NOTE(review): %rbp is assumed to hold the address of .Ltable (it is
 * loaded outside the visible part of the function), and the 10 trailing
 * 8-byte rows are presumably the per-round constants (the round loop
 * exits when its counter reaches 10) -- confirm against the generator.
 */
.align 64
.type .Ltable,@object
.Ltable:
.byte 24,24,96,24,192,120,48,216,24,24,96,24,192,120,48,216
.byte 35,35,140,35,5,175,70,38,35,35,140,35,5,175,70,38
.byte 198,198,63,198,126,249,145,184,198,198,63,198,126,249,145,184
.byte 232,232,135,232,19,111,205,251,232,232,135,232,19,111,205,251
.byte 135,135,38,135,76,161,19,203,135,135,38,135,76,161,19,203
.byte 184,184,218,184,169,98,109,17,184,184,218,184,169,98,109,17
.byte 1,1,4,1,8,5,2,9,1,1,4,1,8,5,2,9
.byte 79,79,33,79,66,110,158,13,79,79,33,79,66,110,158,13
.byte 54,54,216,54,173,238,108,155,54,54,216,54,173,238,108,155
.byte 166,166,162,166,89,4,81,255,166,166,162,166,89,4,81,255
.byte 210,210,111,210,222,189,185,12,210,210,111,210,222,189,185,12
.byte 245,245,243,245,251,6,247,14,245,245,243,245,251,6,247,14
.byte 121,121,249,121,239,128,242,150,121,121,249,121,239,128,242,150
.byte 111,111,161,111,95,206,222,48,111,111,161,111,95,206,222,48
.byte 145,145,126,145,252,239,63,109,145,145,126,145,252,239,63,109
.byte 82,82,85,82,170,7,164,248,82,82,85,82,170,7,164,248
.byte 96,96,157,96,39,253,192,71,96,96,157,96,39,253,192,71
.byte 188,188,202,188,137,118,101,53,188,188,202,188,137,118,101,53
.byte 155,155,86,155,172,205,43,55,155,155,86,155,172,205,43,55
.byte 142,142,2,142,4,140,1,138,142,142,2,142,4,140,1,138
.byte 163,163,182,163,113,21,91,210,163,163,182,163,113,21,91,210
.byte 12,12,48,12,96,60,24,108,12,12,48,12,96,60,24,108
.byte 123,123,241,123,255,138,246,132,123,123,241,123,255,138,246,132
.byte 53,53,212,53,181,225,106,128,53,53,212,53,181,225,106,128
.byte 29,29,116,29,232,105,58,245,29,29,116,29,232,105,58,245
.byte 224,224,167,224,83,71,221,179,224,224,167,224,83,71,221,179
.byte 215,215,123,215,246,172,179,33,215,215,123,215,246,172,179,33
.byte 194,194,47,194,94,237,153,156,194,194,47,194,94,237,153,156
.byte 46,46,184,46,109,150,92,67,46,46,184,46,109,150,92,67
.byte 75,75,49,75,98,122,150,41,75,75,49,75,98,122,150,41
.byte 254,254,223,254,163,33,225,93,254,254,223,254,163,33,225,93
.byte 87,87,65,87,130,22,174,213,87,87,65,87,130,22,174,213
.byte 21,21,84,21,168,65,42,189,21,21,84,21,168,65,42,189
.byte 119,119,193,119,159,182,238,232,119,119,193,119,159,182,238,232
.byte 55,55,220,55,165,235,110,146,55,55,220,55,165,235,110,146
.byte 229,229,179,229,123,86,215,158,229,229,179,229,123,86,215,158
.byte 159,159,70,159,140,217,35,19,159,159,70,159,140,217,35,19
.byte 240,240,231,240,211,23,253,35,240,240,231,240,211,23,253,35
.byte 74,74,53,74,106,127,148,32,74,74,53,74,106,127,148,32
.byte 218,218,79,218,158,149,169,68,218,218,79,218,158,149,169,68
.byte 88,88,125,88,250,37,176,162,88,88,125,88,250,37,176,162
.byte 201,201,3,201,6,202,143,207,201,201,3,201,6,202,143,207
.byte 41,41,164,41,85,141,82,124,41,41,164,41,85,141,82,124
.byte 10,10,40,10,80,34,20,90,10,10,40,10,80,34,20,90
.byte 177,177,254,177,225,79,127,80,177,177,254,177,225,79,127,80
.byte 160,160,186,160,105,26,93,201,160,160,186,160,105,26,93,201
.byte 107,107,177,107,127,218,214,20,107,107,177,107,127,218,214,20
.byte 133,133,46,133,92,171,23,217,133,133,46,133,92,171,23,217
.byte 189,189,206,189,129,115,103,60,189,189,206,189,129,115,103,60
.byte 93,93,105,93,210,52,186,143,93,93,105,93,210,52,186,143
.byte 16,16,64,16,128,80,32,144,16,16,64,16,128,80,32,144
.byte 244,244,247,244,243,3,245,7,244,244,247,244,243,3,245,7
.byte 203,203,11,203,22,192,139,221,203,203,11,203,22,192,139,221
.byte 62,62,248,62,237,198,124,211,62,62,248,62,237,198,124,211
.byte 5,5,20,5,40,17,10,45,5,5,20,5,40,17,10,45
.byte 103,103,129,103,31,230,206,120,103,103,129,103,31,230,206,120
.byte 228,228,183,228,115,83,213,151,228,228,183,228,115,83,213,151
.byte 39,39,156,39,37,187,78,2,39,39,156,39,37,187,78,2
.byte 65,65,25,65,50,88,130,115,65,65,25,65,50,88,130,115
.byte 139,139,22,139,44,157,11,167,139,139,22,139,44,157,11,167
.byte 167,167,166,167,81,1,83,246,167,167,166,167,81,1,83,246
.byte 125,125,233,125,207,148,250,178,125,125,233,125,207,148,250,178
.byte 149,149,110,149,220,251,55,73,149,149,110,149,220,251,55,73
.byte 216,216,71,216,142,159,173,86,216,216,71,216,142,159,173,86
.byte 251,251,203,251,139,48,235,112,251,251,203,251,139,48,235,112
.byte 238,238,159,238,35,113,193,205,238,238,159,238,35,113,193,205
.byte 124,124,237,124,199,145,248,187,124,124,237,124,199,145,248,187
.byte 102,102,133,102,23,227,204,113,102,102,133,102,23,227,204,113
.byte 221,221,83,221,166,142,167,123,221,221,83,221,166,142,167,123
.byte 23,23,92,23,184,75,46,175,23,23,92,23,184,75,46,175
.byte 71,71,1,71,2,70,142,69,71,71,1,71,2,70,142,69
.byte 158,158,66,158,132,220,33,26,158,158,66,158,132,220,33,26
.byte 202,202,15,202,30,197,137,212,202,202,15,202,30,197,137,212
.byte 45,45,180,45,117,153,90,88,45,45,180,45,117,153,90,88
.byte 191,191,198,191,145,121,99,46,191,191,198,191,145,121,99,46
.byte 7,7,28,7,56,27,14,63,7,7,28,7,56,27,14,63
.byte 173,173,142,173,1,35,71,172,173,173,142,173,1,35,71,172
.byte 90,90,117,90,234,47,180,176,90,90,117,90,234,47,180,176
.byte 131,131,54,131,108,181,27,239,131,131,54,131,108,181,27,239
.byte 51,51,204,51,133,255,102,182,51,51,204,51,133,255,102,182
.byte 99,99,145,99,63,242,198,92,99,99,145,99,63,242,198,92
.byte 2,2,8,2,16,10,4,18,2,2,8,2,16,10,4,18
.byte 170,170,146,170,57,56,73,147,170,170,146,170,57,56,73,147
.byte 113,113,217,113,175,168,226,222,113,113,217,113,175,168,226,222
.byte 200,200,7,200,14,207,141,198,200,200,7,200,14,207,141,198
.byte 25,25,100,25,200,125,50,209,25,25,100,25,200,125,50,209
.byte 73,73,57,73,114,112,146,59,73,73,57,73,114,112,146,59
.byte 217,217,67,217,134,154,175,95,217,217,67,217,134,154,175,95
.byte 242,242,239,242,195,29,249,49,242,242,239,242,195,29,249,49
.byte 227,227,171,227,75,72,219,168,227,227,171,227,75,72,219,168
.byte 91,91,113,91,226,42,182,185,91,91,113,91,226,42,182,185
.byte 136,136,26,136,52,146,13,188,136,136,26,136,52,146,13,188
.byte 154,154,82,154,164,200,41,62,154,154,82,154,164,200,41,62
.byte 38,38,152,38,45,190,76,11,38,38,152,38,45,190,76,11
.byte 50,50,200,50,141,250,100,191,50,50,200,50,141,250,100,191
.byte 176,176,250,176,233,74,125,89,176,176,250,176,233,74,125,89
.byte 233,233,131,233,27,106,207,242,233,233,131,233,27,106,207,242
.byte 15,15,60,15,120,51,30,119,15,15,60,15,120,51,30,119
.byte 213,213,115,213,230,166,183,51,213,213,115,213,230,166,183,51
.byte 128,128,58,128,116,186,29,244,128,128,58,128,116,186,29,244
.byte 190,190,194,190,153,124,97,39,190,190,194,190,153,124,97,39
.byte 205,205,19,205,38,222,135,235,205,205,19,205,38,222,135,235
.byte 52,52,208,52,189,228,104,137,52,52,208,52,189,228,104,137
.byte 72,72,61,72,122,117,144,50,72,72,61,72,122,117,144,50
.byte 255,255,219,255,171,36,227,84,255,255,219,255,171,36,227,84
.byte 122,122,245,122,247,143,244,141,122,122,245,122,247,143,244,141
.byte 144,144,122,144,244,234,61,100,144,144,122,144,244,234,61,100
.byte 95,95,97,95,194,62,190,157,95,95,97,95,194,62,190,157
.byte 32,32,128,32,29,160,64,61,32,32,128,32,29,160,64,61
.byte 104,104,189,104,103,213,208,15,104,104,189,104,103,213,208,15
.byte 26,26,104,26,208,114,52,202,26,26,104,26,208,114,52,202
.byte 174,174,130,174,25,44,65,183,174,174,130,174,25,44,65,183
.byte 180,180,234,180,201,94,117,125,180,180,234,180,201,94,117,125
.byte 84,84,77,84,154,25,168,206,84,84,77,84,154,25,168,206
.byte 147,147,118,147,236,229,59,127,147,147,118,147,236,229,59,127
.byte 34,34,136,34,13,170,68,47,34,34,136,34,13,170,68,47
.byte 100,100,141,100,7,233,200,99,100,100,141,100,7,233,200,99
.byte 241,241,227,241,219,18,255,42,241,241,227,241,219,18,255,42
.byte 115,115,209,115,191,162,230,204,115,115,209,115,191,162,230,204
.byte 18,18,72,18,144,90,36,130,18,18,72,18,144,90,36,130
.byte 64,64,29,64,58,93,128,122,64,64,29,64,58,93,128,122
.byte 8,8,32,8,64,40,16,72,8,8,32,8,64,40,16,72
.byte 195,195,43,195,86,232,155,149,195,195,43,195,86,232,155,149
.byte 236,236,151,236,51,123,197,223,236,236,151,236,51,123,197,223
.byte 219,219,75,219,150,144,171,77,219,219,75,219,150,144,171,77
.byte 161,161,190,161,97,31,95,192,161,161,190,161,97,31,95,192
.byte 141,141,14,141,28,131,7,145,141,141,14,141,28,131,7,145
.byte 61,61,244,61,245,201,122,200,61,61,244,61,245,201,122,200
.byte 151,151,102,151,204,241,51,91,151,151,102,151,204,241,51,91
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 207,207,27,207,54,212,131,249,207,207,27,207,54,212,131,249
.byte 43,43,172,43,69,135,86,110,43,43,172,43,69,135,86,110
.byte 118,118,197,118,151,179,236,225,118,118,197,118,151,179,236,225
.byte 130,130,50,130,100,176,25,230,130,130,50,130,100,176,25,230
.byte 214,214,127,214,254,169,177,40,214,214,127,214,254,169,177,40
.byte 27,27,108,27,216,119,54,195,27,27,108,27,216,119,54,195
.byte 181,181,238,181,193,91,119,116,181,181,238,181,193,91,119,116
.byte 175,175,134,175,17,41,67,190,175,175,134,175,17,41,67,190
.byte 106,106,181,106,119,223,212,29,106,106,181,106,119,223,212,29
.byte 80,80,93,80,186,13,160,234,80,80,93,80,186,13,160,234
.byte 69,69,9,69,18,76,138,87,69,69,9,69,18,76,138,87
.byte 243,243,235,243,203,24,251,56,243,243,235,243,203,24,251,56
.byte 48,48,192,48,157,240,96,173,48,48,192,48,157,240,96,173
.byte 239,239,155,239,43,116,195,196,239,239,155,239,43,116,195,196
.byte 63,63,252,63,229,195,126,218,63,63,252,63,229,195,126,218
.byte 85,85,73,85,146,28,170,199,85,85,73,85,146,28,170,199
.byte 162,162,178,162,121,16,89,219,162,162,178,162,121,16,89,219
.byte 234,234,143,234,3,101,201,233,234,234,143,234,3,101,201,233
.byte 101,101,137,101,15,236,202,106,101,101,137,101,15,236,202,106
.byte 186,186,210,186,185,104,105,3,186,186,210,186,185,104,105,3
.byte 47,47,188,47,101,147,94,74,47,47,188,47,101,147,94,74
.byte 192,192,39,192,78,231,157,142,192,192,39,192,78,231,157,142
.byte 222,222,95,222,190,129,161,96,222,222,95,222,190,129,161,96
.byte 28,28,112,28,224,108,56,252,28,28,112,28,224,108,56,252
.byte 253,253,211,253,187,46,231,70,253,253,211,253,187,46,231,70
.byte 77,77,41,77,82,100,154,31,77,77,41,77,82,100,154,31
.byte 146,146,114,146,228,224,57,118,146,146,114,146,228,224,57,118
.byte 117,117,201,117,143,188,234,250,117,117,201,117,143,188,234,250
.byte 6,6,24,6,48,30,12,54,6,6,24,6,48,30,12,54
.byte 138,138,18,138,36,152,9,174,138,138,18,138,36,152,9,174
.byte 178,178,242,178,249,64,121,75,178,178,242,178,249,64,121,75
.byte 230,230,191,230,99,89,209,133,230,230,191,230,99,89,209,133
.byte 14,14,56,14,112,54,28,126,14,14,56,14,112,54,28,126
.byte 31,31,124,31,248,99,62,231,31,31,124,31,248,99,62,231
.byte 98,98,149,98,55,247,196,85,98,98,149,98,55,247,196,85
.byte 212,212,119,212,238,163,181,58,212,212,119,212,238,163,181,58
.byte 168,168,154,168,41,50,77,129,168,168,154,168,41,50,77,129
.byte 150,150,98,150,196,244,49,82,150,150,98,150,196,244,49,82
.byte 249,249,195,249,155,58,239,98,249,249,195,249,155,58,239,98
.byte 197,197,51,197,102,246,151,163,197,197,51,197,102,246,151,163
.byte 37,37,148,37,53,177,74,16,37,37,148,37,53,177,74,16
.byte 89,89,121,89,242,32,178,171,89,89,121,89,242,32,178,171
.byte 132,132,42,132,84,174,21,208,132,132,42,132,84,174,21,208
.byte 114,114,213,114,183,167,228,197,114,114,213,114,183,167,228,197
.byte 57,57,228,57,213,221,114,236,57,57,228,57,213,221,114,236
.byte 76,76,45,76,90,97,152,22,76,76,45,76,90,97,152,22
.byte 94,94,101,94,202,59,188,148,94,94,101,94,202,59,188,148
.byte 120,120,253,120,231,133,240,159,120,120,253,120,231,133,240,159
.byte 56,56,224,56,221,216,112,229,56,56,224,56,221,216,112,229
.byte 140,140,10,140,20,134,5,152,140,140,10,140,20,134,5,152
.byte 209,209,99,209,198,178,191,23,209,209,99,209,198,178,191,23
.byte 165,165,174,165,65,11,87,228,165,165,174,165,65,11,87,228
.byte 226,226,175,226,67,77,217,161,226,226,175,226,67,77,217,161
.byte 97,97,153,97,47,248,194,78,97,97,153,97,47,248,194,78
.byte 179,179,246,179,241,69,123,66,179,179,246,179,241,69,123,66
.byte 33,33,132,33,21,165,66,52,33,33,132,33,21,165,66,52
.byte 156,156,74,156,148,214,37,8,156,156,74,156,148,214,37,8
.byte 30,30,120,30,240,102,60,238,30,30,120,30,240,102,60,238
.byte 67,67,17,67,34,82,134,97,67,67,17,67,34,82,134,97
.byte 199,199,59,199,118,252,147,177,199,199,59,199,118,252,147,177
.byte 252,252,215,252,179,43,229,79,252,252,215,252,179,43,229,79
.byte 4,4,16,4,32,20,8,36,4,4,16,4,32,20,8,36
.byte 81,81,89,81,178,8,162,227,81,81,89,81,178,8,162,227
.byte 153,153,94,153,188,199,47,37,153,153,94,153,188,199,47,37
.byte 109,109,169,109,79,196,218,34,109,109,169,109,79,196,218,34
.byte 13,13,52,13,104,57,26,101,13,13,52,13,104,57,26,101
.byte 250,250,207,250,131,53,233,121,250,250,207,250,131,53,233,121
.byte 223,223,91,223,182,132,163,105,223,223,91,223,182,132,163,105
.byte 126,126,229,126,215,155,252,169,126,126,229,126,215,155,252,169
.byte 36,36,144,36,61,180,72,25,36,36,144,36,61,180,72,25
.byte 59,59,236,59,197,215,118,254,59,59,236,59,197,215,118,254
.byte 171,171,150,171,49,61,75,154,171,171,150,171,49,61,75,154
.byte 206,206,31,206,62,209,129,240,206,206,31,206,62,209,129,240
.byte 17,17,68,17,136,85,34,153,17,17,68,17,136,85,34,153
.byte 143,143,6,143,12,137,3,131,143,143,6,143,12,137,3,131
.byte 78,78,37,78,74,107,156,4,78,78,37,78,74,107,156,4
.byte 183,183,230,183,209,81,115,102,183,183,230,183,209,81,115,102
.byte 235,235,139,235,11,96,203,224,235,235,139,235,11,96,203,224
.byte 60,60,240,60,253,204,120,193,60,60,240,60,253,204,120,193
.byte 129,129,62,129,124,191,31,253,129,129,62,129,124,191,31,253
.byte 148,148,106,148,212,254,53,64,148,148,106,148,212,254,53,64
.byte 247,247,251,247,235,12,243,28,247,247,251,247,235,12,243,28
.byte 185,185,222,185,161,103,111,24,185,185,222,185,161,103,111,24
.byte 19,19,76,19,152,95,38,139,19,19,76,19,152,95,38,139
.byte 44,44,176,44,125,156,88,81,44,44,176,44,125,156,88,81
.byte 211,211,107,211,214,184,187,5,211,211,107,211,214,184,187,5
.byte 231,231,187,231,107,92,211,140,231,231,187,231,107,92,211,140
.byte 110,110,165,110,87,203,220,57,110,110,165,110,87,203,220,57
.byte 196,196,55,196,110,243,149,170,196,196,55,196,110,243,149,170
.byte 3,3,12,3,24,15,6,27,3,3,12,3,24,15,6,27
.byte 86,86,69,86,138,19,172,220,86,86,69,86,138,19,172,220
.byte 68,68,13,68,26,73,136,94,68,68,13,68,26,73,136,94
.byte 127,127,225,127,223,158,254,160,127,127,225,127,223,158,254,160
.byte 169,169,158,169,33,55,79,136,169,169,158,169,33,55,79,136
.byte 42,42,168,42,77,130,84,103,42,42,168,42,77,130,84,103
.byte 187,187,214,187,177,109,107,10,187,187,214,187,177,109,107,10
.byte 193,193,35,193,70,226,159,135,193,193,35,193,70,226,159,135
.byte 83,83,81,83,162,2,166,241,83,83,81,83,162,2,166,241
.byte 220,220,87,220,174,139,165,114,220,220,87,220,174,139,165,114
.byte 11,11,44,11,88,39,22,83,11,11,44,11,88,39,22,83
.byte 157,157,78,157,156,211,39,1,157,157,78,157,156,211,39,1
.byte 108,108,173,108,71,193,216,43,108,108,173,108,71,193,216,43
.byte 49,49,196,49,149,245,98,164,49,49,196,49,149,245,98,164
.byte 116,116,205,116,135,185,232,243,116,116,205,116,135,185,232,243
.byte 246,246,255,246,227,9,241,21,246,246,255,246,227,9,241,21
.byte 70,70,5,70,10,67,140,76,70,70,5,70,10,67,140,76
.byte 172,172,138,172,9,38,69,165,172,172,138,172,9,38,69,165
.byte 137,137,30,137,60,151,15,181,137,137,30,137,60,151,15,181
.byte 20,20,80,20,160,68,40,180,20,20,80,20,160,68,40,180
.byte 225,225,163,225,91,66,223,186,225,225,163,225,91,66,223,186
.byte 22,22,88,22,176,78,44,166,22,22,88,22,176,78,44,166
.byte 58,58,232,58,205,210,116,247,58,58,232,58,205,210,116,247
.byte 105,105,185,105,111,208,210,6,105,105,185,105,111,208,210,6
.byte 9,9,36,9,72,45,18,65,9,9,36,9,72,45,18,65
.byte 112,112,221,112,167,173,224,215,112,112,221,112,167,173,224,215
.byte 182,182,226,182,217,84,113,111,182,182,226,182,217,84,113,111
.byte 208,208,103,208,206,183,189,30,208,208,103,208,206,183,189,30
.byte 237,237,147,237,59,126,199,214,237,237,147,237,59,126,199,214
.byte 204,204,23,204,46,219,133,226,204,204,23,204,46,219,133,226
.byte 66,66,21,66,42,87,132,104,66,66,21,66,42,87,132,104
.byte 152,152,90,152,180,194,45,44,152,152,90,152,180,194,45,44
.byte 164,164,170,164,73,14,85,237,164,164,170,164,73,14,85,237
.byte 40,40,160,40,93,136,80,117,40,40,160,40,93,136,80,117
.byte 92,92,109,92,218,49,184,134,92,92,109,92,218,49,184,134
.byte 248,248,199,248,147,63,237,107,248,248,199,248,147,63,237,107
.byte 134,134,34,134,68,164,17,194,134,134,34,134,68,164,17,194
.byte 24,35,198,232,135,184,1,79
.byte 54,166,210,245,121,111,145,82
.byte 96,188,155,142,163,12,123,53
.byte 29,224,215,194,46,75,254,87
.byte 21,119,55,229,159,240,74,218
.byte 88,201,41,10,177,160,107,133
.byte 189,93,16,244,203,62,5,103
.byte 228,39,65,139,167,125,149,216
.byte 251,238,124,102,221,23,71,158
.byte 202,45,191,7,173,90,131,51
/*
 * ELF note emitted into .note.gnu.property.
 * The three leading .long words form the note header: name size
 * (1f - 0f), descriptor size (4f - 1f) and the note type (5).
 * The name is the NUL-terminated string "GNU"; the descriptor holds one
 * property record: a 32-bit property type, a 32-bit data size (3f - 2f)
 * and the data word, padded to 8-byte alignment.
 * NOTE(review): type 5 / property 0xc0000002 / value 3 look like
 * NT_GNU_PROPERTY_TYPE_0 with GNU_PROPERTY_X86_FEATURE_1_AND and the
 * IBT|SHSTK bits -- confirm against the x86-64 psABI.
 */
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:
@@ -1,824 +0,0 @@
/* Do not modify. This file is auto-generated from x25519-x86_64.pl. */
.text
/*
 * x25519_fe51_mul: multiply two 5-word values limb by limb and hand the
 * five 128-bit column sums to .Lreduce51, which writes the 5-word result.
 *
 * In:    %rdi = result pointer (spilled to 32(%rsp), reloaded before the jump)
 *        %rsi = f, five 64-bit words read at offsets 0..32
 *        %rdx = g, five 64-bit words read at offsets 0..32
 * Frame: six callee-saved pushes plus 40 bytes of locals:
 *        0(%rsp)=g[0], 8(%rsp)=g[1], 16(%rsp)=g[2], 32(%rsp)=saved %rdi.
 *        The same frame layout is unwound by the epilogue after .Lreduce51.
 * Accumulators (hi:lo): h0=%rcx:%rbx, h1=%r9:%r8, h2=%r11:%r10,
 *        h3=%r13:%r12, h4=%r15:%r14.
 * Products f[i]*g[j] with i+j >= 5 are formed with 19*g[j] and added to
 * h[i+j-5]; all other products are added to h[i+j].
 * NOTE(review): limbs are presumably 51 bits wide (radix 2^51, modulus
 * 2^255-19, as the name and the mask used in .Lreduce51 suggest); the
 * input range needed to avoid accumulator overflow is not visible here.
 */
.globl x25519_fe51_mul
.type x25519_fe51_mul,@function
.align 32
x25519_fe51_mul:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
leaq -40(%rsp),%rsp
.cfi_adjust_cfa_offset 40
.Lfe51_mul_body:
/* Load f[0] and g[0..4]; %rdi is reused as a scratch copy of f[0]. */
movq 0(%rsi),%rax
movq 0(%rdx),%r11
movq 8(%rdx),%r12
movq 16(%rdx),%r13
movq 24(%rdx),%rbp
movq 32(%rdx),%r14
movq %rdi,32(%rsp)
movq %rax,%rdi
/* f[0]*g[0..4] initialise h0..h4; g[0..2] are spilled along the way. */
mulq %r11
movq %r11,0(%rsp)
movq %rax,%rbx
movq %rdi,%rax
movq %rdx,%rcx
mulq %r12
movq %r12,8(%rsp)
movq %rax,%r8
movq %rdi,%rax
/* %r15 = 9*g[4]; two instructions later %rdi = g[4] + 2*%r15 = 19*g[4]. */
leaq (%r14,%r14,8),%r15
movq %rdx,%r9
mulq %r13
movq %r13,16(%rsp)
movq %rax,%r10
movq %rdi,%rax
leaq (%r14,%r15,2),%rdi
movq %rdx,%r11
mulq %rbp
movq %rax,%r12
movq 0(%rsi),%rax
movq %rdx,%r13
mulq %r14
movq %rax,%r14
movq 8(%rsi),%rax
movq %rdx,%r15
/* f[1..4] * 19*g[4] -> h0..h3 */
mulq %rdi
addq %rax,%rbx
movq 16(%rsi),%rax
adcq %rdx,%rcx
mulq %rdi
addq %rax,%r8
movq 24(%rsi),%rax
adcq %rdx,%r9
mulq %rdi
addq %rax,%r10
movq 32(%rsi),%rax
adcq %rdx,%r11
mulq %rdi
/* %rdi = 19*g[3]; %rbp still holds g[3] for the f[1]*g[3] -> h4 term. */
imulq $19,%rbp,%rdi
addq %rax,%r12
movq 8(%rsi),%rax
adcq %rdx,%r13
mulq %rbp
/* %rbp = g[2] from here on. */
movq 16(%rsp),%rbp
addq %rax,%r14
movq 16(%rsi),%rax
adcq %rdx,%r15
/* f[2..4] * 19*g[3] -> h0..h2 */
mulq %rdi
addq %rax,%rbx
movq 24(%rsi),%rax
adcq %rdx,%rcx
mulq %rdi
addq %rax,%r8
movq 32(%rsi),%rax
adcq %rdx,%r9
mulq %rdi
/* %rdi = 19*g[2]; f[1..2]*g[2] -> h3..h4 */
imulq $19,%rbp,%rdi
addq %rax,%r10
movq 8(%rsi),%rax
adcq %rdx,%r11
mulq %rbp
addq %rax,%r12
movq 16(%rsi),%rax
adcq %rdx,%r13
mulq %rbp
/* %rbp = g[1] from here on. */
movq 8(%rsp),%rbp
addq %rax,%r14
movq 24(%rsi),%rax
adcq %rdx,%r15
/* f[3..4] * 19*g[2] -> h0..h1 */
mulq %rdi
addq %rax,%rbx
movq 32(%rsi),%rax
adcq %rdx,%rcx
mulq %rdi
addq %rax,%r8
movq 8(%rsi),%rax
adcq %rdx,%r9
/* f[1..3]*g[1] -> h2..h4; %rdi = 19*g[1] for the f[4] term. */
mulq %rbp
imulq $19,%rbp,%rdi
addq %rax,%r10
movq 16(%rsi),%rax
adcq %rdx,%r11
mulq %rbp
addq %rax,%r12
movq 24(%rsi),%rax
adcq %rdx,%r13
mulq %rbp
/* %rbp = g[0] from here on. */
movq 0(%rsp),%rbp
addq %rax,%r14
movq 32(%rsi),%rax
adcq %rdx,%r15
/* f[4] * 19*g[1] -> h0, then f[1..4]*g[0] -> h1..h4 */
mulq %rdi
addq %rax,%rbx
movq 8(%rsi),%rax
adcq %rdx,%rcx
mulq %rbp
addq %rax,%r8
movq 16(%rsi),%rax
adcq %rdx,%r9
mulq %rbp
addq %rax,%r10
movq 24(%rsi),%rax
adcq %rdx,%r11
mulq %rbp
addq %rax,%r12
movq 32(%rsi),%rax
adcq %rdx,%r13
mulq %rbp
addq %rax,%r14
adcq %rdx,%r15
/* Restore the result pointer and share the reduction/epilogue code. */
movq 32(%rsp),%rdi
jmp .Lreduce51
.Lfe51_mul_epilogue:
.cfi_endproc
.size x25519_fe51_mul,.-x25519_fe51_mul
/*
 * x25519_fe51_sqr: square a 5-word value, then fall into the shared
 * .Lreduce51 tail which carries, folds and stores the 5-word result.
 *
 * In:    %rdi = result pointer (spilled to 32(%rsp), reloaded before the jump)
 *        %rsi = f, five 64-bit words read at offsets 0..32
 * Frame: six callee-saved pushes plus 40 bytes of locals
 *        (0(%rsp)=f[2], 32(%rsp)=saved %rdi); total 88 bytes below the
 *        return address.
 * Accumulators (hi:lo): h0=%rcx:%rbx, h1=%r9:%r8, h2=%r11:%r10,
 *        h3=%r13:%r12, h4=%r15:%r14.
 * Cross products use a doubled operand; products whose limb indices sum
 * to 5 or more use a factor pre-multiplied by 19.
 *
 * .Lreduce51 is also entered by jump from x25519_fe51_mul and
 * x25519_fe51_mul121666 with the same frame layout, the accumulators
 * above and %rdi = result pointer.
 * NOTE(review): limbs are presumably 51 bits wide (radix 2^51, modulus
 * 2^255-19); the required input range is not visible here.
 */
.globl x25519_fe51_sqr
.type x25519_fe51_sqr,@function
.align 32
x25519_fe51_sqr:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
leaq -40(%rsp),%rsp
.cfi_adjust_cfa_offset 40
.Lfe51_sqr_body:
movq 0(%rsi),%rax
movq 16(%rsi),%r15
movq 32(%rsi),%rbp
movq %rdi,32(%rsp)
/* %r14 = 2*f[0]; f[0]^2 -> h0, then 2*f[0]*f[1..4] -> h1..h4 */
leaq (%rax,%rax,1),%r14
mulq %rax
movq %rax,%rbx
movq 8(%rsi),%rax
movq %rdx,%rcx
mulq %r14
movq %rax,%r8
movq %r15,%rax
movq %r15,0(%rsp)
movq %rdx,%r9
mulq %r14
movq %rax,%r10
movq 24(%rsi),%rax
movq %rdx,%r11
/* %rdi = 19*f[4] */
imulq $19,%rbp,%rdi
mulq %r14
movq %rax,%r12
movq %rbp,%rax
movq %rdx,%r13
mulq %r14
movq %rax,%r14
movq %rbp,%rax
movq %rdx,%r15
/* f[4] * 19*f[4] -> h3 */
mulq %rdi
addq %rax,%r12
movq 8(%rsi),%rax
adcq %rdx,%r13
/* %rsi now holds f[3]; %rbp = 2*f[1] */
movq 24(%rsi),%rsi
leaq (%rax,%rax,1),%rbp
mulq %rax
addq %rax,%r10
movq 0(%rsp),%rax
adcq %rdx,%r11
mulq %rbp
addq %rax,%r12
movq %rbp,%rax
adcq %rdx,%r13
mulq %rsi
addq %rax,%r14
movq %rbp,%rax
adcq %rdx,%r15
/* %rbp = 19*f[3] */
imulq $19,%rsi,%rbp
mulq %rdi
addq %rax,%rbx
leaq (%rsi,%rsi,1),%rax
adcq %rdx,%rcx
mulq %rdi
addq %rax,%r10
movq %rsi,%rax
adcq %rdx,%r11
mulq %rbp
addq %rax,%r8
movq 0(%rsp),%rax
adcq %rdx,%r9
/* %rsi = 2*f[2] */
leaq (%rax,%rax,1),%rsi
mulq %rax
addq %rax,%r14
movq %rbp,%rax
adcq %rdx,%r15
mulq %rsi
addq %rax,%rbx
movq %rsi,%rax
adcq %rdx,%rcx
mulq %rdi
addq %rax,%r8
adcq %rdx,%r9
movq 32(%rsp),%rdi
jmp .Lreduce51
.align 32
.Lreduce51:
/* %rbp = 2^51 - 1, the limb mask.  For each 128-bit accumulator the low
   51 bits are kept and the rest, (hi << 13) | (lo >> 51), is carried
   into the next accumulator. */
movq $0x7ffffffffffff,%rbp
/* h2 -> limb 2 in %rdx, carry into h3 */
movq %r10,%rdx
shrq $51,%r10
shlq $13,%r11
andq %rbp,%rdx
orq %r10,%r11
addq %r11,%r12
adcq $0,%r13
/* h0 -> limb 0 in %rax, carry into h1 */
movq %rbx,%rax
shrq $51,%rbx
shlq $13,%rcx
andq %rbp,%rax
orq %rbx,%rcx
addq %rcx,%r8
adcq $0,%r9
/* h3 -> limb 3 in %rbx, carry into h4 */
movq %r12,%rbx
shrq $51,%r12
shlq $13,%r13
andq %rbp,%rbx
orq %r12,%r13
addq %r13,%r14
adcq $0,%r15
/* h1 -> limb 1 in %rcx, carry into limb 2 */
movq %r8,%rcx
shrq $51,%r8
shlq $13,%r9
andq %rbp,%rcx
orq %r8,%r9
addq %r9,%rdx
/* h4 -> limb 4 in %r10; its carry is multiplied by 19 (9x, then x+2*9x)
   and wrapped around into limb 0 */
movq %r14,%r10
shrq $51,%r14
shlq $13,%r15
andq %rbp,%r10
orq %r14,%r15
leaq (%r15,%r15,8),%r14
leaq (%r15,%r14,2),%r15
addq %r15,%rax
/* one more carry step: limb 2 -> limb 3 and limb 0 -> limb 1 */
movq %rdx,%r8
andq %rbp,%rdx
shrq $51,%r8
addq %r8,%rbx
movq %rax,%r9
andq %rbp,%rax
shrq $51,%r9
addq %r9,%rcx
movq %rax,0(%rdi)
movq %rcx,8(%rdi)
movq %rdx,16(%rdi)
movq %rbx,24(%rdi)
movq %r10,32(%rdi)
/* Epilogue: reload callee-saved registers from above the 40-byte locals. */
movq 40(%rsp),%r15
.cfi_restore %r15
movq 48(%rsp),%r14
.cfi_restore %r14
movq 56(%rsp),%r13
.cfi_restore %r13
movq 64(%rsp),%r12
.cfi_restore %r12
movq 72(%rsp),%rbx
.cfi_restore %rbx
movq 80(%rsp),%rbp
.cfi_restore %rbp
leaq 88(%rsp),%rsp
/* %rsp moved up by 88, so the CFA offset shrinks by 88 (was +88, which
   left the unwinder with a wrong CFA at the return instruction).
   This file is generated from x25519-x86_64.pl; mirror the fix there. */
.cfi_adjust_cfa_offset -88
.Lfe51_sqr_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size x25519_fe51_sqr,.-x25519_fe51_sqr
/*
 * x25519_fe51_mul121666: multiply each of the five input words by the
 * constant 121666 and jump to .Lreduce51 to carry and store the result.
 *
 * In:    %rdi = result pointer (left untouched, consumed by .Lreduce51)
 *        %rsi = f, five 64-bit words read at offsets 0..32
 * Frame: identical to x25519_fe51_mul/x25519_fe51_sqr (six pushes plus 40
 *        bytes) because the shared epilogue after .Lreduce51 unwinds
 *        exactly that layout; the 40 bytes are not used here.
 * Products (hi:lo): %rcx:%rbx, %r9:%r8, %r11:%r10, %r13:%r12, %r15:%r14.
 */
.globl x25519_fe51_mul121666
.type x25519_fe51_mul121666,@function
.align 32
x25519_fe51_mul121666:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
leaq -40(%rsp),%rsp
.cfi_adjust_cfa_offset 40
.Lfe51_mul121666_body:
/* word 0 */
movl $121666,%eax
mulq 0(%rsi)
movq %rdx,%rcx
movq %rax,%rbx
/* word 1 */
movl $121666,%eax
mulq 8(%rsi)
movq %rdx,%r9
movq %rax,%r8
/* word 2 */
movl $121666,%eax
mulq 16(%rsi)
movq %rdx,%r11
movq %rax,%r10
/* word 3 */
movl $121666,%eax
mulq 24(%rsi)
movq %rdx,%r13
movq %rax,%r12
/* word 4 */
movl $121666,%eax
mulq 32(%rsi)
movq %rdx,%r15
movq %rax,%r14
jmp .Lreduce51
.Lfe51_mul121666_epilogue:
.cfi_endproc
.size x25519_fe51_mul121666,.-x25519_fe51_mul121666
/*
 * x25519_fe64_eligible: report whether both capability bits selected by
 * the mask 0x80100 are set in the 32-bit word at OPENSSL_ia32cap_P+8.
 *
 * Out:   %eax = 0x80100 when both bits are set, 0 otherwise.
 * Clobb: %ecx, flags.
 * NOTE(review): bits 8 and 19 of that word are presumably the BMI2 and
 * ADX feature flags (the fe64 routines use mulx/adcx/adox) -- confirm
 * against the OPENSSL_ia32cap documentation.
 */
.globl x25519_fe64_eligible
.type x25519_fe64_eligible,@function
.align 32
x25519_fe64_eligible:
.cfi_startproc
/* default answer: not eligible */
xorl %eax,%eax
movl OPENSSL_ia32cap_P+8(%rip),%ecx
andl $0x80100,%ecx
/* only an exact match of the masked value counts */
cmpl $0x80100,%ecx
cmovel %ecx,%eax
.byte 0xf3,0xc3
.cfi_endproc
.size x25519_fe64_eligible,.-x25519_fe64_eligible
/*
 * x25519_fe64_mul: 4x4-word multiply (256 x 256 -> 512 bits) built from
 * mulx with two interleaved carry chains (adcx uses CF, adox uses OF),
 * followed by a jump to the shared .Lreduce64 tail.
 *
 * In:    %rdi = result pointer (pushed; .Lreduce64 reloads it from 16(%rsp))
 *        %rsi = a, four 64-bit words read at offsets 0..24
 *        %rdx = b, four 64-bit words read at offsets 0..24
 * Frame: seven pushes (six callee-saved registers and %rdi) plus 16 bytes;
 *        (%rsp) keeps a copy of b[2] because %r14 is reused as an
 *        accumulator in the last two rows.
 * Regs:  %rbp=b[0], %rcx=b[1], %r14=b[2], %r15=b[3]; %rdx holds the
 *        current a[i] (implicit mulx operand); %rdi is zeroed by xorl,
 *        which also clears CF and OF, and then serves as the zero addend
 *        that terminates each carry chain.
 * Out to .Lreduce64: product words in %r8..%r15 (low to high),
 *        %edx = 38, %rdi = 0.
 * Requires the BMI2 (mulx) and ADX (adcx/adox) instruction extensions.
 */
.globl x25519_fe64_mul
.type x25519_fe64_mul,@function
.align 32
x25519_fe64_mul:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
pushq %rdi
.cfi_adjust_cfa_offset 8
.cfi_offset %rdi,-64
leaq -16(%rsp),%rsp
.cfi_adjust_cfa_offset 16
.Lfe64_mul_body:
/* %rax temporarily holds the b pointer because %rdx is needed for a[0]. */
movq %rdx,%rax
movq 0(%rdx),%rbp
movq 0(%rsi),%rdx
movq 8(%rax),%rcx
movq 16(%rax),%r14
movq 24(%rax),%r15
/* row 0: a[0] * b[0..3] -> %r8..%r12 */
mulxq %rbp,%r8,%rax
xorl %edi,%edi
mulxq %rcx,%r9,%rbx
adcxq %rax,%r9
mulxq %r14,%r10,%rax
adcxq %rbx,%r10
mulxq %r15,%r11,%r12
movq 8(%rsi),%rdx
adcxq %rax,%r11
movq %r14,(%rsp)
adcxq %rdi,%r12
/* row 1: a[1] * b[0..3] accumulated into %r9..%r13 */
mulxq %rbp,%rax,%rbx
adoxq %rax,%r9
adcxq %rbx,%r10
mulxq %rcx,%rax,%rbx
adoxq %rax,%r10
adcxq %rbx,%r11
mulxq %r14,%rax,%rbx
adoxq %rax,%r11
adcxq %rbx,%r12
mulxq %r15,%rax,%r13
movq 16(%rsi),%rdx
adoxq %rax,%r12
adcxq %rdi,%r13
adoxq %rdi,%r13
/* row 2: a[2] * b[0..3] accumulated into %r10..%r14 */
mulxq %rbp,%rax,%rbx
adcxq %rax,%r10
adoxq %rbx,%r11
mulxq %rcx,%rax,%rbx
adcxq %rax,%r11
adoxq %rbx,%r12
mulxq %r14,%rax,%rbx
adcxq %rax,%r12
adoxq %rbx,%r13
mulxq %r15,%rax,%r14
movq 24(%rsi),%rdx
adcxq %rax,%r13
adoxq %rdi,%r14
adcxq %rdi,%r14
/* row 3: a[3] * b[0..3] accumulated into %r11..%r15; b[2] comes from (%rsp) */
mulxq %rbp,%rax,%rbx
adoxq %rax,%r11
adcxq %rbx,%r12
mulxq %rcx,%rax,%rbx
adoxq %rax,%r12
adcxq %rbx,%r13
mulxq (%rsp),%rax,%rbx
adoxq %rax,%r13
adcxq %rbx,%r14
mulxq %r15,%rax,%r15
/* %edx = 38 is the multiplier .Lreduce64 expects in %rdx */
movl $38,%edx
adoxq %rax,%r14
adcxq %rdi,%r15
adoxq %rdi,%r15
jmp .Lreduce64
.Lfe64_mul_epilogue:
.cfi_endproc
.size x25519_fe64_mul,.-x25519_fe64_mul
/*
 * x25519_fe64_sqr: square a 4-word value (256 -> 512 bits) with
 * mulx/adcx/adox, then fall into the shared .Lreduce64 tail, which folds
 * the upper four words into the lower four using the factor 38 and
 * stores the 4-word result.
 *
 * In:    %rdi = result pointer (pushed; reloaded from 16(%rsp) in .Lreduce64)
 *        %rsi = a, four 64-bit words read at offsets 0..24
 * Frame: seven pushes (six callee-saved registers and %rdi) plus 16
 *        bytes, i.e. 72 bytes below the return address.
 * Regs:  %rcx=a[1], %rbp=a[2], %rsi=a[3]; %rdx is the implicit mulx
 *        operand; %rdi is zeroed by xorl (which also clears CF and OF).
 *
 * .Lreduce64 is also entered by jump from x25519_fe64_mul with the same
 * frame layout, the 512-bit value in %r8..%r15 (low to high), %edx = 38
 * and %rdi = 0.
 * Requires the BMI2 (mulx) and ADX (adcx/adox) instruction extensions.
 */
.globl x25519_fe64_sqr
.type x25519_fe64_sqr,@function
.align 32
x25519_fe64_sqr:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
pushq %rdi
.cfi_adjust_cfa_offset 8
.cfi_offset %rdi,-64
leaq -16(%rsp),%rsp
.cfi_adjust_cfa_offset 16
.Lfe64_sqr_body:
movq 0(%rsi),%rdx
movq 8(%rsi),%rcx
movq 16(%rsi),%rbp
movq 24(%rsi),%rsi
/* a[0]^2 -> %r15:%r8, then the cross products a[i]*a[j] (i < j) */
mulxq %rdx,%r8,%r15
mulxq %rcx,%r9,%rax
xorl %edi,%edi
mulxq %rbp,%r10,%rbx
adcxq %rax,%r10
mulxq %rsi,%r11,%r12
movq %rcx,%rdx
adcxq %rbx,%r11
adcxq %rdi,%r12
mulxq %rbp,%rax,%rbx
adoxq %rax,%r11
adcxq %rbx,%r12
mulxq %rsi,%rax,%r13
movq %rbp,%rdx
adoxq %rax,%r12
adcxq %rdi,%r13
mulxq %rsi,%rax,%r14
movq %rcx,%rdx
adoxq %rax,%r13
adcxq %rdi,%r14
adoxq %rdi,%r14
/* double the cross products (adcx reg,reg) while adding the squares
   a[i]^2 on the independent adox chain */
adcxq %r9,%r9
adoxq %r15,%r9
adcxq %r10,%r10
mulxq %rdx,%rax,%rbx
movq %rbp,%rdx
adcxq %r11,%r11
adoxq %rax,%r10
adcxq %r12,%r12
adoxq %rbx,%r11
mulxq %rdx,%rax,%rbx
movq %rsi,%rdx
adcxq %r13,%r13
adoxq %rax,%r12
adcxq %r14,%r14
adoxq %rbx,%r13
mulxq %rdx,%rax,%r15
/* %edx = 38 is the multiplier used by .Lreduce64 */
movl $38,%edx
adoxq %rax,%r14
adcxq %rdi,%r15
adoxq %rdi,%r15
jmp .Lreduce64
.align 32
.Lreduce64:
/* fold words 4..7 (%r12..%r15), each multiplied by 38, into words 0..3 */
mulxq %r12,%rax,%rbx
adcxq %rax,%r8
adoxq %rbx,%r9
mulxq %r13,%rax,%rbx
adcxq %rax,%r9
adoxq %rbx,%r10
mulxq %r14,%rax,%rbx
adcxq %rax,%r10
adoxq %rbx,%r11
mulxq %r15,%rax,%r12
adcxq %rax,%r11
adoxq %rdi,%r12
adcxq %rdi,%r12
/* reload the result pointer that the prologue pushed */
movq 16(%rsp),%rdi
/* fold the remaining overflow word once more (times 38) */
imulq %rdx,%r12
addq %r12,%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
/* a final carry out of word 3 is folded back in as +38 */
sbbq %rax,%rax
andq $38,%rax
addq %rax,%r8
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r8,0(%rdi)
/* Epilogue: callee-saved registers sit above the 16-byte scratch area
   and the saved %rdi slot. */
movq 24(%rsp),%r15
.cfi_restore %r15
movq 32(%rsp),%r14
.cfi_restore %r14
movq 40(%rsp),%r13
.cfi_restore %r13
movq 48(%rsp),%r12
.cfi_restore %r12
movq 56(%rsp),%rbx
.cfi_restore %rbx
movq 64(%rsp),%rbp
.cfi_restore %rbp
leaq 72(%rsp),%rsp
/* %rsp moved up by 72, so the CFA offset shrinks by 72 (was +88, wrong
   in both sign and size).  This file is generated from
   x25519-x86_64.pl; mirror the fix there. */
.cfi_adjust_cfa_offset -72
.Lfe64_sqr_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size x25519_fe64_sqr,.-x25519_fe64_sqr
/*
 * x25519_fe64_mul121666: multiply a 4-word value by the constant 121666
 * and fold the overflow back into the low four words with the factor 38.
 *
 * In:    %rdi = result pointer (four 64-bit words written)
 *        %rsi = a, four 64-bit words read at offsets 0..24
 * Clobb: %rax, %rcx, %rdx, %r8-%r11, flags.  No stack is used.
 * All loads from %rsi complete before the first store to %rdi.
 * Requires the BMI2 extension (mulx, which leaves the flags untouched
 * and can therefore be interleaved with the add/adc chain).
 */
.globl x25519_fe64_mul121666
.type x25519_fe64_mul121666,@function
.align 32
x25519_fe64_mul121666:
.Lfe64_mul121666_body:
.cfi_startproc
movl $121666,%edx
/* partial products: low halves land in %r8..%r11, high halves are
   chained into the next word */
mulxq 8(%rsi),%r9,%rax
mulxq 0(%rsi),%r8,%rcx
addq %rcx,%r9
mulxq 16(%rsi),%r10,%rcx
adcq %rax,%r10
mulxq 24(%rsi),%r11,%rax
adcq %rcx,%r11
adcq $0,%rax
/* %rax = fifth (overflow) word; fold it in as 38 * %rax */
imulq $38,%rax,%rax
addq %rax,%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
/* a carry out of word 3 is folded back in as +38 */
sbbq %rax,%rax
andq $38,%rax
addq %rax,%r8
movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
.Lfe64_mul121666_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size x25519_fe64_mul121666,.-x25519_fe64_mul121666
/*
 * x25519_fe64_add: add two 4-word values; a carry out of the top word is
 * folded back into the low word as +38 (done twice, since the first
 * fold can itself carry out).
 *
 * In:    %rdi = result pointer (four 64-bit words written)
 *        %rsi = a, %rdx = b, four 64-bit words each
 * Clobb: %rax, %r8-%r11, flags.  No stack is used.
 * All loads complete before the first store to %rdi.
 */
.globl x25519_fe64_add
.type x25519_fe64_add,@function
.align 32
x25519_fe64_add:
.Lfe64_add_body:
.cfi_startproc
movq 0(%rsi),%r8
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
/* 256-bit add */
addq 0(%rdx),%r8
adcq 8(%rdx),%r9
adcq 16(%rdx),%r10
adcq 24(%rdx),%r11
/* %rax = carry ? 38 : 0, propagated through all four words */
sbbq %rax,%rax
andq $38,%rax
addq %rax,%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
/* second fold for a carry produced by the first one */
sbbq %rax,%rax
andq $38,%rax
addq %rax,%r8
movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
.Lfe64_add_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size x25519_fe64_add,.-x25519_fe64_add
/*
 * x25519_fe64_sub: subtract two 4-word values (a - b); a borrow out of
 * the top word is folded back into the low word as -38 (done twice,
 * since the first fold can itself borrow).
 *
 * In:    %rdi = result pointer (four 64-bit words written)
 *        %rsi = a, %rdx = b, four 64-bit words each
 * Clobb: %rax, %r8-%r11, flags.  No stack is used.
 * All loads complete before the first store to %rdi.
 */
.globl x25519_fe64_sub
.type x25519_fe64_sub,@function
.align 32
x25519_fe64_sub:
.Lfe64_sub_body:
.cfi_startproc
movq 0(%rsi),%r8
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
/* 256-bit subtract */
subq 0(%rdx),%r8
sbbq 8(%rdx),%r9
sbbq 16(%rdx),%r10
sbbq 24(%rdx),%r11
/* %rax = borrow ? 38 : 0, propagated through all four words */
sbbq %rax,%rax
andq $38,%rax
subq %rax,%r8
sbbq $0,%r9
sbbq $0,%r10
sbbq $0,%r11
/* second fold for a borrow produced by the first one */
sbbq %rax,%rax
andq $38,%rax
subq %rax,%r8
movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
.Lfe64_sub_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size x25519_fe64_sub,.-x25519_fe64_sub
/*
 * x25519_fe64_tobytes: bring a 4-word value into a form whose top bit
 * (bit 255) is clear and store the four words.
 *
 * Steps, as performed below:
 *   1. Strip bit 255 from the top word and add 19 * (1 + bit255) to the
 *      remaining 255-bit value.
 *   2. If that sum did not reach bit 255, subtract 19 again; otherwise
 *      keep it.  Bit 255 of the sum is then dropped.
 *
 * In:    %rdi = output pointer (32 bytes written as four 64-bit words)
 *        %rsi = input, four 64-bit words read at offsets 0..24
 * Clobb: %rax, %r8-%r11, flags.  No stack is used.
 * NOTE(review): this presumably yields the canonical residue modulo
 * 2^255-19 -- confirm against the generator's description.
 */
.globl x25519_fe64_tobytes
.type x25519_fe64_tobytes,@function
.align 32
x25519_fe64_tobytes:
.Lfe64_to_body:
.cfi_startproc
movq 24(%rsi),%r11
movq 0(%rsi),%r8
movq 8(%rsi),%r9
movq 16(%rsi),%r10
/* %rax = top word with its most significant bit cleared (x2 then /2) */
leaq (%r11,%r11,1),%rax
shrq $1,%rax
/* %r11 = 19 if the most significant bit was clear, 38 if it was set */
sarq $63,%r11
andq $19,%r11
addq $19,%r11
addq %r11,%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%rax
/* %r11 = new top word with bit 63 cleared; %rax = 19 when bit 63 of the
   sum was clear (the added 19 must be taken back), 0 otherwise */
leaq (%rax,%rax,1),%r11
shrq $1,%r11
sarq $63,%rax
notq %rax
andq $19,%rax
subq %rax,%r8
sbbq $0,%r9
sbbq $0,%r10
sbbq $0,%r11
movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
.Lfe64_to_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size x25519_fe64_tobytes,.-x25519_fe64_tobytes
/* NUL-terminated ASCII identification string, emitted as raw bytes:
   "X25519 primitives for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
.byte 88,50,53,53,49,57,32,112,114,105,109,105,116,105,118,101,115,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
/*
 * ELF note emitted into .note.gnu.property.
 * The three leading .long words form the note header: name size
 * (1f - 0f), descriptor size (4f - 1f) and the note type (5).
 * The name is the NUL-terminated string "GNU"; the descriptor holds one
 * property record: a 32-bit property type, a 32-bit data size (3f - 2f)
 * and the data word, padded to 8-byte alignment.
 * NOTE(review): type 5 / property 0xc0000002 / value 3 look like
 * NT_GNU_PROPERTY_TYPE_0 with GNU_PROPERTY_X86_FEATURE_1_AND and the
 * IBT|SHSTK bits -- confirm against the x86-64 psABI.
 */
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:
@@ -1,333 +0,0 @@
/* Do not modify. This file is auto-generated from x86_64-gf2m.pl. */
.text
/*
 * _mul_1x1: file-local helper (no .globl) that builds a 128-bit result in
 * %rdx:%rax from two 64-bit inputs using only shifts, ANDs and XORs
 * (i.e. a carry-less product, matching the GF(2^m) banner of this file).
 *
 * Non-standard register interface:
 *   in:  %rax = a, %rbp = b, %r8 = nibble mask (bn_GF2m_mul_2x2 passes 0xf)
 *   out: %rax = low 64 bits, %rdx = high 64 bits
 * Overwrites %rbx, %rcx, %rsi, %rdi, %rbp, %r9-%r14, %xmm0, %xmm1 and flags;
 * the caller must preserve any of those it needs (several are callee-saved
 * in the C ABI).  Uses 128+8 bytes of stack for a 16-entry table of qwords.
 */
.type _mul_1x1,@function
.align 16
_mul_1x1:
.cfi_startproc
subq $128+8,%rsp
.cfi_adjust_cfa_offset 128+8
/* %r9 = a with its top three bits cleared (a1); %rsi/%rdi/%r12 = a<<1, a<<2,
   a<<3.  The three top bits of a are handled separately below by
   sign-broadcasting them into masks that select shifted copies of b. */
movq $-1,%r9
leaq (%rax,%rax,1),%rsi
shrq $3,%r9
leaq (,%rax,4),%rdi
andq %rax,%r9
leaq (,%rax,8),%r12
sarq $63,%rax
leaq (%r9,%r9,1),%r10
sarq $63,%rsi
leaq (,%r9,4),%r11
andq %rbp,%rax
sarq $63,%rdi
movq %rax,%rdx
shlq $63,%rax
andq %rbp,%rsi
shrq $1,%rdx
movq %rsi,%rcx
shlq $62,%rsi
andq %rbp,%rdi
shrq $2,%rcx
xorq %rsi,%rax
movq %rdi,%rbx
shlq $61,%rdi
xorq %rcx,%rdx
shrq $3,%rbx
xorq %rdi,%rax
xorq %rbx,%rdx
/* Build tab[0..15] at (%rsp): tab[i] is the XOR of a1, a1<<1, a1<<2 and a<<3
   selected by bits 0..3 of i (tab[0]=0, tab[1]=a1, tab[2]=a1<<1, ...). */
movq %r9,%r13
movq $0,0(%rsp)
xorq %r10,%r13
movq %r9,8(%rsp)
movq %r11,%r14
movq %r10,16(%rsp)
xorq %r12,%r14
movq %r13,24(%rsp)
xorq %r11,%r9
movq %r11,32(%rsp)
xorq %r11,%r10
movq %r9,40(%rsp)
xorq %r11,%r13
movq %r10,48(%rsp)
xorq %r14,%r9
movq %r13,56(%rsp)
xorq %r14,%r10
movq %r12,64(%rsp)
xorq %r14,%r13
movq %r9,72(%rsp)
xorq %r11,%r9
movq %r10,80(%rsp)
xorq %r11,%r10
movq %r13,88(%rsp)
xorq %r11,%r13
movq %r14,96(%rsp)
/* Walk b four bits at a time (%rbp is shifted right by 4 per step and masked
   with %r8).  Even-numbered nibbles are accumulated in %xmm0 using byte
   shifts (pslldq); odd-numbered nibbles are accumulated in %rdx:%rax using
   shl/shr pairs whose counts sum to 64. */
movq %r8,%rsi
movq %r9,104(%rsp)
andq %rbp,%rsi
movq %r10,112(%rsp)
shrq $4,%rbp
movq %r13,120(%rsp)
movq %r8,%rdi
andq %rbp,%rdi
shrq $4,%rbp
movq (%rsp,%rsi,8),%xmm0
movq %r8,%rsi
andq %rbp,%rsi
shrq $4,%rbp
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $4,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $60,%rbx
xorq %rcx,%rax
pslldq $1,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $12,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $52,%rbx
xorq %rcx,%rax
pslldq $2,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $20,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $44,%rbx
xorq %rcx,%rax
pslldq $3,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $28,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $36,%rbx
xorq %rcx,%rax
pslldq $4,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $36,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $28,%rbx
xorq %rcx,%rax
pslldq $5,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $44,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $20,%rbx
xorq %rcx,%rax
pslldq $6,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %r8,%rdi
movq %rcx,%rbx
shlq $52,%rcx
andq %rbp,%rdi
movq (%rsp,%rsi,8),%xmm1
shrq $12,%rbx
xorq %rcx,%rax
pslldq $7,%xmm1
movq %r8,%rsi
shrq $4,%rbp
xorq %rbx,%rdx
andq %rbp,%rsi
shrq $4,%rbp
pxor %xmm1,%xmm0
movq (%rsp,%rdi,8),%rcx
movq %rcx,%rbx
shlq $60,%rcx
/* 102,72,15,126,198 encodes "movq %xmm0,%rsi" (low half of the accumulator). */
.byte 102,72,15,126,198
shrq $4,%rbx
xorq %rcx,%rax
psrldq $8,%xmm0
xorq %rbx,%rdx
/* 102,72,15,126,199 encodes "movq %xmm0,%rdi" (high half after psrldq). */
.byte 102,72,15,126,199
/* Merge the SSE accumulator into the integer result. */
xorq %rsi,%rax
xorq %rdi,%rdx
addq $128+8,%rsp
.cfi_adjust_cfa_offset -128-8
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.Lend_mul_1x1:
.cfi_endproc
.size _mul_1x1,.-_mul_1x1
/*
 * bn_GF2m_mul_2x2: 128x128 -> 256-bit XOR-based (carry-less) multiplication.
 *   %rdi = result pointer (four qwords written, offsets 0..24)
 *   %rsi, %rdx = the two 64-bit words of the first operand
 *   %rcx, %r8  = the two 64-bit words of the second operand
 * The %rsi*%rcx product feeds the upper half of the result and the %rdx*%r8
 * product the lower half; a third product of (%rsi^%rdx) and (%rcx^%r8)
 * supplies the middle terms, so only three 64x64 multiplications are needed.
 *
 * If bit 33 of OPENSSL_ia32cap_P is set the products are computed with the
 * byte-encoded pclmulqdq instructions below; otherwise the table-driven
 * _mul_1x1 helper is called three times.
 */
.globl bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,@function
.align 16
bn_GF2m_mul_2x2:
.cfi_startproc
movq %rsp,%rax
movq OPENSSL_ia32cap_P(%rip),%r10
btq $33,%r10
jnc .Lvanilla_mul_2x2
/* The next four .byte lines encode movq from %rsi, %rcx, %rdx and %r8 into
   %xmm0, %xmm1, %xmm2 and %xmm3 respectively. */
.byte 102,72,15,110,198
.byte 102,72,15,110,201
.byte 102,72,15,110,210
.byte 102,73,15,110,216
movdqa %xmm0,%xmm4
movdqa %xmm1,%xmm5
/* pclmulqdq $0,%xmm1,%xmm0 : high product. */
.byte 102,15,58,68,193,0
pxor %xmm2,%xmm4
pxor %xmm3,%xmm5
/* pclmulqdq $0,%xmm3,%xmm2 : low product. */
.byte 102,15,58,68,211,0
/* pclmulqdq $0,%xmm5,%xmm4 : product of the XORed halves. */
.byte 102,15,58,68,229,0
/* Middle term = third product ^ high ^ low; split it across both halves. */
xorps %xmm0,%xmm4
xorps %xmm2,%xmm4
movdqa %xmm4,%xmm5
pslldq $8,%xmm4
psrldq $8,%xmm5
pxor %xmm4,%xmm2
pxor %xmm5,%xmm0
movdqu %xmm2,0(%rdi)
movdqu %xmm0,16(%rdi)
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.align 16
.Lvanilla_mul_2x2:
/* Reserve 17 qwords: 0..31 hold partial products, 32..71 the saved arguments,
   80..119 the registers that _mul_1x1 overwrites. */
leaq -136(%rsp),%rsp
.cfi_adjust_cfa_offset 8*17
movq %r14,80(%rsp)
.cfi_rel_offset %r14,8*10
movq %r13,88(%rsp)
.cfi_rel_offset %r13,8*11
movq %r12,96(%rsp)
.cfi_rel_offset %r12,8*12
movq %rbp,104(%rsp)
.cfi_rel_offset %rbp,8*13
movq %rbx,112(%rsp)
.cfi_rel_offset %rbx,8*14
.Lbody_mul_2x2:
movq %rdi,32(%rsp)
movq %rsi,40(%rsp)
movq %rdx,48(%rsp)
movq %rcx,56(%rsp)
movq %r8,64(%rsp)
/* %r8 becomes the nibble mask expected by _mul_1x1. */
movq $0xf,%r8
/* First product: %rsi x %rcx, kept at 16(%rsp)/24(%rsp). */
movq %rsi,%rax
movq %rcx,%rbp
call _mul_1x1
movq %rax,16(%rsp)
movq %rdx,24(%rsp)
/* Second product: saved %rdx x saved %r8, kept at 0(%rsp)/8(%rsp). */
movq 48(%rsp),%rax
movq 64(%rsp),%rbp
call _mul_1x1
movq %rax,0(%rsp)
movq %rdx,8(%rsp)
/* Third product: XOR of the operand halves on each side. */
movq 40(%rsp),%rax
movq 56(%rsp),%rbp
xorq 48(%rsp),%rax
xorq 64(%rsp),%rbp
call _mul_1x1
/* Combine the three partial products and store through the saved result
   pointer (reloaded into %rbp). */
movq 0(%rsp),%rbx
movq 8(%rsp),%rcx
movq 16(%rsp),%rdi
movq 24(%rsp),%rsi
movq 32(%rsp),%rbp
xorq %rdx,%rax
xorq %rcx,%rdx
xorq %rbx,%rax
movq %rbx,0(%rbp)
xorq %rdi,%rdx
movq %rsi,24(%rbp)
xorq %rsi,%rax
xorq %rsi,%rdx
xorq %rdx,%rax
movq %rdx,16(%rbp)
movq %rax,8(%rbp)
movq 80(%rsp),%r14
.cfi_restore %r14
movq 88(%rsp),%r13
.cfi_restore %r13
movq 96(%rsp),%r12
.cfi_restore %r12
movq 104(%rsp),%rbp
.cfi_restore %rbp
movq 112(%rsp),%rbx
.cfi_restore %rbx
leaq 136(%rsp),%rsp
.cfi_adjust_cfa_offset -8*17
.Lepilogue_mul_2x2:
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.Lend_mul_2x2:
.cfi_endproc
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1,513 +0,0 @@
/* Do not modify. This file is auto-generated from x86_64cpuid.pl. */
.hidden OPENSSL_cpuid_setup
.section .init
call OPENSSL_cpuid_setup
.hidden OPENSSL_ia32cap_P
.comm OPENSSL_ia32cap_P,16,4
.text
/*
 * OPENSSL_atomic_add: atomically add a 32-bit amount to *(%rdi).
 *   %rdi = pointer to the 32-bit counter, %rsi = amount to add.
 *   Returns the new counter value, sign-extended to 64 bits, in %rax.
 * Implemented as a compare-and-swap retry loop.  The mnemonics below assemble
 * to the same bytes as the generator's raw .byte encodings.
 */
.globl OPENSSL_atomic_add
.type OPENSSL_atomic_add,@function
.align 16
OPENSSL_atomic_add:
.cfi_startproc
endbr64
/* %eax = currently observed value (cmpxchg's implicit comparand). */
movl (%rdi),%eax
.Lspin:
/* %r8 = observed + amount; only the low 32 bits are stored. */
leaq (%rsi,%rax,1),%r8
/* On failure cmpxchg reloads %eax with the fresh value and we retry. */
lock cmpxchgl %r8d,(%rdi)
jne .Lspin
movl %r8d,%eax
/* Sign-extend %eax into %rax. */
cltq
rep ret
.cfi_endproc
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
/*
 * OPENSSL_rdtsc: return the 64-bit time-stamp counter in %rax.
 * Takes no arguments; overwrites %rdx and flags.  The mnemonics below
 * assemble to the same bytes as the generator's raw .byte encodings.
 */
.globl OPENSSL_rdtsc
.type OPENSSL_rdtsc,@function
.align 16
OPENSSL_rdtsc:
.cfi_startproc
endbr64
/* rdtsc returns the counter split across %edx (high) and %eax (low). */
rdtsc
shlq $32,%rdx
orq %rdx,%rax
rep ret
.cfi_endproc
.size OPENSSL_rdtsc,.-OPENSSL_rdtsc
/*
 * OPENSSL_ia32_cpuid: probe the CPU with cpuid/xgetbv and return an adjusted
 * capability vector.
 *   %rdi = pointer to a capability array; this routine writes the dwords at
 *          8(%rdi) and 12(%rdi) (zeroed first, then filled from cpuid leaf 7
 *          %ebx/%ecx when that leaf exists).
 *   Returns %rax = (adjusted leaf-1 %ecx bits) << 32 | (adjusted leaf-1 %edx).
 * %rbx is callee-saved, so it is parked in %r8 for the duration (cpuid
 * overwrites it).  Register roles: %r11d = highest basic cpuid leaf,
 * %r9d = "not Intel" flag early on and the %ecx-derived result later,
 * %r10d = scratch and the %edx-derived result, %xmm0 = leaf-1 %eax signature.
 * Bit numbers in the comments are read from the masks; feature names are
 * deliberately not asserted here - see the CPUID documentation.
 */
.globl OPENSSL_ia32_cpuid
.type OPENSSL_ia32_cpuid,@function
.align 16
OPENSSL_ia32_cpuid:
.cfi_startproc
/* 243,15,30,250 encodes endbr64. */
.byte 243,15,30,250
movq %rbx,%r8
.cfi_register %rbx,%r8
/* Leaf 0; also clear the 8 bytes at 8(%rdi). */
xorl %eax,%eax
movq %rax,8(%rdi)
cpuid
movl %eax,%r11d
/* %r9d = 0 only if the vendor string is "GenuineIntel"
   (0x756e6547="Genu", 0x49656e69="ineI", 0x6c65746e="ntel"). */
xorl %eax,%eax
cmpl $0x756e6547,%ebx
setne %al
movl %eax,%r9d
cmpl $0x49656e69,%edx
setne %al
orl %eax,%r9d
cmpl $0x6c65746e,%ecx
setne %al
orl %eax,%r9d
jz .Lintel
/* Not Intel: check for "AuthenticAMD"
   (0x68747541="Auth", 0x69746E65="enti", 0x444D4163="cAMD"). */
cmpl $0x68747541,%ebx
setne %al
movl %eax,%r10d
cmpl $0x69746E65,%edx
setne %al
orl %eax,%r10d
cmpl $0x444D4163,%ecx
setne %al
orl %eax,%r10d
jnz .Lintel
/* AMD only: consult the extended leaves if they exist. */
movl $0x80000000,%eax
cpuid
cmpl $0x80000001,%eax
jb .Lintel
movl %eax,%r10d
movl $0x80000001,%eax
cpuid
/* Keep bits 0 and 11 of (flag | extended %ecx). */
orl %ecx,%r9d
andl $0x00000801,%r9d
cmpl $0x80000008,%r10d
jb .Lintel
movl $0x80000008,%eax
cpuid
/* %r10 = (low byte of leaf 0x80000008 %ecx) + 1. */
movzbq %cl,%r10
incq %r10
movl $1,%eax
cpuid
/* If %edx bit 28 is set but the count in %ebx[23:16] does not exceed %r10,
   clear bit 28. */
btl $28,%edx
jnc .Lgeneric
shrl $16,%ebx
cmpb %r10b,%bl
ja .Lgeneric
andl $0xefffffff,%edx
jmp .Lgeneric
.Lintel:
/* Shared by Intel and by every vendor that is not handled above.
   %r10d = bits 25:14 of leaf 4 %eax, or -1 if leaf 4 is unavailable. */
cmpl $4,%r11d
movl $-1,%r10d
jb .Lnocacheinfo
movl $4,%eax
movl $0,%ecx
cpuid
movl %eax,%r10d
shrl $14,%r10d
andl $0xfff,%r10d
.Lnocacheinfo:
movl $1,%eax
cpuid
/* Remember the leaf-1 signature for the model checks further down. */
movd %eax,%xmm0
/* Clear %edx bits 20 and 30; they are re-derived below for Intel parts. */
andl $0xbfefffff,%edx
cmpl $0,%r9d
jne .Lnotintel
/* Intel: set bit 30; additionally set bit 20 when the family field is 15. */
orl $0x40000000,%edx
andb $15,%ah
cmpb $15,%ah
jne .LnotP4
orl $0x00100000,%edx
.LnotP4:
/* Family 6 with signature 0x50670 or 0x80650: clear %ecx bit 26. */
cmpb $6,%ah
jne .Lnotintel
andl $0x0fff0ff0,%eax
cmpl $0x00050670,%eax
je .Lknights
cmpl $0x00080650,%eax
jne .Lnotintel
.Lknights:
andl $0xfbffffff,%ecx
.Lnotintel:
/* Re-derive %edx bit 28: it stays set only if it was set, %r10d is non-zero
   and the count in %ebx[23:16] is greater than 1. */
btl $28,%edx
jnc .Lgeneric
andl $0xefffffff,%edx
cmpl $0,%r10d
je .Lgeneric
orl $0x10000000,%edx
shrl $16,%ebx
cmpb $1,%bl
ja .Lgeneric
andl $0xefffffff,%edx
.Lgeneric:
/* %r9d = leaf-1 %ecx with its bit 11 replaced by the bit 11 computed above;
   %r10d = adjusted leaf-1 %edx. */
andl $0x00000800,%r9d
andl $0xfffff7ff,%ecx
orl %ecx,%r9d
movl %edx,%r10d
cmpl $7,%r11d
jb .Lno_extended_info
movl $7,%eax
xorl %ecx,%ecx
cpuid
/* If bit 26 of %r9d is clear, clear leaf-7 %ebx bit 19. */
btl $26,%r9d
jc .Lnotknights
andl $0xfff7ffff,%ebx
.Lnotknights:
/* Signature 0x50650: clear leaf-7 %ebx bit 16. */
movd %xmm0,%eax
andl $0x0fff0ff0,%eax
cmpl $0x00050650,%eax
jne .Lnotskylakex
andl $0xfffeffff,%ebx
.Lnotskylakex:
movl %ebx,8(%rdi)
movl %ecx,12(%rdi)
.Lno_extended_info:
/* Only execute xgetbv when bit 27 of %r9d says it is usable. */
btl $27,%r9d
jnc .Lclear_avx
xorl %ecx,%ecx
/* 0x0f,0x01,0xd0 encodes xgetbv (with %ecx = 0). */
.byte 0x0f,0x01,0xd0
/* All of bits 1,2,5,6,7 enabled: keep everything. */
andl $0xe6,%eax
cmpl $0xe6,%eax
je .Ldone
/* Otherwise drop bits 16, 21, 30 and 31 of the dword at 8(%rdi). */
andl $0x3fdeffff,8(%rdi)
/* Bits 1 and 2 enabled: nothing more to drop. */
andl $6,%eax
cmpl $6,%eax
je .Ldone
.Lclear_avx:
/* Drop bits 11, 12 and 28 of %r9d, and bits 5, 16, 21, 30, 31 at 8(%rdi). */
movl $0xefffe7ff,%eax
andl %eax,%r9d
movl $0x3fdeffdf,%eax
andl %eax,8(%rdi)
.Ldone:
/* Assemble the 64-bit return value and restore %rbx. */
shlq $32,%r9
movl %r10d,%eax
movq %r8,%rbx
.cfi_restore %rbx
orq %r9,%rax
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc
.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
/*
 * OPENSSL_cleanse: overwrite a buffer with zero bytes.
 *   %rdi = buffer, %rsi = length in bytes.
 * Lengths below 15 use a byte loop.  Longer buffers are byte-filled until
 * %rdi is 8-byte aligned, then filled one qword at a time, and any tail is
 * finished by the byte loop.  Overwrites %rax (zero on return), %rsi, %rdi
 * and flags.
 */
.globl OPENSSL_cleanse
.type OPENSSL_cleanse,@function
.align 16
OPENSSL_cleanse:
.cfi_startproc
/* 243,15,30,250 encodes endbr64. */
.byte 243,15,30,250
xorq %rax,%rax
cmpq $15,%rsi
jae .Lot
cmpq $0,%rsi
je .Lret
.Little:
/* Byte loop; leaq leaves the flags from subq intact for the jnz. */
movb %al,(%rdi)
subq $1,%rsi
leaq 1(%rdi),%rdi
jnz .Little
.Lret:
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.align 16
.Lot:
/* Store single bytes until the pointer is 8-byte aligned. */
testq $7,%rdi
jz .Laligned
movb %al,(%rdi)
leaq -1(%rsi),%rsi
leaq 1(%rdi),%rdi
jmp .Lot
.Laligned:
/* Qword loop: continue while at least 8 bytes remain. */
movq %rax,(%rdi)
leaq -8(%rsi),%rsi
testq $-8,%rsi
leaq 8(%rdi),%rdi
jnz .Laligned
/* 0..7 bytes may be left over. */
cmpq $0,%rsi
jne .Little
.byte 0xf3,0xc3
.cfi_endproc
.size OPENSSL_cleanse,.-OPENSSL_cleanse
/*
 * CRYPTO_memcmp: compare two buffers for equality.
 *   %rdi, %rsi = buffers, %rdx = length in bytes.
 *   Returns %rax = 0 if all bytes match (or the length is 0), 1 otherwise.
 * The general path XORs every byte pair and ORs the results together, so it
 * has no data-dependent early exit; a length of exactly 16 takes a
 * two-qword fast path.  Overwrites %r10, %r11, %rdx, %rsi, %rdi and flags.
 */
.globl CRYPTO_memcmp
.type CRYPTO_memcmp,@function
.align 16
CRYPTO_memcmp:
.cfi_startproc
/* 243,15,30,250 encodes endbr64. */
.byte 243,15,30,250
xorq %rax,%rax
xorq %r10,%r10
cmpq $0,%rdx
je .Lno_data
cmpq $16,%rdx
jne .Loop_cmp
/* 16-byte case: XOR both qword pairs, OR them, return 1 if non-zero. */
movq (%rdi),%r10
movq 8(%rdi),%r11
movq $1,%rdx
xorq (%rsi),%r10
xorq 8(%rsi),%r11
orq %r11,%r10
cmovnzq %rdx,%rax
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.align 16
.Loop_cmp:
/* Accumulate the OR of all byte differences in %al. */
movb (%rdi),%r10b
leaq 1(%rdi),%rdi
xorb (%rsi),%r10b
leaq 1(%rsi),%rsi
orb %r10b,%al
decq %rdx
jnz .Loop_cmp
/* Map non-zero to 1: negating a value in 1..255 sets bit 63. */
negq %rax
shrq $63,%rax
.Lno_data:
.byte 0xf3,0xc3
.cfi_endproc
.size CRYPTO_memcmp,.-CRYPTO_memcmp
/*
 * OPENSSL_wipe_cpu: zero %xmm0-%xmm15 and the integer registers %rcx, %rdx,
 * %rsi, %rdi and %r8-%r11.
 * Takes no arguments.  Returns %rax = %rsp + 8, i.e. the address just above
 * this function's return address.  The mnemonics below assemble to the same
 * bytes as the generator's raw .byte encodings.
 */
.globl OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,@function
.align 16
OPENSSL_wipe_cpu:
.cfi_startproc
endbr64

/* Vector registers. */
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
pxor %xmm8,%xmm8
pxor %xmm9,%xmm9
pxor %xmm10,%xmm10
pxor %xmm11,%xmm11
pxor %xmm12,%xmm12
pxor %xmm13,%xmm13
pxor %xmm14,%xmm14
pxor %xmm15,%xmm15

/* Integer scratch registers. */
xorq %rcx,%rcx
xorq %rdx,%rdx
xorq %rsi,%rsi
xorq %rdi,%rdi
xorq %r8,%r8
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11

/* Return value: stack address just above the return address. */
leaq 8(%rsp),%rax
rep ret
.cfi_endproc
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
/*
 * OPENSSL_instrument_bus: record time-stamp-counter deltas.
 *   %rdi = output array of 32-bit words, %rsi = number of words.
 *   Returns %rax = the word count that was passed in.
 * For each word: read the TSC, compute the difference from the previous
 * reading, flush the word's cache line and add the difference to it with a
 * locked add, then advance by 4 bytes.  Before the loop a locked add of 0 is
 * performed on the first word.
 * Overwrites %rcx, %rdx, %r8-%r11 and flags.
 * NOTE(review): a count of 0 would wrap the loop counter - callers are
 * presumably expected to pass a non-zero count; confirm.
 */
.globl OPENSSL_instrument_bus
.type OPENSSL_instrument_bus,@function
.align 16
OPENSSL_instrument_bus:
.cfi_startproc
/* 243,15,30,250 encodes endbr64. */
.byte 243,15,30,250
movq %rdi,%r10
movq %rsi,%rcx
movq %rsi,%r11
/* %r8d = previous TSC reading (low 32 bits), %r9d = previous delta. */
rdtsc
movl %eax,%r8d
movl $0,%r9d
clflush (%r10)
/* 0xf0 is the lock prefix for the following addl. */
.byte 0xf0
addl %r9d,(%r10)
jmp .Loop
.align 16
.Loop: rdtsc
movl %eax,%edx
subl %r8d,%eax
movl %edx,%r8d
movl %eax,%r9d
clflush (%r10)
/* 0xf0 is the lock prefix for the following addl. */
.byte 0xf0
addl %eax,(%r10)
leaq 4(%r10),%r10
subq $1,%rcx
jnz .Loop
movq %r11,%rax
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
/*
 * OPENSSL_instrument_bus2: record time-stamp-counter deltas, advancing to the
 * next output word only when the delta changes.
 *   %rdi = output array of 32-bit words
 *   %rsi = number of words available (cnt)
 *   %rdx = maximum number of probes (max)
 *   Returns %rax = cnt minus the words still unused, i.e. how many times the
 *   output pointer advanced.
 * Overwrites %rcx, %rdx, %r8-%r11 and flags; %rsi and %rdi are left intact.
 *
 * The original count is read back from %rsi, which this routine never
 * modifies.  The generator's version parked it at 8(%rsp) instead; on the
 * System V AMD64 ABI that slot belongs to the caller's frame (it is only the
 * register home area on Win64), so a leaf function must not write to it.
 */
.globl OPENSSL_instrument_bus2
.type OPENSSL_instrument_bus2,@function
.align 16
OPENSSL_instrument_bus2:
.cfi_startproc
/* 243,15,30,250 encodes endbr64. */
.byte 243,15,30,250
movq %rdi,%r10
movq %rsi,%rcx
movq %rdx,%r11
/* %r8d = previous TSC reading (low 32 bits), %r9d = previous delta. */
rdtsc
movl %eax,%r8d
movl $0,%r9d
clflush (%r10)
/* 0xf0 is the lock prefix for the following addl. */
.byte 0xf0
addl %r9d,(%r10)
rdtsc
movl %eax,%edx
subl %r8d,%eax
movl %edx,%r8d
movl %eax,%r9d
.Loop2:
clflush (%r10)
/* 0xf0 is the lock prefix for the following addl. */
.byte 0xf0
addl %eax,(%r10)
/* Stop once the probe budget is exhausted. */
subq $1,%r11
jz .Ldone2
rdtsc
movl %eax,%edx
subl %r8d,%eax
movl %edx,%r8d
/* %rdx = 1 if this delta differs from the previous one, else 0. */
cmpl %r9d,%eax
movl %eax,%r9d
movl $0,%edx
setne %dl
/* Consume one output word and advance only when the delta changed; leaq
   preserves the flags from subq, so the loop ends when no words remain. */
subq %rdx,%rcx
leaq (%r10,%rdx,4),%r10
jnz .Loop2
.Ldone2:
/* %rsi still holds the original cnt. */
movq %rsi,%rax
subq %rcx,%rax
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
/*
 * OPENSSL_ia32_rdrand_bytes: fill a buffer using the rdrand instruction.
 *   %rdi = buffer, %rsi = number of bytes requested.
 *   Returns %rax = number of bytes actually written (0 for an empty request).
 * Each 64-bit draw is attempted up to 8 times; if all 8 attempts fail the
 * routine returns early with the byte count written so far.  %r10, which
 * holds the random data, is zeroed before returning.
 * Overwrites %r10, %r11, %rsi, %rdi and flags.
 */
.globl OPENSSL_ia32_rdrand_bytes
.type OPENSSL_ia32_rdrand_bytes,@function
.align 16
OPENSSL_ia32_rdrand_bytes:
.cfi_startproc
/* 243,15,30,250 encodes endbr64. */
.byte 243,15,30,250
xorq %rax,%rax
cmpq $0,%rsi
je .Ldone_rdrand_bytes
/* %r11 = remaining retries for the current draw. */
movq $8,%r11
.Loop_rdrand_bytes:
/* 73,15,199,242 encodes "rdrand %r10"; CF=1 signals success. */
.byte 73,15,199,242
jc .Lbreak_rdrand_bytes
decq %r11
jnz .Loop_rdrand_bytes
jmp .Ldone_rdrand_bytes
.align 16
.Lbreak_rdrand_bytes:
/* Store a whole qword while at least 8 bytes are still wanted. */
cmpq $8,%rsi
jb .Ltail_rdrand_bytes
movq %r10,(%rdi)
leaq 8(%rdi),%rdi
addq $8,%rax
subq $8,%rsi
jz .Ldone_rdrand_bytes
movq $8,%r11
jmp .Loop_rdrand_bytes
.align 16
.Ltail_rdrand_bytes:
/* Fewer than 8 bytes left: emit the draw one byte at a time. */
movb %r10b,(%rdi)
leaq 1(%rdi),%rdi
incq %rax
shrq $8,%r10
decq %rsi
jnz .Ltail_rdrand_bytes
.Ldone_rdrand_bytes:
/* Do not leave random data behind in a register. */
xorq %r10,%r10
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc
.size OPENSSL_ia32_rdrand_bytes,.-OPENSSL_ia32_rdrand_bytes
/*
 * OPENSSL_ia32_rdseed_bytes: fill a buffer using the rdseed instruction.
 *   %rdi = buffer, %rsi = number of bytes requested.
 *   Returns %rax = number of bytes actually written (0 for an empty request).
 * Each 64-bit draw is attempted up to 8 times; if all 8 attempts fail the
 * routine returns early with the byte count written so far.  %r10, which
 * holds the random data, is zeroed before returning.
 * Overwrites %r10, %r11, %rsi, %rdi and flags.
 */
.globl OPENSSL_ia32_rdseed_bytes
.type OPENSSL_ia32_rdseed_bytes,@function
.align 16
OPENSSL_ia32_rdseed_bytes:
.cfi_startproc
/* 243,15,30,250 encodes endbr64. */
.byte 243,15,30,250
xorq %rax,%rax
cmpq $0,%rsi
je .Ldone_rdseed_bytes
/* %r11 = remaining retries for the current draw. */
movq $8,%r11
.Loop_rdseed_bytes:
/* 73,15,199,250 encodes "rdseed %r10"; CF=1 signals success. */
.byte 73,15,199,250
jc .Lbreak_rdseed_bytes
decq %r11
jnz .Loop_rdseed_bytes
jmp .Ldone_rdseed_bytes
.align 16
.Lbreak_rdseed_bytes:
/* Store a whole qword while at least 8 bytes are still wanted. */
cmpq $8,%rsi
jb .Ltail_rdseed_bytes
movq %r10,(%rdi)
leaq 8(%rdi),%rdi
addq $8,%rax
subq $8,%rsi
jz .Ldone_rdseed_bytes
movq $8,%r11
jmp .Loop_rdseed_bytes
.align 16
.Ltail_rdseed_bytes:
/* Fewer than 8 bytes left: emit the draw one byte at a time. */
movb %r10b,(%rdi)
leaq 1(%rdi),%rdi
incq %rax
shrq $8,%r10
decq %rsi
jnz .Ltail_rdseed_bytes
.Ldone_rdseed_bytes:
/* Do not leave random data behind in a register. */
xorq %r10,%r10
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc
.size OPENSSL_ia32_rdseed_bytes,.-OPENSSL_ia32_rdseed_bytes
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
-236
View File
@@ -1,236 +0,0 @@
/* Do not modify. This file is auto-generated from armv4-gf2m.pl. */
#include "arm_arch.h"
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif
.text
/*
 * mul_1x1_ialu: file-local helper (no .globl) producing a 64-bit XOR-based
 * (carry-less) product of two 32-bit words using integer instructions only.
 *
 * Non-standard register interface:
 *   in:  r1 = a, r0 = b, r12 = 7<<2 (byte-offset mask for a 3-bit table
 *        index, set by bn_GF2m_mul_2x2), sp = base of an 8-word scratch
 *        table that this routine fills as tab[0..7]
 *   out: r5 = low 32 bits, r4 = high 32 bits
 * Overwrites r6-r9 and the condition flags; r0-r3, r10 and r12 are preserved.
 * The table is built from a with its top two bits cleared; those two bits are
 * folded in near the end via the conditional eorne instructions.
 */
.type mul_1x1_ialu,%function
.align 5
mul_1x1_ialu:
mov r4,#0
bic r5,r1,#3<<30 @ a1=a&0x3fffffff
str r4,[sp,#0] @ tab[0]=0
add r6,r5,r5 @ a2=a1<<1
str r5,[sp,#4] @ tab[1]=a1
eor r7,r5,r6 @ a1^a2
str r6,[sp,#8] @ tab[2]=a2
mov r8,r5,lsl#2 @ a4=a1<<2
str r7,[sp,#12] @ tab[3]=a1^a2
eor r9,r5,r8 @ a1^a4
str r8,[sp,#16] @ tab[4]=a4
eor r4,r6,r8 @ a2^a4
str r9,[sp,#20] @ tab[5]=a1^a4
eor r7,r7,r8 @ a1^a2^a4
str r4,[sp,#24] @ tab[6]=a2^a4
and r8,r12,r0,lsl#2
str r7,[sp,#28] @ tab[7]=a1^a2^a4
and r9,r12,r0,lsr#1
ldr r5,[sp,r8] @ tab[b & 0x7]
and r8,r12,r0,lsr#4
ldr r7,[sp,r9] @ tab[b >> 3 & 0x7]
and r9,r12,r0,lsr#7
ldr r6,[sp,r8] @ tab[b >> 6 & 0x7]
eor r5,r5,r7,lsl#3 @ stall
mov r4,r7,lsr#29
ldr r7,[sp,r9] @ tab[b >> 9 & 0x7]
and r8,r12,r0,lsr#10
eor r5,r5,r6,lsl#6
eor r4,r4,r6,lsr#26
ldr r6,[sp,r8] @ tab[b >> 12 & 0x7]
and r9,r12,r0,lsr#13
eor r5,r5,r7,lsl#9
eor r4,r4,r7,lsr#23
ldr r7,[sp,r9] @ tab[b >> 15 & 0x7]
and r8,r12,r0,lsr#16
eor r5,r5,r6,lsl#12
eor r4,r4,r6,lsr#20
ldr r6,[sp,r8] @ tab[b >> 18 & 0x7]
and r9,r12,r0,lsr#19
eor r5,r5,r7,lsl#15
eor r4,r4,r7,lsr#17
ldr r7,[sp,r9] @ tab[b >> 21 & 0x7]
and r8,r12,r0,lsr#22
eor r5,r5,r6,lsl#18
eor r4,r4,r6,lsr#14
ldr r6,[sp,r8] @ tab[b >> 24 & 0x7]
and r9,r12,r0,lsr#25
eor r5,r5,r7,lsl#21
eor r4,r4,r7,lsr#11
ldr r7,[sp,r9] @ tab[b >> 27 & 0x7]
/* If bit 30 of a is set, fold in b shifted by 30. */
tst r1,#1<<30
and r8,r12,r0,lsr#28
eor r5,r5,r6,lsl#24
eor r4,r4,r6,lsr#8
ldr r6,[sp,r8] @ tab[b >> 30 ]
#ifdef __thumb2__
itt ne
#endif
eorne r5,r5,r0,lsl#30
eorne r4,r4,r0,lsr#2
/* If bit 31 of a is set, fold in b shifted by 31. */
tst r1,#1<<31
eor r5,r5,r7,lsl#27
eor r4,r4,r7,lsr#5
#ifdef __thumb2__
itt ne
#endif
eorne r5,r5,r0,lsl#31
eorne r4,r4,r0,lsr#1
eor r5,r5,r6,lsl#30
eor r4,r4,r6,lsr#2
mov pc,lr
.size mul_1x1_ialu,.-mul_1x1_ialu
/*
 * bn_GF2m_mul_2x2 (ARM): 64x64 -> 128-bit XOR-based (carry-less)
 * multiplication.
 *   r0 = result pointer (four 32-bit words written)
 *   r1 = a1, r2 = a0, r3 = b1, fifth argument on the stack = b0
 * When built with __ARM_MAX_ARCH__>=7 the routine tests ARMV7_NEON in
 * OPENSSL_armcap_P at run time and uses the vmull.p8 path at .LNEON if it is
 * set.  Otherwise it calls mul_1x1_ialu three times (a1*b1, a0*b0 and
 * (a1+a0)*(b1+b0)) and combines the partial products.
 * The integer path carves a 32-byte-aligned tab[8] scratch table below the
 * saved registers and keeps the previous sp in the word just above it.
 */
.globl bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,%function
.align 5
bn_GF2m_mul_2x2:
#if __ARM_MAX_ARCH__>=7
/* Save r10/lr first so r10 can be used while locating OPENSSL_armcap_P. */
stmdb sp!,{r10,lr}
ldr r12,.LOPENSSL_armcap
# if !defined(_WIN32)
adr r10,.LOPENSSL_armcap
ldr r12,[r12,r10]
# endif
# if defined(__APPLE__) || defined(_WIN32)
ldr r12,[r12]
# endif
tst r12,#ARMV7_NEON
itt ne
/* NEON path: restore r10 and drop both saved words (lr is unchanged). */
ldrne r10,[sp],#8
bne .LNEON
/* Integer path: push the rest so the frame matches the #else layout. */
stmdb sp!,{r4,r5,r6,r7,r8,r9}
#else
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
#endif
mov r10,r0 @ reassign 1st argument
mov r0,r3 @ r0=b1
sub r7,sp,#36
mov r8,sp
and r7,r7,#-32
ldr r3,[sp,#32] @ load b0
mov r12,#7<<2
mov sp,r7 @ allocate tab[8]
str r8,[r7,#32]
bl mul_1x1_ialu @ a1·b1
str r5,[r10,#8]
str r4,[r10,#12]
eor r0,r0,r3 @ flip b0 and b1
eor r1,r1,r2 @ flip a0 and a1
eor r3,r3,r0
eor r2,r2,r1
eor r0,r0,r3
eor r1,r1,r2
bl mul_1x1_ialu @ a0·b0
str r5,[r10]
str r4,[r10,#4]
eor r1,r1,r2
eor r0,r0,r3
bl mul_1x1_ialu @ (a1+a0)·(b1+b0)
/* Fold the middle product into result words 1 and 2. */
ldmia r10,{r6,r7,r8,r9}
eor r5,r5,r4
ldr sp,[sp,#32] @ destroy tab[8]
eor r4,r4,r7
eor r5,r5,r6
eor r4,r4,r8
eor r5,r5,r9
eor r4,r4,r9
str r4,[r10,#8]
eor r5,r5,r4
str r5,[r10,#4]
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.align 5
.LNEON:
/* NEON path: d26 = A, d27 = B; the 64x64 product is assembled from 8-bit
   polynomial multiplies (vmull.p8) of byte-rotated copies of the operands.
   d28-d30 hold the masks applied to the partial sums. */
ldr r12, [sp] @ 5th argument
vmov d26, r2, r1
vmov d27, r12, r3
vmov.i64 d28, #0x0000ffffffffffff
vmov.i64 d29, #0x00000000ffffffff
vmov.i64 d30, #0x000000000000ffff
vext.8 d2, d26, d26, #1 @ A1
vmull.p8 q1, d2, d27 @ F = A1*B
vext.8 d0, d27, d27, #1 @ B1
vmull.p8 q0, d26, d0 @ E = A*B1
vext.8 d4, d26, d26, #2 @ A2
vmull.p8 q2, d4, d27 @ H = A2*B
vext.8 d16, d27, d27, #2 @ B2
vmull.p8 q8, d26, d16 @ G = A*B2
vext.8 d6, d26, d26, #3 @ A3
veor q1, q1, q0 @ L = E + F
vmull.p8 q3, d6, d27 @ J = A3*B
vext.8 d0, d27, d27, #3 @ B3
veor q2, q2, q8 @ M = G + H
vmull.p8 q0, d26, d0 @ I = A*B3
veor d2, d2, d3 @ t0 = (L) (P0 + P1) << 8
vand d3, d3, d28
vext.8 d16, d27, d27, #4 @ B4
veor d4, d4, d5 @ t1 = (M) (P2 + P3) << 16
vand d5, d5, d29
vmull.p8 q8, d26, d16 @ K = A*B4
veor q3, q3, q0 @ N = I + J
veor d2, d2, d3
veor d4, d4, d5
veor d6, d6, d7 @ t2 = (N) (P4 + P5) << 24
vand d7, d7, d30
vext.8 q1, q1, q1, #15
veor d16, d16, d17 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d17, #0
vext.8 q2, q2, q2, #14
veor d6, d6, d7
vmull.p8 q0, d26, d27 @ D = A*B
vext.8 q8, q8, q8, #12
vext.8 q3, q3, q3, #13
veor q1, q1, q2
veor q3, q3, q8
veor q0, q0, q1
veor q0, q0, q3
/* Store the 128-bit result through the untouched result pointer in r0. */
vst1.32 {q0}, [r0]
bx lr @ bx lr
#endif
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
# ifdef _WIN32
.word OPENSSL_armcap_P
# else
.word OPENSSL_armcap_P-.
# endif
#endif
.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 5
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
#endif
-961
View File
@@ -1,961 +0,0 @@
/* Do not modify. This file is auto-generated from armv4-mont.pl. */
#include "arm_arch.h"
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif
.text
/*
 * bn_mul_mont (ARM): word-serial Montgomery multiplication.
 *   r0 = rp (result), r1 = ap, r2 = bp, r3 = np (modulus),
 *   [sp,#0] = &n0, [sp,#4] = num (number of 32-bit words)
 *   Returns r0 = 1 on success, or 0 without doing any work when num < 2.
 * When built with __ARM_MAX_ARCH__>=7, a num that is a multiple of 8 on a
 * CPU with ARMV7_NEON set in OPENSSL_armcap_P is handed to
 * bn_mul8x_mont_neon; everything else takes the integer path at .Lialu.
 *
 * Integer-path frame, addressed relative to r0 = &tp[num-1]:
 *   tp[0..num]      scratch accumulator at the new sp
 *   [r0,#2*4...]    saved r4-r12,lr (10 words)
 *   [r0,#12*4]      rp,  [r0,#13*4] bp (both pushed on entry)
 *   [r0,#14*4]      &n0, replaced by the n0 value once loaded
 *   [r0,#15*4]      num, replaced by the end-of-bp pointer used to
 *                   terminate the outer loop
 * The .LOPENSSL_armcap literal below holds OPENSSL_armcap_P either as an
 * absolute address (_WIN32) or as an offset relative to .Lbn_mul_mont.
 */
#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
# ifdef _WIN32
.word OPENSSL_armcap_P
# else
.word OPENSSL_armcap_P-.Lbn_mul_mont
# endif
#endif
.globl bn_mul_mont
.type bn_mul_mont,%function
.align 5
bn_mul_mont:
.Lbn_mul_mont:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
#if __ARM_MAX_ARCH__>=7
/* The NEON routine is only considered when num is a multiple of 8. */
tst ip,#7
bne .Lialu
ldr r0,.LOPENSSL_armcap
#if !defined(_WIN32)
adr r2,.Lbn_mul_mont
ldr r0,[r0,r2]
# endif
# if defined(__APPLE__) || defined(_WIN32)
ldr r0,[r0]
# endif
tst r0,#ARMV7_NEON @ NEON available?
ldmia sp, {r0,r2}
beq .Lialu
add sp,sp,#8
b bn_mul8x_mont_neon
.align 4
.Lialu:
#endif
/* Reject num < 2: undo the two-word push and return 0. */
cmp ip,#2
mov r0,ip @ load num
#ifdef __thumb2__
ittt lt
#endif
movlt r0,#0
addlt sp,sp,#2*4
blt .Labrt
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
mov r0,r0,lsl#2 @ rescale r0 for byte count
sub sp,sp,r0 @ alloca(4*num)
sub sp,sp,#4 @ +extra dword
sub r0,r0,#4 @ "num=num-1"
add r4,r2,r0 @ &bp[num-1]
add r0,sp,r0 @ r0 to point at &tp[num-1]
ldr r8,[r0,#14*4] @ &n0
ldr r2,[r2] @ bp[0]
ldr r5,[r1],#4 @ ap[0],ap++
ldr r6,[r3],#4 @ np[0],np++
ldr r8,[r8] @ *n0
str r4,[r0,#15*4] @ save &bp[num]
umull r10,r11,r5,r2 @ ap[0]*bp[0]
str r8,[r0,#14*4] @ save n0 value
mul r8,r10,r8 @ "tp[0]"*n0
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
mov r4,sp
/* First pass (bp[0]): tp = (ap*bp[0] + np*m) >> 32, one word per iteration. */
.L1st:
ldr r5,[r1],#4 @ ap[j],ap++
mov r10,r11
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[0]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne .L1st
adds r12,r12,r11
ldr r4,[r0,#13*4] @ restore bp
mov r14,#0
ldr r8,[r0,#14*4] @ restore n0
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
mov r7,sp
str r14,[r0,#4] @ tp[num]=
/* Remaining passes: one per further word of bp, accumulating into tp. */
.Louter:
sub r7,r0,r7 @ "original" r0-1 value
sub r1,r1,r7 @ "rewind" ap to &ap[1]
ldr r2,[r4,#4]! @ *(++bp)
sub r3,r3,r7 @ "rewind" np to &np[1]
ldr r5,[r1,#-4] @ ap[0]
ldr r10,[sp] @ tp[0]
ldr r6,[r3,#-4] @ np[0]
ldr r7,[sp,#4] @ tp[1]
mov r11,#0
umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0]
str r4,[r0,#13*4] @ save bp
mul r8,r10,r8
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
mov r4,sp
.Linner:
ldr r5,[r1],#4 @ ap[j],ap++
adds r10,r11,r7 @ +=tp[j]
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[i]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adc r11,r11,#0
ldr r7,[r4,#8] @ tp[j+1]
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne .Linner
adds r12,r12,r11
mov r14,#0
ldr r4,[r0,#13*4] @ restore bp
adc r14,r14,#0
ldr r8,[r0,#14*4] @ restore n0
adds r12,r12,r7
ldr r7,[r0,#15*4] @ restore &bp[num]
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
str r14,[r0,#4] @ tp[num]=
/* Loop until bp reaches the end pointer saved in the frame. */
cmp r4,r7
#ifdef __thumb2__
itt ne
#endif
movne r7,sp
bne .Louter
ldr r2,[r0,#12*4] @ pull rp
mov r5,sp
add r0,r0,#4 @ r0 to point at &tp[num]
sub r5,r0,r5 @ "original" num value
mov r4,sp @ "rewind" r4
mov r1,r4 @ "borrow" r1
sub r3,r3,r5 @ "rewind" r3 to &np[0]
subs r7,r7,r7 @ "clear" carry flag
/* Final reduction: rp = tp - np, with the carry flag tracking the borrow. */
.Lsub: ldr r7,[r4],#4
ldr r6,[r3],#4
sbcs r7,r7,r6 @ tp[j]-np[j]
str r7,[r2],#4 @ rp[j]=
teq r4,r0 @ preserve carry
bne .Lsub
sbcs r14,r14,#0 @ upmost carry
mov r4,sp @ "rewind" r4
sub r2,r2,r5 @ "rewind" r2
/* If the subtraction borrowed (carry clear), copy tp back over rp; either
   way every word of rp is rewritten and tp is overwritten (with sp). */
.Lcopy: ldr r7,[r4] @ conditional copy
ldr r5,[r2]
str sp,[r4],#4 @ zap tp
#ifdef __thumb2__
it cc
#endif
movcc r5,r7
str r5,[r2],#4
teq r4,r0 @ preserve carry
bne .Lcopy
mov sp,r0
add sp,sp,#4 @ skip over tp[num+1]
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
.Labrt:
#if __ARM_ARCH__>=5
bx lr @ bx lr
#else
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size bn_mul_mont,.-bn_mul_mont
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.type bn_mul8x_mont_neon,%function
.align 5
bn_mul8x_mont_neon:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load rest of parameter block
mov ip,sp
cmp r5,#8
bhi .LNEON_8n
@ special case for r5==8, everything is in register bank...
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
sub r7,sp,r5,lsl#4
vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-(
and r7,r7,#-64
vld1.32 {d30[0]}, [r4,:32]
mov sp,r7 @ alloca
vzip.16 d28,d8
vmull.u32 q6,d28,d0[0]
vmull.u32 q7,d28,d0[1]
vmull.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmull.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
vmul.u32 d29,d29,d30
vmull.u32 q10,d28,d2[0]
vld1.32 {d4,d5,d6,d7}, [r3]!
vmull.u32 q11,d28,d2[1]
vmull.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmull.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
sub r9,r5,#1
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
b .LNEON_outer8
.align 4
.LNEON_outer8:
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
vzip.16 d28,d8
vadd.u64 d12,d12,d10
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
subs r9,r9,#1
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
bne .LNEON_outer8
vadd.u64 d12,d12,d10
mov r7,sp
vshr.u64 d10,d12,#16
mov r8,r5
vadd.u64 d13,d13,d10
add r6,sp,#96
vshr.u64 d10,d13,#16
vzip.16 d12,d13
b .LNEON_tail_entry
.align 4
.LNEON_8n:
veor q6,q6,q6
sub r7,sp,#128
veor q7,q7,q7
sub r7,r7,r5,lsl#4
veor q8,q8,q8
and r7,r7,#-64
veor q9,q9,q9
mov sp,r7 @ alloca
veor q10,q10,q10
add r7,r7,#256
veor q11,q11,q11
sub r8,r5,#8
veor q12,q12,q12
veor q13,q13,q13
.LNEON_8n_init:
vst1.64 {q6,q7},[r7,:256]!
subs r8,r8,#8
vst1.64 {q8,q9},[r7,:256]!
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12,q13},[r7,:256]!
bne .LNEON_8n_init
add r6,sp,#256
vld1.32 {d0,d1,d2,d3},[r1]!
add r10,sp,#8
vld1.32 {d30[0]},[r4,:32]
mov r9,r5
b .LNEON_8n_outer
.align 4
.LNEON_8n_outer:
vld1.32 {d28[0]},[r2,:32]! @ *b++
veor d8,d8,d8
vzip.16 d28,d8
add r7,sp,#128
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
vmlal.u32 q10,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q11,d28,d2[1]
vst1.32 {d28},[sp,:64] @ put aside smashed b[8*i+0]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q6,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q7,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q8,d29,d5[0]
vshr.u64 d12,d12,#16
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vadd.u64 d12,d12,d13
vmlal.u32 q11,d29,d6[1]
vshr.u64 d12,d12,#16
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vadd.u64 d14,d14,d12
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+0]
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]!
vmlal.u32 q8,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q9,d28,d1[0]
vshl.i64 d29,d15,#16
vmlal.u32 q10,d28,d1[1]
vadd.u64 d29,d29,d14
vmlal.u32 q11,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q12,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+1]
vmlal.u32 q13,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q7,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q8,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q9,d29,d5[0]
vshr.u64 d14,d14,#16
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vadd.u64 d14,d14,d15
vmlal.u32 q12,d29,d6[1]
vshr.u64 d14,d14,#16
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vadd.u64 d16,d16,d14
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+1]
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]!
vmlal.u32 q9,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q10,d28,d1[0]
vshl.i64 d29,d17,#16
vmlal.u32 q11,d28,d1[1]
vadd.u64 d29,d29,d16
vmlal.u32 q12,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q13,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+2]
vmlal.u32 q6,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q8,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q9,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q10,d29,d5[0]
vshr.u64 d16,d16,#16
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vadd.u64 d16,d16,d17
vmlal.u32 q13,d29,d6[1]
vshr.u64 d16,d16,#16
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vadd.u64 d18,d18,d16
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+2]
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]!
vmlal.u32 q10,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q11,d28,d1[0]
vshl.i64 d29,d19,#16
vmlal.u32 q12,d28,d1[1]
vadd.u64 d29,d29,d18
vmlal.u32 q13,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q6,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+3]
vmlal.u32 q7,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q9,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q10,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q11,d29,d5[0]
vshr.u64 d18,d18,#16
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vadd.u64 d18,d18,d19
vmlal.u32 q6,d29,d6[1]
vshr.u64 d18,d18,#16
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vadd.u64 d20,d20,d18
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+3]
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]!
vmlal.u32 q11,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q12,d28,d1[0]
vshl.i64 d29,d21,#16
vmlal.u32 q13,d28,d1[1]
vadd.u64 d29,d29,d20
vmlal.u32 q6,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q7,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+4]
vmlal.u32 q8,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q10,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q11,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q12,d29,d5[0]
vshr.u64 d20,d20,#16
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vadd.u64 d20,d20,d21
vmlal.u32 q7,d29,d6[1]
vshr.u64 d20,d20,#16
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vadd.u64 d22,d22,d20
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+4]
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]!
vmlal.u32 q12,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q13,d28,d1[0]
vshl.i64 d29,d23,#16
vmlal.u32 q6,d28,d1[1]
vadd.u64 d29,d29,d22
vmlal.u32 q7,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q8,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+5]
vmlal.u32 q9,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q11,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q12,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q13,d29,d5[0]
vshr.u64 d22,d22,#16
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vadd.u64 d22,d22,d23
vmlal.u32 q8,d29,d6[1]
vshr.u64 d22,d22,#16
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vadd.u64 d24,d24,d22
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+5]
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]!
vmlal.u32 q13,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q6,d28,d1[0]
vshl.i64 d29,d25,#16
vmlal.u32 q7,d28,d1[1]
vadd.u64 d29,d29,d24
vmlal.u32 q8,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q9,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+6]
vmlal.u32 q10,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q12,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q13,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q6,d29,d5[0]
vshr.u64 d24,d24,#16
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vadd.u64 d24,d24,d25
vmlal.u32 q9,d29,d6[1]
vshr.u64 d24,d24,#16
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vadd.u64 d26,d26,d24
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+6]
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]!
vmlal.u32 q6,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q7,d28,d1[0]
vshl.i64 d29,d27,#16
vmlal.u32 q8,d28,d1[1]
vadd.u64 d29,d29,d26
vmlal.u32 q9,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+7]
vmlal.u32 q11,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q12,d28,d3[1]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q13,d29,d4[0]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q6,d29,d4[1]
vmlal.u32 q7,d29,d5[0]
vshr.u64 d26,d26,#16
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vadd.u64 d26,d26,d27
vmlal.u32 q10,d29,d6[1]
vshr.u64 d26,d26,#16
vmlal.u32 q11,d29,d7[0]
vmlal.u32 q12,d29,d7[1]
vadd.u64 d12,d12,d26
vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7]
add r10,sp,#8 @ rewind
sub r8,r5,#8
b .LNEON_8n_inner
.align 4
.LNEON_8n_inner:
subs r8,r8,#8
vmlal.u32 q6,d28,d0[0]
vld1.64 {q13},[r6,:128]
vmlal.u32 q7,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+0]
vmlal.u32 q8,d28,d1[0]
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q9,d28,d1[1]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmlal.u32 q11,d29,d6[1]
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vst1.64 {q6},[r7,:128]!
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]
vmlal.u32 q8,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+1]
vmlal.u32 q9,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d1[1]
vmlal.u32 q11,d28,d2[0]
vmlal.u32 q12,d28,d2[1]
vmlal.u32 q13,d28,d3[0]
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+2]
vmlal.u32 q7,d29,d4[0]
vmlal.u32 q8,d29,d4[1]
vmlal.u32 q9,d29,d5[0]
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vmlal.u32 q12,d29,d6[1]
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vst1.64 {q7},[r7,:128]!
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]
vmlal.u32 q9,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+2]
vmlal.u32 q10,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q11,d28,d1[1]
vmlal.u32 q12,d28,d2[0]
vmlal.u32 q13,d28,d2[1]
vmlal.u32 q6,d28,d3[0]
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+3]
vmlal.u32 q8,d29,d4[0]
vmlal.u32 q9,d29,d4[1]
vmlal.u32 q10,d29,d5[0]
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vmlal.u32 q13,d29,d6[1]
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vst1.64 {q8},[r7,:128]!
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]
vmlal.u32 q10,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+3]
vmlal.u32 q11,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q12,d28,d1[1]
vmlal.u32 q13,d28,d2[0]
vmlal.u32 q6,d28,d2[1]
vmlal.u32 q7,d28,d3[0]
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+4]
vmlal.u32 q9,d29,d4[0]
vmlal.u32 q10,d29,d4[1]
vmlal.u32 q11,d29,d5[0]
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vmlal.u32 q6,d29,d6[1]
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vst1.64 {q9},[r7,:128]!
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]
vmlal.u32 q11,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+4]
vmlal.u32 q12,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q13,d28,d1[1]
vmlal.u32 q6,d28,d2[0]
vmlal.u32 q7,d28,d2[1]
vmlal.u32 q8,d28,d3[0]
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+5]
vmlal.u32 q10,d29,d4[0]
vmlal.u32 q11,d29,d4[1]
vmlal.u32 q12,d29,d5[0]
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vmlal.u32 q7,d29,d6[1]
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vst1.64 {q10},[r7,:128]!
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]
vmlal.u32 q12,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+5]
vmlal.u32 q13,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q6,d28,d1[1]
vmlal.u32 q7,d28,d2[0]
vmlal.u32 q8,d28,d2[1]
vmlal.u32 q9,d28,d3[0]
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+6]
vmlal.u32 q11,d29,d4[0]
vmlal.u32 q12,d29,d4[1]
vmlal.u32 q13,d29,d5[0]
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vmlal.u32 q8,d29,d6[1]
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vst1.64 {q11},[r7,:128]!
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]
vmlal.u32 q13,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+6]
vmlal.u32 q6,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q7,d28,d1[1]
vmlal.u32 q8,d28,d2[0]
vmlal.u32 q9,d28,d2[1]
vmlal.u32 q10,d28,d3[0]
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+7]
vmlal.u32 q12,d29,d4[0]
vmlal.u32 q13,d29,d4[1]
vmlal.u32 q6,d29,d5[0]
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vmlal.u32 q9,d29,d6[1]
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vst1.64 {q12},[r7,:128]!
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]
vmlal.u32 q6,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+7]
vmlal.u32 q7,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q8,d28,d1[1]
vmlal.u32 q9,d28,d2[0]
vmlal.u32 q10,d28,d2[1]
vmlal.u32 q11,d28,d3[0]
vmlal.u32 q12,d28,d3[1]
it eq
subeq r1,r1,r5,lsl#2 @ rewind
vmlal.u32 q13,d29,d4[0]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q6,d29,d4[1]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q7,d29,d5[0]
add r10,sp,#8 @ rewind
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vmlal.u32 q10,d29,d6[1]
vmlal.u32 q11,d29,d7[0]
vst1.64 {q13},[r7,:128]!
vmlal.u32 q12,d29,d7[1]
bne .LNEON_8n_inner
add r6,sp,#128
vst1.64 {q6,q7},[r7,:256]!
veor q2,q2,q2 @ d4-d5
vst1.64 {q8,q9},[r7,:256]!
veor q3,q3,q3 @ d6-d7
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12},[r7,:128]
subs r9,r9,#8
vld1.64 {q6,q7},[r6,:256]!
vld1.64 {q8,q9},[r6,:256]!
vld1.64 {q10,q11},[r6,:256]!
vld1.64 {q12,q13},[r6,:256]!
itt ne
subne r3,r3,r5,lsl#2 @ rewind
bne .LNEON_8n_outer
add r7,sp,#128
vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame
vshr.u64 d10,d12,#16
vst1.64 {q2,q3},[sp,:256]!
vadd.u64 d13,d13,d10
vst1.64 {q2,q3}, [sp,:256]!
vshr.u64 d10,d13,#16
vst1.64 {q2,q3}, [sp,:256]!
vzip.16 d12,d13
mov r8,r5
b .LNEON_tail_entry
.align 4
.LNEON_tail:
vadd.u64 d12,d12,d10
vshr.u64 d10,d12,#16
vld1.64 {q8,q9}, [r6, :256]!
vadd.u64 d13,d13,d10
vld1.64 {q10,q11}, [r6, :256]!
vshr.u64 d10,d13,#16
vld1.64 {q12,q13}, [r6, :256]!
vzip.16 d12,d13
.LNEON_tail_entry:
vadd.u64 d14,d14,d10
vst1.32 {d12[0]}, [r7, :32]!
vshr.u64 d10,d14,#16
vadd.u64 d15,d15,d10
vshr.u64 d10,d15,#16
vzip.16 d14,d15
vadd.u64 d16,d16,d10
vst1.32 {d14[0]}, [r7, :32]!
vshr.u64 d10,d16,#16
vadd.u64 d17,d17,d10
vshr.u64 d10,d17,#16
vzip.16 d16,d17
vadd.u64 d18,d18,d10
vst1.32 {d16[0]}, [r7, :32]!
vshr.u64 d10,d18,#16
vadd.u64 d19,d19,d10
vshr.u64 d10,d19,#16
vzip.16 d18,d19
vadd.u64 d20,d20,d10
vst1.32 {d18[0]}, [r7, :32]!
vshr.u64 d10,d20,#16
vadd.u64 d21,d21,d10
vshr.u64 d10,d21,#16
vzip.16 d20,d21
vadd.u64 d22,d22,d10
vst1.32 {d20[0]}, [r7, :32]!
vshr.u64 d10,d22,#16
vadd.u64 d23,d23,d10
vshr.u64 d10,d23,#16
vzip.16 d22,d23
vadd.u64 d24,d24,d10
vst1.32 {d22[0]}, [r7, :32]!
vshr.u64 d10,d24,#16
vadd.u64 d25,d25,d10
vshr.u64 d10,d25,#16
vzip.16 d24,d25
vadd.u64 d26,d26,d10
vst1.32 {d24[0]}, [r7, :32]!
vshr.u64 d10,d26,#16
vadd.u64 d27,d27,d10
vshr.u64 d10,d27,#16
vzip.16 d26,d27
vld1.64 {q6,q7}, [r6, :256]!
subs r8,r8,#8
vst1.32 {d26[0]}, [r7, :32]!
bne .LNEON_tail
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
sub r3,r3,r5,lsl#2 @ rewind r3
subs r1,sp,#0 @ clear carry flag
add r2,sp,r5,lsl#2
.LNEON_sub:
ldmia r1!, {r4,r5,r6,r7}
ldmia r3!, {r8,r9,r10,r11}
sbcs r8, r4,r8
sbcs r9, r5,r9
sbcs r10,r6,r10
sbcs r11,r7,r11
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne .LNEON_sub
ldr r10, [r1] @ load top-most bit
mov r11,sp
veor q0,q0,q0
sub r11,r2,r11 @ this is num*4
veor q1,q1,q1
mov r1,sp
sub r0,r0,r11 @ rewind r0
mov r3,r2 @ second 3/4th of frame
sbcs r10,r10,#0 @ result is carry flag
.LNEON_copy_n_zap:
ldmia r1!, {r4,r5,r6,r7}
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r3,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
ldmia r1, {r4,r5,r6,r7}
stmia r0!, {r8,r9,r10,r11}
sub r1,r1,#16
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r1,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne .LNEON_copy_n_zap
mov sp,ip
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
bx lr @ bx lr
.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
#endif
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
#endif
-273
View File
@@ -1,273 +0,0 @@
/* Do not modify. This file is auto-generated from armv4cpuid.pl. */
#include "arm_arch.h"
#if defined(__thumb2__) && !defined(__APPLE__)
.syntax unified
.thumb
#else
.code 32
#undef __thumb2__
#endif
.text
.align 5
.globl OPENSSL_atomic_add
.type OPENSSL_atomic_add,%function
@ OPENSSL_atomic_add: add r1 to the word at [r0] and return the sum.
@ In:  r0 = address of the word, r1 = amount to add
@ Out: r0 = the updated word value (on both code paths)
@ ARMv6 and later use an ldrex/strex retry loop. Older targets take a
@ file-local spinlock with swp and call sched_yield while it is held.
OPENSSL_atomic_add:
#if __ARM_ARCH__>=6
.Ladd: ldrex r2,[r0]
add r3,r2,r1
strex r2,r3,[r0] @ r2 = status, 0 when the store succeeded
cmp r2,#0
bne .Ladd @ store failed, retry from the load
mov r0,r3 @ return the updated value
bx lr
#else
stmdb sp!,{r4,r5,r6,lr}
ldr r2,.Lspinlock @ r2 = offset of the lock from .Lspinlock
adr r3,.Lspinlock
mov r4,r0 @ keep the arguments across sched_yield
mov r5,r1
add r6,r3,r2 @ &spinlock
b .+8 @ first attempt skips the yield
.Lspin: bl sched_yield
mov r0,#-1
swp r0,r0,[r6] @ r0 = previous lock word, lock word = -1
cmp r0,#0
bne .Lspin @ non-zero means somebody else holds it
ldr r2,[r4]
add r2,r2,r5
str r2,[r4]
str r0,[r6] @ release spinlock
mov r0,r2 @ return the updated value, not the 0 used to unlock
ldmia sp!,{r4,r5,r6,lr}
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
#endif
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
.globl OPENSSL_cleanse
.type OPENSSL_cleanse,%function
@ OPENSSL_cleanse: store zero over r1 bytes starting at r0.
@ In:  r0 = start address, r1 = byte count
@ Counts below 7 are cleared one byte at a time. Otherwise bytes are
@ stored until r0 is 4-byte aligned, then whole words, then the tail.
OPENSSL_cleanse:
eor ip,ip,ip @ ip = 0, the value written everywhere
cmp r1,#7
#ifdef __thumb2__
itt hs
#endif
subhs r1,r1,#4 @ bias the count by -4 for the word loop
bhs .Lot
cmp r1,#0
beq .Lcleanse_done
.Little:
strb ip,[r0],#1
subs r1,r1,#1
bhi .Little @ loop while bytes remain
b .Lcleanse_done
.Lot: tst r0,#3
beq .Laligned
strb ip,[r0],#1 @ byte stores until r0 is word aligned
sub r1,r1,#1
b .Lot
.Laligned:
str ip,[r0],#4
subs r1,r1,#4
bhs .Laligned @ loop until the biased count borrows
adds r1,r1,#4 @ undo the bias: r1 = leftover bytes (0..3)
bne .Little
.Lcleanse_done:
#if __ARM_ARCH__>=5
bx lr
#else
tst lr,#1 @ bit 0 of lr set means a Thumb caller
moveq pc,lr
.word 0xe12fff1e @ bx lr
#endif
.size OPENSSL_cleanse,.-OPENSSL_cleanse
.globl CRYPTO_memcmp
.type CRYPTO_memcmp,%function
.align 4
@ CRYPTO_memcmp: compare r2 bytes at r0 and r1 without data-dependent
@ branches.
@ In:  r0, r1 = buffers, r2 = byte count
@ Out: r0 = 0 when every byte pair matched (or r2 was 0), 1 otherwise
@ ip accumulates the OR of all byte differences; the only branches
@ depend on the count.
CRYPTO_memcmp:
mov ip,#0 @ difference accumulator
cmp r2,#0
beq .Lcmp_finish
stmdb sp!,{r4,r5}
.Lcmp_byte:
ldrb r4,[r0],#1
ldrb r5,[r1],#1
subs r2,r2,#1 @ flags consumed by the bne below
eor r4,r4,r5 @ non-zero when the two bytes differ
orr ip,ip,r4
bne .Lcmp_byte
ldmia sp!,{r4,r5}
.Lcmp_finish:
rsb r0,ip,#0 @ -ip: bit 31 is set exactly when ip was 1..255
mov r0,r0,lsr#31
#if __ARM_ARCH__>=5
bx lr
#else
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
#endif
.size CRYPTO_memcmp,.-CRYPTO_memcmp
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.align 5
.globl _armv7_neon_probe
.type _armv7_neon_probe,%function
@ Executes one NEON instruction and returns; takes no arguments.
@ NOTE(review): presumably run under a SIGILL handler to detect NEON
@ support - confirm against the caller.
_armv7_neon_probe:
vorr q0,q0,q0 @ q0 |= q0, leaves q0 unchanged
bx lr
.size _armv7_neon_probe,.-_armv7_neon_probe
.globl _armv7_tick
.type _armv7_tick,%function
@ Reads a 64-bit coprocessor counter into r0 (low word) and r1 (high
@ word): CNTPCT on Apple targets, CNTVCT elsewhere.
_armv7_tick:
#ifdef __APPLE__
mrrc p15,0,r0,r1,c14 @ CNTPCT
#else
mrrc p15,1,r0,r1,c14 @ CNTVCT
#endif
bx lr
.size _armv7_tick,.-_armv7_tick
.globl _armv8_aes_probe
.type _armv8_aes_probe,%function
@ Executes one hand-encoded aese.8 q0,q0 and returns. The instruction is
@ emitted as raw bytes, with a separate byte order for Thumb-2 and ARM.
@ NOTE(review): presumably a capability probe run under a SIGILL
@ handler - confirm against the caller.
_armv8_aes_probe:
#if defined(__thumb2__) && !defined(__APPLE__)
.byte 0xb0,0xff,0x00,0x03 @ aese.8 q0,q0
#else
.byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
#endif
bx lr
.size _armv8_aes_probe,.-_armv8_aes_probe
.globl _armv8_sha1_probe
.type _armv8_sha1_probe,%function
@ Executes one hand-encoded sha1c.32 q0,q0,q0 and returns. The
@ instruction is emitted as raw bytes for Thumb-2 or ARM encoding.
@ NOTE(review): presumably a capability probe run under a SIGILL
@ handler - confirm against the caller.
_armv8_sha1_probe:
#if defined(__thumb2__) && !defined(__APPLE__)
.byte 0x00,0xef,0x40,0x0c @ sha1c.32 q0,q0,q0
#else
.byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
#endif
bx lr
.size _armv8_sha1_probe,.-_armv8_sha1_probe
.globl _armv8_sha256_probe
.type _armv8_sha256_probe,%function
@ Executes one hand-encoded sha256h.32 q0,q0,q0 and returns. The
@ instruction is emitted as raw bytes for Thumb-2 or ARM encoding.
@ NOTE(review): presumably a capability probe run under a SIGILL
@ handler - confirm against the caller.
_armv8_sha256_probe:
#if defined(__thumb2__) && !defined(__APPLE__)
.byte 0x00,0xff,0x40,0x0c @ sha256h.32 q0,q0,q0
#else
.byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
#endif
bx lr
.size _armv8_sha256_probe,.-_armv8_sha256_probe
.globl _armv8_pmull_probe
.type _armv8_pmull_probe,%function
@ Executes one hand-encoded vmull.p64 q0,d0,d0 and returns. The
@ instruction is emitted as raw bytes for Thumb-2 or ARM encoding.
@ NOTE(review): presumably a capability probe run under a SIGILL
@ handler - confirm against the caller.
_armv8_pmull_probe:
#if defined(__thumb2__) && !defined(__APPLE__)
.byte 0xa0,0xef,0x00,0x0e @ vmull.p64 q0,d0,d0
#else
.byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
#endif
bx lr
.size _armv8_pmull_probe,.-_armv8_pmull_probe
#endif
.globl OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,%function
@ OPENSSL_wipe_cpu: zero scratch registers and return the stack pointer.
@ Out: r0 = sp at the time of the call
@ Always clears r2, r3 and ip. When built for __ARM_MAX_ARCH__>=7 and
@ bit 0 of OPENSSL_armcap_P is set, it also clears q0-q3 and q8-q15;
@ q4-q7 are left untouched.
OPENSSL_wipe_cpu:
#if __ARM_MAX_ARCH__>=7
ldr r0,.LOPENSSL_armcap @ r0 = OPENSSL_armcap_P - .LOPENSSL_armcap
adr r1,.LOPENSSL_armcap
ldr r0,[r1,r0] @ load via the pc-relative offset
#ifdef __APPLE__
ldr r0,[r0] @ one more level of indirection on Apple targets
#endif
#endif
eor r2,r2,r2
eor r3,r3,r3
eor ip,ip,ip
#if __ARM_MAX_ARCH__>=7
tst r0,#1 @ bit 0 gates the NEON register wipe
beq .Lwipe_done
veor q0, q0, q0
veor q1, q1, q1
veor q2, q2, q2
veor q3, q3, q3
veor q8, q8, q8
veor q9, q9, q9
veor q10, q10, q10
veor q11, q11, q11
veor q12, q12, q12
veor q13, q13, q13
veor q14, q14, q14
veor q15, q15, q15
.Lwipe_done:
#endif
mov r0,sp @ return value: current stack pointer
#if __ARM_ARCH__>=5
bx lr
#else
tst lr,#1
moveq pc,lr
.word 0xe12fff1e @ bx lr
#endif
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
.globl OPENSSL_instrument_bus
.type OPENSSL_instrument_bus,%function
@ Stub: ignores its arguments and returns 0 in r0.
OPENSSL_instrument_bus:
mov r0,#0 @ return value is always 0
#if __ARM_ARCH__>=5
bx lr
#else
tst lr,#1 @ bit 0 of lr set means a Thumb caller
moveq pc,lr
.word 0xe12fff1e @ bx lr
#endif
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
.globl OPENSSL_instrument_bus2
.type OPENSSL_instrument_bus2,%function
@ Stub: ignores its arguments and returns 0 in r0.
OPENSSL_instrument_bus2:
mov r0,#0 @ return value is always 0
#if __ARM_ARCH__>=5
bx lr
#else
tst lr,#1 @ bit 0 of lr set means a Thumb caller
moveq pc,lr
.word 0xe12fff1e @ bx lr
#endif
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
.align 5
@ Literal pool for the routines above. Each entry holds the distance
@ from the entry itself to its target, so code can form the address with
@ adr plus the loaded offset.
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-.
#endif
#if __ARM_ARCH__>=6
.align 5
#else
.Lspinlock:
.word atomic_add_spinlock-.Lspinlock
.align 5
.data
.align 2
@ Lock word used by the pre-ARMv6 path of OPENSSL_atomic_add:
@ 0 = free, -1 = held.
atomic_add_spinlock:
.word 0
#endif
@ 4-byte, 4-aligned common symbol, hidden from other modules.
.comm OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
-565
View File
@@ -1,565 +0,0 @@
/* Do not modify. This file is auto-generated from ghash-armv4.pl. */
#include "arm_arch.h"
#if defined(__thumb2__) || defined(__clang__)
.syntax unified
#define ldrplb ldrbpl
#define ldrneb ldrbne
#endif
#if defined(__thumb2__)
.thumb
#else
.code 32
#endif
.text
.type rem_4bit,%object
.align 5
@ 16 halfword entries (32 bytes), indexed by a 4-bit value. The GHASH
@ routines below load entry [rem] with ldrh at byte offset 2*rem and
@ XOR it, shifted left by 16, into the top word of the accumulator.
@ The table sits in .text so code can find it pc-relative.
rem_4bit:
.short 0x0000,0x1C20,0x3840,0x2460
.short 0x7080,0x6CA0,0x48C0,0x54E0
.short 0xE100,0xFD20,0xD940,0xC560
.short 0x9180,0x8DA0,0xA9C0,0xB5E0
.size rem_4bit,.-rem_4bit
.type rem_4bit_get,%function
@ Helper for gcm_gmult_4bit: puts the address of rem_4bit in r2 and
@ branches back to .Lrem_4bit_got. Entered with b, not bl.
@ In ARM mode the address is formed from pc: pc reads as this
@ instruction + 8, and the 32-byte table ends right where this code
@ starts, hence #8+32. Do not insert anything between the table and
@ this routine.
rem_4bit_get:
#if defined(__thumb2__)
adr r2,rem_4bit
#else
sub r2,pc,#8+32 @ &rem_4bit
#endif
b .Lrem_4bit_got
@ NOTE(review): the two nops look like padding so that, in ARM mode,
@ gcm_ghash_4bit starts 48 bytes after rem_4bit (its own offset is
@ #8+48) - confirm before changing the size of this routine.
nop
nop
.size rem_4bit_get,.-rem_4bit_get
.globl gcm_ghash_4bit
.type gcm_ghash_4bit,%function
.align 4
@ gcm_ghash_4bit: fold input blocks into the 16-byte value at [r0]
@ using 4-bit table lookups.
@ In:  r0 = Xi (16 bytes, read and rewritten once per block)
@      r1 = Htbl (table of 16-byte entries, indexed by nibble)
@      r2 = input pointer
@      r3 = input length in bytes
@ The outer loop advances r2 by 16 and stops only when r2 equals the end
@ pointer, so the length is expected to be a non-zero multiple of 16.
@ Stack frame after the prologue: [sp,#0..31] copy of rem_4bit,
@ [sp,#32] end pointer, then saved r4-r11 and lr.
@ r4-r7 hold the 128-bit accumulator; r12/r14 hold the low/high nibble
@ (high nibble left in bits 4-7) of the next byte of Xi^inp.
gcm_ghash_4bit:
#if defined(__thumb2__)
adr r12,rem_4bit
#else
sub r12,pc,#8+48 @ &rem_4bit
#endif
add r3,r2,r3 @ r3 to point at the end
stmdb sp!,{r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} @ save r3/end too
ldmia r12,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy rem_4bit ...
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ ... to stack
ldrb r12,[r2,#15]
ldrb r14,[r0,#15]
.Louter:
eor r12,r12,r14 @ byte 15 of inp ^ Xi
and r14,r12,#0xf0
and r12,r12,#0x0f
mov r3,#14 @ r3 = index of the next byte, counts down to 0
add r7,r1,r12,lsl#4
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
add r11,r1,r14
ldrb r12,[r2,#14]
and r14,r4,#0xf @ rem
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
add r14,r14,r14 @ halfword index -> byte offset
eor r4,r8,r4,lsr#4
ldrh r8,[sp,r14] @ rem_4bit[rem]
eor r4,r4,r5,lsl#28
ldrb r14,[r0,#14]
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
eor r12,r12,r14
and r14,r12,#0xf0
and r12,r12,#0x0f
eor r7,r7,r8,lsl#16
.Linner:
add r11,r1,r12,lsl#4
and r12,r4,#0xf @ rem
subs r3,r3,#1 @ sets N once byte 0 has been fetched
add r12,r12,r12
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
ldrh r8,[sp,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r12,[r2,r3]
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
add r11,r1,r14
and r14,r4,#0xf @ rem
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
add r14,r14,r14
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
eor r4,r8,r4,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r8,[r0,r3]
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
ldrh r9,[sp,r14]
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
#ifdef __thumb2__
it pl
#endif
eorpl r12,r12,r8
eor r7,r11,r7,lsr#4
#ifdef __thumb2__
itt pl
#endif
andpl r14,r12,#0xf0
andpl r12,r12,#0x0f
eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
bpl .Linner
ldr r3,[sp,#32] @ re-load r3/end
add r2,r2,#16
mov r14,r4 @ keep the low word: its low byte is byte 15 of the new Xi
@ Write the accumulator back to Xi, most significant byte first,
@ one word at a time: r4 -> [12..15], r5 -> [8..11], r6 -> [4..7],
@ r7 -> [0..3].
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r4,r4
str r4,[r0,#12]
#elif defined(__ARMEB__)
str r4,[r0,#12]
#else
mov r9,r4,lsr#8
strb r4,[r0,#12+3]
mov r10,r4,lsr#16
strb r9,[r0,#12+2]
mov r11,r4,lsr#24
strb r10,[r0,#12+1]
strb r11,[r0,#12]
#endif
cmp r2,r3 @ more input? flags consumed by ldrneb and bne below
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r5,r5
str r5,[r0,#8]
#elif defined(__ARMEB__)
str r5,[r0,#8]
#else
mov r9,r5,lsr#8
strb r5,[r0,#8+3]
mov r10,r5,lsr#16
strb r9,[r0,#8+2]
mov r11,r5,lsr#24
strb r10,[r0,#8+1]
strb r11,[r0,#8]
#endif
#ifdef __thumb2__
it ne
#endif
ldrneb r12,[r2,#15]
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r6,r6
str r6,[r0,#4]
#elif defined(__ARMEB__)
str r6,[r0,#4]
#else
mov r9,r6,lsr#8
strb r6,[r0,#4+3]
mov r10,r6,lsr#16
strb r9,[r0,#4+2]
mov r11,r6,lsr#24
strb r10,[r0,#4+1]
strb r11,[r0,#4]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r7,r7
str r7,[r0,#0]
#elif defined(__ARMEB__)
str r7,[r0,#0]
#else
mov r9,r7,lsr#8
strb r7,[r0,#0+3]
mov r10,r7,lsr#16
strb r9,[r0,#0+2]
mov r11,r7,lsr#24
strb r10,[r0,#0+1]
strb r11,[r0,#0]
#endif
bne .Louter
add sp,sp,#36 @ drop the rem_4bit copy (32) and the saved end pointer (4)
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size gcm_ghash_4bit,.-gcm_ghash_4bit
.globl gcm_gmult_4bit
.type gcm_gmult_4bit,%function
@ gcm_gmult_4bit: update the 16-byte value at [r0] in place using 4-bit
@ table lookups; no input block is mixed in.
@ In:  r0 = Xi (16 bytes, read and rewritten)
@      r1 = Htbl (table of 16-byte entries, indexed by nibble)
@ r2 is loaded with &rem_4bit by the rem_4bit_get helper, which jumps
@ back to .Lrem_4bit_got. r4-r7 hold the 128-bit accumulator; r3 counts
@ the byte index down from 14.
gcm_gmult_4bit:
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
ldrb r12,[r0,#15]
b rem_4bit_get
.Lrem_4bit_got:
and r14,r12,#0xf0
and r12,r12,#0x0f
mov r3,#14
add r7,r1,r12,lsl#4
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
ldrb r12,[r0,#14]
add r11,r1,r14
and r14,r4,#0xf @ rem
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
add r14,r14,r14 @ halfword index -> byte offset
eor r4,r8,r4,lsr#4
ldrh r8,[r2,r14] @ rem_4bit[rem]
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
and r14,r12,#0xf0
eor r7,r7,r8,lsl#16
and r12,r12,#0x0f
.Loop:
add r11,r1,r12,lsl#4
and r12,r4,#0xf @ rem
subs r3,r3,#1 @ sets N once byte 0 has been fetched
add r12,r12,r12
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
ldrh r8,[r2,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r12,[r0,r3]
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
add r11,r1,r14
and r14,r4,#0xf @ rem
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
add r14,r14,r14
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
ldrh r8,[r2,r14] @ rem_4bit[rem]
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
#ifdef __thumb2__
itt pl
#endif
andpl r14,r12,#0xf0
andpl r12,r12,#0x0f
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
bpl .Loop
@ Write the accumulator back to Xi, most significant byte first,
@ one word at a time: r4 -> [12..15], r5 -> [8..11], r6 -> [4..7],
@ r7 -> [0..3].
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r4,r4
str r4,[r0,#12]
#elif defined(__ARMEB__)
str r4,[r0,#12]
#else
mov r9,r4,lsr#8
strb r4,[r0,#12+3]
mov r10,r4,lsr#16
strb r9,[r0,#12+2]
mov r11,r4,lsr#24
strb r10,[r0,#12+1]
strb r11,[r0,#12]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r5,r5
str r5,[r0,#8]
#elif defined(__ARMEB__)
str r5,[r0,#8]
#else
mov r9,r5,lsr#8
strb r5,[r0,#8+3]
mov r10,r5,lsr#16
strb r9,[r0,#8+2]
mov r11,r5,lsr#24
strb r10,[r0,#8+1]
strb r11,[r0,#8]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r6,r6
str r6,[r0,#4]
#elif defined(__ARMEB__)
str r6,[r0,#4]
#else
mov r9,r6,lsr#8
strb r6,[r0,#4+3]
mov r10,r6,lsr#16
strb r9,[r0,#4+2]
mov r11,r6,lsr#24
strb r10,[r0,#4+1]
strb r11,[r0,#4]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r7,r7
str r7,[r0,#0]
#elif defined(__ARMEB__)
str r7,[r0,#0]
#else
mov r9,r7,lsr#8
strb r7,[r0,#0+3]
mov r10,r7,lsr#16
strb r9,[r0,#0+2]
mov r11,r7,lsr#24
strb r10,[r0,#0+1]
strb r11,[r0,#0]
#endif
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size gcm_gmult_4bit,.-gcm_gmult_4bit
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.globl gcm_init_neon
.type gcm_init_neon,%function
.align 4
@ gcm_init_neon: derive the "twisted H" value used by the NEON GHASH
@ routines.
@ In:  r0 = destination (16 bytes written), r1 = H (16 bytes read)
@ The two 64-bit halves of H are loaded swapped (d7 first, then d6),
@ the 128-bit value is rotated left by one bit, and a constant is XORed
@ in when the bit rotated out was set.
gcm_init_neon:
vld1.64 d7,[r1]! @ load H
vmov.i8 q8,#0xe1
vld1.64 d6,[r1]
vshl.i64 d17,#57
vshr.u64 d16,#63 @ t0=0xc2....01
vdup.8 q9,d7[7]
vshr.u64 d26,d6,#63 @ bit carried from the low half into the high half
vshr.s8 q9,#7 @ broadcast carry bit
vshl.i64 q3,q3,#1
vand q8,q8,q9 @ keep the constant only when the carry bit was set
vorr d7,d26 @ H<<<=1
veor q3,q3,q8 @ twisted H
vstmia r0,{q3}
bx lr @ bx lr
.size gcm_init_neon,.-gcm_init_neon
.globl gcm_gmult_neon
.type gcm_gmult_neon,%function
.align 4
@ gcm_gmult_neon: multiply the 16-byte value at [r0] by twisted H.
@ In:  r0 = Xi (16 bytes, read here and rewritten by the shared tail)
@      r1 = twisted H, as stored by gcm_init_neon
@ This is only a front end: it loads Xi into q3, sets up the mask
@ registers d29-d31 and d28, sets r3 = 16 so the shared loop runs one
@ pass, and branches to .Lgmult_neon inside gcm_ghash_neon. r0 is left
@ advanced by 16; the shared tail rewinds it.
gcm_gmult_neon:
vld1.64 d7,[r0]! @ load Xi
vld1.64 d6,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
mov r3,#16
b .Lgmult_neon
.size gcm_gmult_neon,.-gcm_gmult_neon
.globl gcm_ghash_neon
.type gcm_ghash_neon,%function
.align 4
@ gcm_ghash_neon: fold input blocks into the 16-byte value at [r0].
@ In:  r0 = Xi (16 bytes, read once, rewritten at the end)
@      r1 = twisted H, as stored by gcm_init_neon
@      r2 = input pointer
@      r3 = input length in bytes
@ The loop subtracts 16 from r3 per block and exits only on zero, so
@ the length is expected to be a non-zero multiple of 16.
@ Register roles: d26/d27 = halves of twisted H, d28 = d26^d27,
@ d29/d30/d31 = 48/32/16-bit masks, q3 = current block ^ Xi, q0 = Xi.
@ Each pass does three 64x64 polynomial multiplies built from vmull.p8:
@ d26*d6 into q0, d28*(d6^d7) into q1 and d27*d7 into q2, followed by
@ Karatsuba recombination and a two-phase reduction.
@ Only q0-q3 and q8-q15 are written; q4-q7 are not touched.
gcm_ghash_neon:
vld1.64 d1,[r0]! @ load Xi
vld1.64 d0,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
.Loop_neon:
vld1.64 d7,[r2]! @ load inp
vld1.64 d6,[r2]!
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
veor q3,q0 @ inp^=Xi
.Lgmult_neon:
@ first multiply: q0 = d26 * d6
vext.8 d16, d26, d26, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d0, d6, d6, #1 @ B1
vmull.p8 q0, d26, d0 @ E = A*B1
vext.8 d18, d26, d26, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d26, d22 @ G = A*B2
vext.8 d20, d26, d26, #3 @ A3
veor q8, q8, q0 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d0, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q0, d26, d0 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d26, d22 @ K = A*B4
veor q10, q10, q0 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q0, d26, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q0, q0, q8
veor q0, q0, q10
@ second multiply: q1 = d28 * (d6 ^ d7)
veor d6,d6,d7 @ Karatsuba pre-processing
vext.8 d16, d28, d28, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d2, d6, d6, #1 @ B1
vmull.p8 q1, d28, d2 @ E = A*B1
vext.8 d18, d28, d28, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d28, d22 @ G = A*B2
vext.8 d20, d28, d28, #3 @ A3
veor q8, q8, q1 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d2, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q1, d28, d2 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d28, d22 @ K = A*B4
veor q10, q10, q1 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q1, d28, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q1, q1, q8
veor q1, q1, q10
@ third multiply: q2 = d27 * d7
vext.8 d16, d27, d27, #1 @ A1
vmull.p8 q8, d16, d7 @ F = A1*B
vext.8 d4, d7, d7, #1 @ B1
vmull.p8 q2, d27, d4 @ E = A*B1
vext.8 d18, d27, d27, #2 @ A2
vmull.p8 q9, d18, d7 @ H = A2*B
vext.8 d22, d7, d7, #2 @ B2
vmull.p8 q11, d27, d22 @ G = A*B2
vext.8 d20, d27, d27, #3 @ A3
veor q8, q8, q2 @ L = E + F
vmull.p8 q10, d20, d7 @ J = A3*B
vext.8 d4, d7, d7, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q2, d27, d4 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d7, d7, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d27, d22 @ K = A*B4
veor q10, q10, q2 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q2, d27, d7 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q2, q2, q8
veor q2, q2, q10
veor q1,q1,q0 @ Karatsuba post-processing
veor q1,q1,q2
veor d1,d1,d2
veor d4,d4,d3 @ Xh|Xl - 256-bit result
@ equivalent of reduction_avx from ghash-x86_64.pl
vshl.i64 q9,q0,#57 @ 1st phase
vshl.i64 q10,q0,#62
veor q10,q10,q9 @
vshl.i64 q9,q0,#63
veor q10, q10, q9 @
veor d1,d1,d20 @
veor d4,d4,d21
vshr.u64 q10,q0,#1 @ 2nd phase
veor q2,q2,q0
veor q0,q0,q10 @
vshr.u64 q10,q10,#6
vshr.u64 q0,q0,#1 @
veor q0,q0,q2 @
veor q0,q0,q10 @
subs r3,#16
bne .Loop_neon
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
sub r0,#16 @ rewind r0: the two Xi loads advanced it by 16
vst1.64 d1,[r0]! @ write out Xi
vst1.64 d0,[r0]
bx lr @ bx lr
.size gcm_ghash_neon,.-gcm_ghash_neon
#endif
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
@@ -1,244 +0,0 @@
/* Do not modify. This file is auto-generated from ghashv8-armx.pl. */
#include "arm_arch.h"
#if __ARM_MAX_ARCH__>=7
.fpu neon
#ifdef __thumb2__
.syntax unified
.thumb
# define INST(a,b,c,d) .byte c,0xef,a,b
#else
.code 32
# define INST(a,b,c,d) .byte a,b,c,0xf2
#endif
.text
.globl gcm_init_v8
.type gcm_init_v8,%function
.align 4
@ gcm_init_v8: build the hash-key table used by gcm_gmult_v8 and
@ gcm_ghash_v8.
@ In:  r0 = Htable destination (3 x 16 bytes written, r0 advanced)
@      r1 = H (16 bytes read)
@ Writes, in order: twisted H (q12), the packed Karatsuba pre-processed
@ halves (q13), and H^2 (q14). Polynomial multiplies are emitted as raw
@ bytes through the INST() macro; the trailing comment on each INST
@ line names the intended instruction.
gcm_init_v8:
vld1.64 {q9},[r1] @ load input H
vmov.i8 q11,#0xe1
vshl.i64 q11,q11,#57 @ 0xc2.0
vext.8 q3,q9,q9,#8
vshr.u64 q10,q11,#63
vdup.32 q9,d18[1]
vext.8 q8,q10,q11,#8 @ t0=0xc2....01
vshr.u64 q10,q3,#63
vshr.s32 q9,q9,#31 @ broadcast carry bit
vand q10,q10,q8
vshl.i64 q3,q3,#1
vext.8 q10,q10,q10,#8
vand q8,q8,q9
vorr q3,q3,q10 @ H<<<=1
veor q12,q3,q8 @ twisted H
vst1.64 {q12},[r0]! @ store Htable[0]
@ calculate H^2
vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
INST(0xa8,0x0e,0xa8,0xf2) @ pmull q0,q12,q12
veor q8,q8,q12
INST(0xa9,0x4e,0xa9,0xf2) @ pmull2 q2,q12,q12
INST(0xa0,0x2e,0xa0,0xf2) @ pmull q1,q8,q8
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
INST(0x26,0x4e,0xe0,0xf2) @ pmull q10,q0,q11 @ 1st phase
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase
INST(0x26,0x0e,0xa0,0xf2) @ pmull q0,q0,q11
veor q10,q10,q2
veor q14,q0,q10 @ q14 = H^2
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
veor q9,q9,q14
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
vst1.64 {q13,q14},[r0]! @ store Htable[1..2]
bx lr
.size gcm_init_v8,.-gcm_init_v8
.globl gcm_gmult_v8
.type gcm_gmult_v8,%function
.align 4
@ gcm_gmult_v8: multiply the 16-byte value at [r0] by H, in place.
@ In:  r0 = Xi (16 bytes, read and rewritten)
@      r1 = Htable; the first 32 bytes are read into q12 and q13
@ Only q0-q3 and q8-q13 are written. Polynomial multiplies are emitted
@ as raw bytes through the INST() macro.
gcm_gmult_v8:
vld1.64 {q9},[r0] @ load Xi
vmov.i8 q11,#0xe1
vld1.64 {q12,q13},[r1] @ load twisted H, ...
vshl.u64 q11,q11,#57
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q3,q9,q9,#8
INST(0x86,0x0e,0xa8,0xf2) @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
INST(0x87,0x4e,0xa9,0xf2) @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
INST(0xa2,0x2e,0xaa,0xf2) @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
INST(0x26,0x4e,0xe0,0xf2) @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
INST(0x26,0x0e,0xa0,0xf2) @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8 @ swap halves back to the stored layout
vst1.64 {q0},[r0] @ write out Xi
bx lr
.size gcm_gmult_v8,.-gcm_gmult_v8
.globl gcm_ghash_v8
.type gcm_ghash_v8,%function
.align 4
@ gcm_ghash_v8: fold input blocks into the 16-byte value at [r0].
@ In:  r0 = Xi (16 bytes, read once, rewritten at the end)
@      r1 = Htable as written by gcm_init_v8 (48 bytes read)
@      r2 = input pointer
@      r3 = input length in bytes
@ Two blocks are processed per pass of .Loop_mod2x_v8 using H and H^2;
@ a single remaining block is handled at .Lodd_tail_v8.
@ NOTE(review): the length is assumed to be a non-zero multiple of 16 -
@ confirm against the callers.
@ d8-d15 are saved and restored here because q4-q7 are used as scratch.
@ Polynomial multiplies are emitted as raw bytes through INST().
gcm_ghash_v8:
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
vld1.64 {q0},[r0] @ load [rotated] Xi
@ "[rotated]" means that
@ loaded value would have
@ to be rotated in order to
@ make it appear as in
@ algorithm specification
subs r3,r3,#32 @ see if r3 is 32 or larger
mov r12,#16 @ r12 is used as post-
@ increment for input pointer;
@ as loop is modulo-scheduled
@ r12 is zeroed just in time
@ to preclude overstepping
@ inp[len], which means that
@ last block[s] are actually
@ loaded twice, but last
@ copy is not processed
vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
vmov.i8 q11,#0xe1
vld1.64 {q14},[r1]
it eq
moveq r12,#0 @ is it time to zero r12?
vext.8 q0,q0,q0,#8 @ rotate Xi
vld1.64 {q8},[r2]! @ load [rotated] I[0]
vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant
#ifndef __ARMEB__
vrev64.8 q8,q8
vrev64.8 q0,q0
#endif
vext.8 q3,q8,q8,#8 @ rotate I[0]
blo .Lodd_tail_v8 @ r3 was less than 32
vld1.64 {q9},[r2],r12 @ load [rotated] I[1]
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q7,q9,q9,#8
veor q3,q3,q0 @ I[i]^=Xi
INST(0x8e,0x8e,0xa8,0xf2) @ pmull q4,q12,q7 @ H·Ii+1
veor q9,q9,q7 @ Karatsuba pre-processing
INST(0x8f,0xce,0xa9,0xf2) @ pmull2 q6,q12,q7
b .Loop_mod2x_v8
.align 4
.Loop_mod2x_v8:
vext.8 q10,q3,q3,#8
subs r3,r3,#32 @ is there more data?
INST(0x86,0x0e,0xac,0xf2) @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
it lo
movlo r12,#0 @ is it time to zero r12?
INST(0xa2,0xae,0xaa,0xf2) @ pmull q5,q13,q9
veor q10,q10,q3 @ Karatsuba pre-processing
INST(0x87,0x4e,0xad,0xf2) @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi
veor q0,q0,q4 @ accumulate
INST(0xa5,0x2e,0xab,0xf2) @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2]
veor q2,q2,q6
it eq
moveq r12,#0 @ is it time to zero r12?
veor q1,q1,q5
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3]
#ifndef __ARMEB__
vrev64.8 q8,q8
#endif
veor q1,q1,q10
INST(0x26,0x4e,0xe0,0xf2) @ pmull q10,q0,q11 @ 1st phase of reduction
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
vext.8 q7,q9,q9,#8
vext.8 q3,q8,q8,#8
veor q0,q1,q10
INST(0x8e,0x8e,0xa8,0xf2) @ pmull q4,q12,q7 @ H·Ii+1
veor q3,q3,q2 @ accumulate q3 early
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
INST(0x26,0x0e,0xa0,0xf2) @ pmull q0,q0,q11
veor q3,q3,q10
veor q9,q9,q7 @ Karatsuba pre-processing
veor q3,q3,q0
INST(0x8f,0xce,0xa9,0xf2) @ pmull2 q6,q12,q7
bhs .Loop_mod2x_v8 @ there was at least 32 more bytes
veor q2,q2,q10
vext.8 q3,q8,q8,#8 @ re-construct q3
adds r3,r3,#32 @ re-construct r3
veor q0,q0,q2 @ re-construct q0
beq .Ldone_v8 @ is r3 zero?
.Lodd_tail_v8:
vext.8 q10,q0,q0,#8
veor q3,q3,q0 @ inp^=Xi
veor q9,q8,q10 @ q9 is rotated inp^Xi
INST(0x86,0x0e,0xa8,0xf2) @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
INST(0x87,0x4e,0xa9,0xf2) @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
INST(0xa2,0x2e,0xaa,0xf2) @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
INST(0x26,0x4e,0xe0,0xf2) @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
INST(0x26,0x0e,0xa0,0xf2) @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
.Ldone_v8:
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8 @ swap halves back to the stored layout
vst1.64 {q0},[r0] @ write out Xi
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
bx lr
.size gcm_ghash_v8,.-gcm_ghash_v8
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
-819
View File
@@ -1,819 +0,0 @@
/* Do not modify. This file is auto-generated from rc4-586.pl. */
#ifdef PIC
.text
/*
 * RC4 (PIC build): XOR a byte stream with the cipher keystream.
 * Stack arguments, as read after the four register pushes below:
 *   20(%esp) = state pointer, 24(%esp) = byte count,
 *   28(%esp) = input pointer, 32(%esp) = output pointer.
 * State layout used here: index byte "x" at +0, index byte "y" at +4,
 * permutation table at +8.  A dword of -1 at table offset 256 selects the
 * byte-wide table code (.L001RC4_CHAR); otherwise entries are dwords.
 * Register roles in the loops: %eax = x, %ebx = y, %ecx = table[x],
 * %esi = input cursor, %ebp = (output - input), %edi = table base.
 * 24(%esp) is reused to hold the end-of-input pointer.
 */
.globl RC4
.type RC4,@function
.align 16
RC4:
.L_RC4_begin:
/* Bytes F3 0F 1E FB: the endbr32 landing pad, emitted only for CET builds. */
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebp
xorl %eax,%eax
xorl %ebx,%ebx
/* Nothing to do for a zero byte count. */
cmpl $0,%edx
je .L000abort
movb (%edi),%al
movb 4(%edi),%bl
addl $8,%edi
/* %ecx = input end; %ebp becomes the output-minus-input displacement. */
leal (%esi,%edx,1),%ecx
subl %esi,%ebp
movl %ecx,24(%esp)
incb %al
cmpl $-1,256(%edi)
je .L001RC4_CHAR
movl (%edi,%eax,4),%ecx
/* Fewer than 4 bytes: go straight to the byte-at-a-time loop. */
andl $-4,%edx
jz .L002loop1
movl %ebp,32(%esp)
/* Fewer than 8 bytes: use the 4-byte integer loop. */
testl $-8,%edx
jz .L003go4loop4
/* call/pop yields the current address so the capability word is reached PC-relatively. */
call .L004PIC_me_up
.L004PIC_me_up:
popl %ebp
leal OPENSSL_ia32cap_P-.L004PIC_me_up(%ebp),%ebp
/* Bit 26 of the first capability word gates the MMX loop (presumably the SSE2 flag; see OPENSSL_ia32cap docs). */
btl $26,(%ebp)
jnc .L003go4loop4
movl 32(%esp),%ebp
andl $-8,%edx
/* The y slot at -4(%edi) temporarily holds the address of the last 8-byte block. */
leal -8(%esi,%edx,1),%edx
movl %edx,-4(%edi)
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
movq (%esi),%mm0
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm2
jmp .L005loop_mmx_enter
.align 16
/*
 * MMX loop: eight keystream bytes per iteration.  Each step swaps
 * table[x] and table[y], fetches table[(table[x]+table[y]) & 255] into
 * %mm1, shifts it to its byte lane and XORs it into %mm2, which also
 * absorbs the input qword from %mm0.
 */
.L006loop_mmx:
addb %cl,%bl
psllq $56,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movq (%esi),%mm0
movq %mm2,-8(%ebp,%esi,1)
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm2
.L005loop_mmx_enter:
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm0,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $8,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $16,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $24,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $32,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $40,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $48,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
/* Reduce %ebx to its low byte. */
movl %ebx,%edx
xorl %ebx,%ebx
movb %dl,%bl
/* Compare against the saved last-block address; leal advances without touching the flags. */
cmpl -4(%edi),%esi
leal 8(%esi),%esi
jb .L006loop_mmx
/* Flush the final qword, leave MMX state, then finish any tail bytes. */
psllq $56,%mm1
pxor %mm1,%mm2
movq %mm2,-8(%ebp,%esi,1)
emms
cmpl 24(%esp),%esi
je .L007done
jmp .L002loop1
.align 16
/*
 * Integer loop: four keystream bytes per iteration are rotated into
 * %ebp, XORed with one input dword and stored.  28(%esp) is reused for
 * the address of the last 4-byte block; 32(%esp) holds (output - input).
 */
.L003go4loop4:
leal -4(%esi,%edx,1),%edx
movl %edx,28(%esp)
.L008loop4:
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
movl (%edi,%eax,4),%ecx
movl (%edi,%edx,4),%ebp
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
rorl $8,%ebp
movl (%edi,%eax,4),%ecx
orl (%edi,%edx,4),%ebp
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
rorl $8,%ebp
movl (%edi,%eax,4),%ecx
orl (%edi,%edx,4),%ebp
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
rorl $8,%ebp
movl 32(%esp),%ecx
orl (%edi,%edx,4),%ebp
rorl $8,%ebp
xorl (%esi),%ebp
/* The flags from this compare survive the mov/lea instructions up to the jb. */
cmpl 28(%esp),%esi
movl %ebp,(%ecx,%esi,1)
leal 4(%esi),%esi
movl (%edi,%eax,4),%ecx
jb .L008loop4
cmpl 24(%esp),%esi
je .L007done
movl 32(%esp),%ebp
.align 16
/* Byte-at-a-time loop over a dword-entry table. */
.L002loop1:
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
movl (%edi,%edx,4),%edx
xorb (%esi),%dl
leal 1(%esi),%esi
movl (%edi,%eax,4),%ecx
cmpl 24(%esp),%esi
movb %dl,-1(%ebp,%esi,1)
jb .L002loop1
jmp .L007done
.align 16
/* Byte-at-a-time loop over a byte-entry table. */
.L001RC4_CHAR:
movzbl (%edi,%eax,1),%ecx
.L009cloop1:
addb %cl,%bl
movzbl (%edi,%ebx,1),%edx
movb %cl,(%edi,%ebx,1)
movb %dl,(%edi,%eax,1)
addb %cl,%dl
movzbl (%edi,%edx,1),%edx
addb $1,%al
xorb (%esi),%dl
leal 1(%esi),%esi
movzbl (%edi,%eax,1),%ecx
cmpl 24(%esp),%esi
movb %dl,-1(%ebp,%esi,1)
jb .L009cloop1
/* Undo the x pre-increment and write both indices back into the state. */
.L007done:
decb %al
movl %ebx,-4(%edi)
movb %al,-8(%edi)
.L000abort:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size RC4,.-.L_RC4_begin
/*
 * RC4_set_key (PIC build): initialise the cipher state from a key.
 * Stack arguments, as read after the four register pushes below:
 *   20(%esp) = state pointer, 24(%esp) = key length, 28(%esp) = key bytes.
 * The table starts 8 bytes into the state.  Bit 20 of the first
 * OPENSSL_ia32cap_P word selects a byte-entry table (marked by storing -1
 * at table offset 256); otherwise dword entries are used.
 * The two index words at -8(%edi) and -4(%edi) are zeroed on exit.
 */
.globl RC4_set_key
.type RC4_set_key,@function
.align 16
RC4_set_key:
.L_RC4_set_key_begin:
/* Bytes F3 0F 1E FB: the endbr32 landing pad, emitted only for CET builds. */
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%ebp
movl 28(%esp),%esi
/* call/pop yields the current address so the capability word is reached PC-relatively. */
call .L010PIC_me_up
.L010PIC_me_up:
popl %edx
leal OPENSSL_ia32cap_P-.L010PIC_me_up(%edx),%edx
leal 8(%edi),%edi
/* %esi = end of key, %ebp = -length: (%esi,%ebp) walks the key as %ebp counts up to zero. */
leal (%esi,%ebp,1),%esi
negl %ebp
xorl %eax,%eax
/* Keep -length in the y slot so the key index can be rewound. */
movl %ebp,-4(%edi)
btl $20,(%edx)
jc .L011c1stloop
.align 16
/* Dword table: fill with the identity permutation; the loop ends when %al wraps and sets carry. */
.L012w1stloop:
movl %eax,(%edi,%eax,4)
addb $1,%al
jnc .L012w1stloop
xorl %ecx,%ecx
xorl %edx,%edx
.align 16
/* Key mixing: %dl += key byte + table[%ecx], then swap table[%ecx] and table[%edx]. */
.L013w2ndloop:
movl (%edi,%ecx,4),%eax
addb (%esi,%ebp,1),%dl
addb %al,%dl
addl $1,%ebp
movl (%edi,%edx,4),%ebx
/* ZF still comes from the addl above: at the end of the key, reload -length. */
jnz .L014wnowrap
movl -4(%edi),%ebp
.L014wnowrap:
movl %eax,(%edi,%edx,4)
movl %ebx,(%edi,%ecx,4)
addb $1,%cl
jnc .L013w2ndloop
jmp .L015exit
.align 16
/* Byte table: same two passes using one-byte entries. */
.L011c1stloop:
movb %al,(%edi,%eax,1)
addb $1,%al
jnc .L011c1stloop
xorl %ecx,%ecx
xorl %edx,%edx
xorl %ebx,%ebx
.align 16
.L016c2ndloop:
movb (%edi,%ecx,1),%al
addb (%esi,%ebp,1),%dl
addb %al,%dl
addl $1,%ebp
movb (%edi,%edx,1),%bl
jnz .L017cnowrap
movl -4(%edi),%ebp
.L017cnowrap:
movb %al,(%edi,%edx,1)
movb %bl,(%edi,%ecx,1)
addb $1,%cl
jnc .L016c2ndloop
/* Mark the table as byte-wide. */
movl $-1,256(%edi)
.L015exit:
xorl %eax,%eax
movl %eax,-8(%edi)
movl %eax,-4(%edi)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size RC4_set_key,.-.L_RC4_set_key_begin
/*
 * RC4_options (PIC build): return in %eax a pointer to a NUL-terminated
 * string naming the code path chosen from the first OPENSSL_ia32cap_P word:
 *   bit 20 set          -> "rc4(1x,char)" (offset 12 in .L019opts)
 *   else bit 26 set     -> "rc4(8x,mmx)"  (offset 25)
 *   otherwise           -> "rc4(4x,int)"  (offset 0)
 * No stack arguments are read; only %eax and %edx are written.
 */
.globl RC4_options
.type RC4_options,@function
.align 16
RC4_options:
.L_RC4_options_begin:
/* Bytes F3 0F 1E FB: the endbr32 landing pad, emitted only for CET builds. */
#ifdef __CET__
.byte 243,15,30,251
#endif
/* call/pop pairs obtain the current address for PC-relative addressing. */
call .L018pic_point
.L018pic_point:
popl %eax
leal .L019opts-.L018pic_point(%eax),%eax
call .L020PIC_me_up
.L020PIC_me_up:
popl %edx
leal OPENSSL_ia32cap_P-.L020PIC_me_up(%edx),%edx
movl (%edx),%edx
btl $20,%edx
jc .L0211xchar
btl $26,%edx
jnc .L022ret
addl $25,%eax
ret
.L0211xchar:
addl $12,%eax
.L022ret:
ret
.align 64
/* String table: three option names, then the ASCII banner "RC4 for x86, CRYPTOGAMS by <appro@openssl.org>". */
.L019opts:
.byte 114,99,52,40,52,120,44,105,110,116,41,0
.byte 114,99,52,40,49,120,44,99,104,97,114,41,0
.byte 114,99,52,40,56,120,44,109,109,120,41,0
.byte 82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89
.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.size RC4_options,.-.L_RC4_options_begin
/* Common symbol for the CPU capability words: 16 bytes, 4-byte aligned. */
.comm OPENSSL_ia32cap_P,16,4
/*
 * ELF note in .note.gnu.property.  Layout emitted below: name size
 * (1f - 0f), descriptor size (4f - 1f), note type 5, the name "GNU",
 * then one property record: type 0xc0000002, data size (3f - 2f) and a
 * 4-byte value of 3.
 * NOTE(review): per the x86 psABI these appear to be NT_GNU_PROPERTY_TYPE_0
 * and GNU_PROPERTY_X86_FEATURE_1_AND with IBT|SHSTK set.  The note is
 * emitted unconditionally although the endbr32 bytes in this file are
 * guarded by __CET__ -- confirm this is intended.
 */
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
/*
 * RC4 (non-PIC build): XOR a byte stream with the cipher keystream.
 * Stack arguments, as read after the four register pushes below:
 *   20(%esp) = state pointer, 24(%esp) = byte count,
 *   28(%esp) = input pointer, 32(%esp) = output pointer.
 * State layout used here: index byte "x" at +0, index byte "y" at +4,
 * permutation table at +8.  A dword of -1 at table offset 256 selects the
 * byte-wide table code (.L001RC4_CHAR); otherwise entries are dwords.
 * Register roles in the loops: %eax = x, %ebx = y, %ecx = table[x],
 * %esi = input cursor, %ebp = (output - input), %edi = table base.
 * 24(%esp) is reused to hold the end-of-input pointer.
 */
.globl RC4
.type RC4,@function
.align 16
RC4:
.L_RC4_begin:
/* Bytes F3 0F 1E FB: the endbr32 landing pad, emitted only for CET builds. */
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebp
xorl %eax,%eax
xorl %ebx,%ebx
/* Nothing to do for a zero byte count. */
cmpl $0,%edx
je .L000abort
movb (%edi),%al
movb 4(%edi),%bl
addl $8,%edi
/* %ecx = input end; %ebp becomes the output-minus-input displacement. */
leal (%esi,%edx,1),%ecx
subl %esi,%ebp
movl %ecx,24(%esp)
incb %al
cmpl $-1,256(%edi)
je .L001RC4_CHAR
movl (%edi,%eax,4),%ecx
/* Fewer than 4 bytes: go straight to the byte-at-a-time loop. */
andl $-4,%edx
jz .L002loop1
movl %ebp,32(%esp)
/* Fewer than 8 bytes: use the 4-byte integer loop. */
testl $-8,%edx
jz .L003go4loop4
/* Absolute address of the capability words (this is the non-PIC variant). */
leal OPENSSL_ia32cap_P,%ebp
/* Bit 26 of the first capability word gates the MMX loop (presumably the SSE2 flag; see OPENSSL_ia32cap docs). */
btl $26,(%ebp)
jnc .L003go4loop4
movl 32(%esp),%ebp
andl $-8,%edx
/* The y slot at -4(%edi) temporarily holds the address of the last 8-byte block. */
leal -8(%esi,%edx,1),%edx
movl %edx,-4(%edi)
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
movq (%esi),%mm0
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm2
jmp .L004loop_mmx_enter
.align 16
/*
 * MMX loop: eight keystream bytes per iteration.  Each step swaps
 * table[x] and table[y], fetches table[(table[x]+table[y]) & 255] into
 * %mm1, shifts it to its byte lane and XORs it into %mm2, which also
 * absorbs the input qword from %mm0.
 */
.L005loop_mmx:
addb %cl,%bl
psllq $56,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movq (%esi),%mm0
movq %mm2,-8(%ebp,%esi,1)
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm2
.L004loop_mmx_enter:
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm0,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $8,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $16,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $24,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $32,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $40,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $48,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
/* Reduce %ebx to its low byte. */
movl %ebx,%edx
xorl %ebx,%ebx
movb %dl,%bl
/* Compare against the saved last-block address; leal advances without touching the flags. */
cmpl -4(%edi),%esi
leal 8(%esi),%esi
jb .L005loop_mmx
/* Flush the final qword, leave MMX state, then finish any tail bytes. */
psllq $56,%mm1
pxor %mm1,%mm2
movq %mm2,-8(%ebp,%esi,1)
emms
cmpl 24(%esp),%esi
je .L006done
jmp .L002loop1
.align 16
/*
 * Integer loop: four keystream bytes per iteration are rotated into
 * %ebp, XORed with one input dword and stored.  28(%esp) is reused for
 * the address of the last 4-byte block; 32(%esp) holds (output - input).
 */
.L003go4loop4:
leal -4(%esi,%edx,1),%edx
movl %edx,28(%esp)
.L007loop4:
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
movl (%edi,%eax,4),%ecx
movl (%edi,%edx,4),%ebp
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
rorl $8,%ebp
movl (%edi,%eax,4),%ecx
orl (%edi,%edx,4),%ebp
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
rorl $8,%ebp
movl (%edi,%eax,4),%ecx
orl (%edi,%edx,4),%ebp
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
rorl $8,%ebp
movl 32(%esp),%ecx
orl (%edi,%edx,4),%ebp
rorl $8,%ebp
xorl (%esi),%ebp
/* The flags from this compare survive the mov/lea instructions up to the jb. */
cmpl 28(%esp),%esi
movl %ebp,(%ecx,%esi,1)
leal 4(%esi),%esi
movl (%edi,%eax,4),%ecx
jb .L007loop4
cmpl 24(%esp),%esi
je .L006done
movl 32(%esp),%ebp
.align 16
/* Byte-at-a-time loop over a dword-entry table. */
.L002loop1:
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
movl (%edi,%edx,4),%edx
xorb (%esi),%dl
leal 1(%esi),%esi
movl (%edi,%eax,4),%ecx
cmpl 24(%esp),%esi
movb %dl,-1(%ebp,%esi,1)
jb .L002loop1
jmp .L006done
.align 16
/* Byte-at-a-time loop over a byte-entry table. */
.L001RC4_CHAR:
movzbl (%edi,%eax,1),%ecx
.L008cloop1:
addb %cl,%bl
movzbl (%edi,%ebx,1),%edx
movb %cl,(%edi,%ebx,1)
movb %dl,(%edi,%eax,1)
addb %cl,%dl
movzbl (%edi,%edx,1),%edx
addb $1,%al
xorb (%esi),%dl
leal 1(%esi),%esi
movzbl (%edi,%eax,1),%ecx
cmpl 24(%esp),%esi
movb %dl,-1(%ebp,%esi,1)
jb .L008cloop1
/* Undo the x pre-increment and write both indices back into the state. */
.L006done:
decb %al
movl %ebx,-4(%edi)
movb %al,-8(%edi)
.L000abort:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size RC4,.-.L_RC4_begin
/*
 * RC4_set_key (non-PIC build): initialise the cipher state from a key.
 * Stack arguments, as read after the four register pushes below:
 *   20(%esp) = state pointer, 24(%esp) = key length, 28(%esp) = key bytes.
 * The table starts 8 bytes into the state.  Bit 20 of the first
 * OPENSSL_ia32cap_P word selects a byte-entry table (marked by storing -1
 * at table offset 256); otherwise dword entries are used.
 * The two index words at -8(%edi) and -4(%edi) are zeroed on exit.
 */
.globl RC4_set_key
.type RC4_set_key,@function
.align 16
RC4_set_key:
.L_RC4_set_key_begin:
/* Bytes F3 0F 1E FB: the endbr32 landing pad, emitted only for CET builds. */
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%ebp
movl 28(%esp),%esi
/* Absolute address of the capability words (this is the non-PIC variant). */
leal OPENSSL_ia32cap_P,%edx
leal 8(%edi),%edi
/* %esi = end of key, %ebp = -length: (%esi,%ebp) walks the key as %ebp counts up to zero. */
leal (%esi,%ebp,1),%esi
negl %ebp
xorl %eax,%eax
/* Keep -length in the y slot so the key index can be rewound. */
movl %ebp,-4(%edi)
btl $20,(%edx)
jc .L009c1stloop
.align 16
/* Dword table: fill with the identity permutation; the loop ends when %al wraps and sets carry. */
.L010w1stloop:
movl %eax,(%edi,%eax,4)
addb $1,%al
jnc .L010w1stloop
xorl %ecx,%ecx
xorl %edx,%edx
.align 16
/* Key mixing: %dl += key byte + table[%ecx], then swap table[%ecx] and table[%edx]. */
.L011w2ndloop:
movl (%edi,%ecx,4),%eax
addb (%esi,%ebp,1),%dl
addb %al,%dl
addl $1,%ebp
movl (%edi,%edx,4),%ebx
/* ZF still comes from the addl above: at the end of the key, reload -length. */
jnz .L012wnowrap
movl -4(%edi),%ebp
.L012wnowrap:
movl %eax,(%edi,%edx,4)
movl %ebx,(%edi,%ecx,4)
addb $1,%cl
jnc .L011w2ndloop
jmp .L013exit
.align 16
/* Byte table: same two passes using one-byte entries. */
.L009c1stloop:
movb %al,(%edi,%eax,1)
addb $1,%al
jnc .L009c1stloop
xorl %ecx,%ecx
xorl %edx,%edx
xorl %ebx,%ebx
.align 16
.L014c2ndloop:
movb (%edi,%ecx,1),%al
addb (%esi,%ebp,1),%dl
addb %al,%dl
addl $1,%ebp
movb (%edi,%edx,1),%bl
jnz .L015cnowrap
movl -4(%edi),%ebp
.L015cnowrap:
movb %al,(%edi,%edx,1)
movb %bl,(%edi,%ecx,1)
addb $1,%cl
jnc .L014c2ndloop
/* Mark the table as byte-wide. */
movl $-1,256(%edi)
.L013exit:
xorl %eax,%eax
movl %eax,-8(%edi)
movl %eax,-4(%edi)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size RC4_set_key,.-.L_RC4_set_key_begin
/*
 * RC4_options (non-PIC build): return in %eax a pointer to a
 * NUL-terminated string naming the code path chosen from the first
 * OPENSSL_ia32cap_P word:
 *   bit 20 set          -> "rc4(1x,char)" (offset 12 in .L017opts)
 *   else bit 26 set     -> "rc4(8x,mmx)"  (offset 25)
 *   otherwise           -> "rc4(4x,int)"  (offset 0)
 * No stack arguments are read; only %eax and %edx are written.
 */
.globl RC4_options
.type RC4_options,@function
.align 16
RC4_options:
.L_RC4_options_begin:
/* Bytes F3 0F 1E FB: the endbr32 landing pad, emitted only for CET builds. */
#ifdef __CET__
.byte 243,15,30,251
#endif
/* The string table is still located PC-relatively via call/pop, even in this variant. */
call .L016pic_point
.L016pic_point:
popl %eax
leal .L017opts-.L016pic_point(%eax),%eax
/* The capability word is addressed absolutely. */
leal OPENSSL_ia32cap_P,%edx
movl (%edx),%edx
btl $20,%edx
jc .L0181xchar
btl $26,%edx
jnc .L019ret
addl $25,%eax
ret
.L0181xchar:
addl $12,%eax
.L019ret:
ret
.align 64
/* String table: three option names, then the ASCII banner "RC4 for x86, CRYPTOGAMS by <appro@openssl.org>". */
.L017opts:
.byte 114,99,52,40,52,120,44,105,110,116,41,0
.byte 114,99,52,40,49,120,44,99,104,97,114,41,0
.byte 114,99,52,40,56,120,44,109,109,120,41,0
.byte 82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89
.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.size RC4_options,.-.L_RC4_options_begin
/* Common symbol for the CPU capability words: 16 bytes, 4-byte aligned. */
.comm OPENSSL_ia32cap_P,16,4
/*
 * ELF note in .note.gnu.property.  Layout emitted below: name size
 * (1f - 0f), descriptor size (4f - 1f), note type 5, the name "GNU",
 * then one property record: type 0xc0000002, data size (3f - 2f) and a
 * 4-byte value of 3.
 * NOTE(review): per the x86 psABI these appear to be NT_GNU_PROPERTY_TYPE_0
 * and GNU_PROPERTY_X86_FEATURE_1_AND with IBT|SHSTK set.  The note is
 * emitted unconditionally although the endbr32 bytes in this file are
 * guarded by __CET__ -- confirm this is intended.
 */
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
-755
View File
@@ -1,755 +0,0 @@
/* Do not modify. This file is auto-generated from x86-gf2m.pl. */
#ifdef PIC
.text
/*
 * _mul_1x1_mmx (PIC build): carry-less (XOR-accumulate) 32x32 -> 64-bit
 * multiply using MMX registers.
 * Register interface (not a C calling convention):
 *   in:  %eax = a, %ebx = b
 *   out: %mm0 = 64-bit product
 *   overwritten: %eax is read only; %ebx, %ecx, %edx, %ebp, %esi, %edi,
 *   %mm1-%mm5 and the flags are modified; 36 bytes of stack are used.
 * Method: an 8-entry dword table of (a with its top two bits masked)
 * times 0..7 is built at (%esp); b is then consumed three bits at a time
 * to index the table.  The two masked-off top bits of a are handled
 * separately through %mm5 (bit 31) and %mm4 (bit 30).
 */
.type _mul_1x1_mmx,@function
.align 16
_mul_1x1_mmx:
/* Bytes F3 0F 1E FB: the endbr32 landing pad, emitted only for CET builds. */
#ifdef __CET__
.byte 243,15,30,251
#endif
subl $36,%esp
/* %ecx = a & 0x3fffffff, %edx = (a<<1) & 0x7fffffff, %ebp = a<<2. */
movl %eax,%ecx
leal (%eax,%eax,1),%edx
andl $1073741823,%ecx
leal (%edx,%edx,1),%ebp
movl $0,(%esp)
andl $2147483647,%edx
movd %eax,%mm2
movd %ebx,%mm3
movl %ecx,4(%esp)
xorl %edx,%ecx
pxor %mm5,%mm5
pxor %mm4,%mm4
movl %edx,8(%esp)
xorl %ebp,%edx
movl %ecx,12(%esp)
/* %mm5 = all-ones when a is negative as a signed dword (bit 31 set). */
pcmpgtd %mm2,%mm5
paddd %mm2,%mm2
xorl %edx,%ecx
movl %ebp,16(%esp)
xorl %edx,%ebp
pand %mm3,%mm5
/* After doubling, the same test on %mm2 picks out bit 30 of a. */
pcmpgtd %mm2,%mm4
movl %ecx,20(%esp)
xorl %ecx,%ebp
psllq $31,%mm5
pand %mm3,%mm4
movl %edx,24(%esp)
/* %ebp keeps the 3-bit mask (7); %esi/%edi alternate as table indices. */
movl $7,%esi
movl %ebp,28(%esp)
movl %esi,%ebp
andl %ebx,%esi
shrl $3,%ebx
movl %ebp,%edi
psllq $30,%mm4
andl %ebx,%edi
shrl $3,%ebx
/* Accumulate table entries shifted by 0, 3, 6, ... 30 bits into %mm0. */
movd (%esp,%esi,4),%mm0
movl %ebp,%esi
andl %ebx,%esi
shrl $3,%ebx
movd (%esp,%edi,4),%mm2
movl %ebp,%edi
psllq $3,%mm2
andl %ebx,%edi
shrl $3,%ebx
pxor %mm2,%mm0
movd (%esp,%esi,4),%mm1
movl %ebp,%esi
psllq $6,%mm1
andl %ebx,%esi
shrl $3,%ebx
pxor %mm1,%mm0
movd (%esp,%edi,4),%mm2
movl %ebp,%edi
psllq $9,%mm2
andl %ebx,%edi
shrl $3,%ebx
pxor %mm2,%mm0
movd (%esp,%esi,4),%mm1
movl %ebp,%esi
psllq $12,%mm1
andl %ebx,%esi
shrl $3,%ebx
pxor %mm1,%mm0
movd (%esp,%edi,4),%mm2
movl %ebp,%edi
psllq $15,%mm2
andl %ebx,%edi
shrl $3,%ebx
pxor %mm2,%mm0
movd (%esp,%esi,4),%mm1
movl %ebp,%esi
psllq $18,%mm1
andl %ebx,%esi
shrl $3,%ebx
pxor %mm1,%mm0
movd (%esp,%edi,4),%mm2
movl %ebp,%edi
psllq $21,%mm2
andl %ebx,%edi
shrl $3,%ebx
pxor %mm2,%mm0
movd (%esp,%esi,4),%mm1
movl %ebp,%esi
psllq $24,%mm1
andl %ebx,%esi
shrl $3,%ebx
pxor %mm1,%mm0
movd (%esp,%edi,4),%mm2
/* Fold in the contributions of the two top bits of a. */
pxor %mm4,%mm0
psllq $27,%mm2
pxor %mm2,%mm0
movd (%esp,%esi,4),%mm1
pxor %mm5,%mm0
psllq $30,%mm1
addl $36,%esp
pxor %mm1,%mm0
ret
.size _mul_1x1_mmx,.-_mul_1x1_mmx
/*
 * _mul_1x1_ialu (PIC build): carry-less (XOR-accumulate) 32x32 -> 64-bit
 * multiply using only integer registers.
 * Register interface (not a C calling convention):
 *   in:  %eax = a, %ebx = b
 *   out: %edx:%eax = 64-bit product (high:low)
 *   overwritten: %ebx, %ecx, %ebp, %esi, %edi and the flags;
 *   36 bytes of stack are used.
 * Method: an 8-entry dword table of (a with its top two bits masked)
 * times 0..7 is built at (%esp); b is consumed three bits at a time.
 * Each table entry is split into a left-shifted part XORed into %eax and
 * a right-shifted part XORed into %edx.
 */
.type _mul_1x1_ialu,@function
.align 16
_mul_1x1_ialu:
/* Bytes F3 0F 1E FB: the endbr32 landing pad, emitted only for CET builds. */
#ifdef __CET__
.byte 243,15,30,251
#endif
subl $36,%esp
/* %ecx = a & 0x3fffffff, %edx = (a<<1) & 0x7fffffff, %ebp = a<<2. */
movl %eax,%ecx
leal (%eax,%eax,1),%edx
leal (,%eax,4),%ebp
andl $1073741823,%ecx
leal (%eax,%eax,1),%edi
/* %eax becomes an all-ones/zero mask of bit 31 of a; %edi (below) of bit 30. */
sarl $31,%eax
movl $0,(%esp)
andl $2147483647,%edx
movl %ecx,4(%esp)
xorl %edx,%ecx
movl %edx,8(%esp)
xorl %ebp,%edx
movl %ecx,12(%esp)
xorl %edx,%ecx
movl %ebp,16(%esp)
xorl %edx,%ebp
movl %ecx,20(%esp)
xorl %ecx,%ebp
sarl $31,%edi
andl %ebx,%eax
movl %edx,24(%esp)
andl %ebx,%edi
movl %ebp,28(%esp)
/* Seed the result with b<<31 and b<<30 where the matching top bit of a is set. */
movl %eax,%edx
shll $31,%eax
movl %edi,%ecx
shrl $1,%edx
movl $7,%esi
shll $30,%edi
andl %ebx,%esi
shrl $2,%ecx
xorl %edi,%eax
shrl $3,%ebx
movl $7,%edi
andl %ebx,%edi
shrl $3,%ebx
xorl %ecx,%edx
/* Table lookups for successive 3-bit groups of b, shifted by 0, 3, 6, ... 30. */
xorl (%esp,%esi,4),%eax
movl $7,%esi
andl %ebx,%esi
shrl $3,%ebx
movl (%esp,%edi,4),%ebp
movl $7,%edi
movl %ebp,%ecx
shll $3,%ebp
andl %ebx,%edi
shrl $29,%ecx
xorl %ebp,%eax
shrl $3,%ebx
xorl %ecx,%edx
movl (%esp,%esi,4),%ecx
movl $7,%esi
movl %ecx,%ebp
shll $6,%ecx
andl %ebx,%esi
shrl $26,%ebp
xorl %ecx,%eax
shrl $3,%ebx
xorl %ebp,%edx
movl (%esp,%edi,4),%ebp
movl $7,%edi
movl %ebp,%ecx
shll $9,%ebp
andl %ebx,%edi
shrl $23,%ecx
xorl %ebp,%eax
shrl $3,%ebx
xorl %ecx,%edx
movl (%esp,%esi,4),%ecx
movl $7,%esi
movl %ecx,%ebp
shll $12,%ecx
andl %ebx,%esi
shrl $20,%ebp
xorl %ecx,%eax
shrl $3,%ebx
xorl %ebp,%edx
movl (%esp,%edi,4),%ebp
movl $7,%edi
movl %ebp,%ecx
shll $15,%ebp
andl %ebx,%edi
shrl $17,%ecx
xorl %ebp,%eax
shrl $3,%ebx
xorl %ecx,%edx
movl (%esp,%esi,4),%ecx
movl $7,%esi
movl %ecx,%ebp
shll $18,%ecx
andl %ebx,%esi
shrl $14,%ebp
xorl %ecx,%eax
shrl $3,%ebx
xorl %ebp,%edx
movl (%esp,%edi,4),%ebp
movl $7,%edi
movl %ebp,%ecx
shll $21,%ebp
andl %ebx,%edi
shrl $11,%ecx
xorl %ebp,%eax
shrl $3,%ebx
xorl %ecx,%edx
movl (%esp,%esi,4),%ecx
movl $7,%esi
movl %ecx,%ebp
shll $24,%ecx
andl %ebx,%esi
shrl $8,%ebp
xorl %ecx,%eax
shrl $3,%ebx
xorl %ebp,%edx
movl (%esp,%edi,4),%ebp
movl %ebp,%ecx
shll $27,%ebp
movl (%esp,%esi,4),%edi
shrl $5,%ecx
movl %edi,%esi
xorl %ebp,%eax
shll $30,%edi
xorl %ecx,%edx
shrl $2,%esi
xorl %edi,%eax
xorl %esi,%edx
addl $36,%esp
ret
.size _mul_1x1_ialu,.-_mul_1x1_ialu
/*
 * bn_GF2m_mul_2x2 (PIC build): carry-less multiply of two 64-bit values,
 * each passed as two dwords, giving a 128-bit result.
 * Stack arguments at entry: 4(%esp) = result pointer (4 dwords written),
 * then four dword operands at 8, 12, 16 and 20(%esp); the code pairs
 * 8 with 16 and 12 with 20 as the high and low halves of the two inputs.
 * Three code paths are selected from OPENSSL_ia32cap_P:
 *   word 0 bit 23 clear                   -> integer path (.L001ialu)
 *   word 0 bit 24 or word 1 bit 1 clear   -> MMX path (.L002mmx)
 *   otherwise                             -> single pclmulqdq
 * NOTE(review): per the OPENSSL_ia32cap documentation these bits are
 * presumably MMX, FXSR and PCLMULQDQ -- confirm.
 * The MMX and integer paths use three half-size products (Karatsuba).
 */
.globl bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,@function
.align 16
bn_GF2m_mul_2x2:
.L_bn_GF2m_mul_2x2_begin:
/* Bytes F3 0F 1E FB: the endbr32 landing pad, emitted only for CET builds. */
#ifdef __CET__
.byte 243,15,30,251
#endif
/* call/pop yields the current address so the capability words are reached PC-relatively. */
call .L000PIC_me_up
.L000PIC_me_up:
popl %edx
leal OPENSSL_ia32cap_P-.L000PIC_me_up(%edx),%edx
movl (%edx),%eax
movl 4(%edx),%edx
testl $8388608,%eax
jz .L001ialu
testl $16777216,%eax
jz .L002mmx
testl $2,%edx
jz .L002mmx
/* Load all four operand dwords, swap the dwords within each pair ... */
movups 8(%esp),%xmm0
shufps $177,%xmm0,%xmm0
/* ... and multiply: these bytes encode pclmulqdq $1,%xmm0,%xmm0. */
.byte 102,15,58,68,192,1
movl 4(%esp),%eax
movups %xmm0,(%eax)
ret
.align 16
/* MMX path.  After the four pushes the arguments sit 16 bytes higher. */
.L002mmx:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
/* %mm7 = product of the first operand pair (24 and 32(%esp)). */
movl 24(%esp),%eax
movl 32(%esp),%ebx
call _mul_1x1_mmx
movq %mm0,%mm7
/* %mm6 = product of the second operand pair (28 and 36(%esp)). */
movl 28(%esp),%eax
movl 36(%esp),%ebx
call _mul_1x1_mmx
movq %mm0,%mm6
/* Middle term: product of the XORed halves, then XOR out both products. */
movl 24(%esp),%eax
movl 32(%esp),%ebx
xorl 28(%esp),%eax
xorl 36(%esp),%ebx
call _mul_1x1_mmx
pxor %mm7,%mm0
movl 20(%esp),%eax
pxor %mm6,%mm0
/* Split the middle term across the low and high result qwords. */
movq %mm0,%mm2
psllq $32,%mm0
popl %edi
psrlq $32,%mm2
popl %esi
pxor %mm6,%mm0
popl %ebx
pxor %mm7,%mm2
movq %mm0,(%eax)
popl %ebp
movq %mm2,8(%eax)
emms
ret
.align 16
/* Integer path: 20 bytes of locals hold two of the three partial products. */
.L001ialu:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
subl $20,%esp
movl 44(%esp),%eax
movl 52(%esp),%ebx
call _mul_1x1_ialu
movl %eax,8(%esp)
movl %edx,12(%esp)
movl 48(%esp),%eax
movl 56(%esp),%ebx
call _mul_1x1_ialu
movl %eax,(%esp)
movl %edx,4(%esp)
movl 44(%esp),%eax
movl 52(%esp),%ebx
xorl 48(%esp),%eax
xorl 56(%esp),%ebx
call _mul_1x1_ialu
/* %ebp = result pointer; combine the three products into four dwords. */
movl 40(%esp),%ebp
movl (%esp),%ebx
movl 4(%esp),%ecx
movl 8(%esp),%edi
movl 12(%esp),%esi
xorl %edx,%eax
xorl %ecx,%edx
xorl %ebx,%eax
movl %ebx,(%ebp)
xorl %edi,%edx
movl %esi,12(%ebp)
xorl %esi,%eax
addl $20,%esp
xorl %esi,%edx
popl %edi
xorl %edx,%eax
popl %esi
movl %edx,8(%ebp)
popl %ebx
movl %eax,4(%ebp)
popl %ebp
ret
.size bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin
/* ASCII banner: "GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated. */
.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
.byte 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
/* Common symbol for the CPU capability words: 16 bytes, 4-byte aligned. */
.comm OPENSSL_ia32cap_P,16,4
/*
 * ELF note in .note.gnu.property.  Layout emitted below: name size
 * (1f - 0f), descriptor size (4f - 1f), note type 5, the name "GNU",
 * then one property record: type 0xc0000002, data size (3f - 2f) and a
 * 4-byte value of 3.
 * NOTE(review): per the x86 psABI these appear to be NT_GNU_PROPERTY_TYPE_0
 * and GNU_PROPERTY_X86_FEATURE_1_AND with IBT|SHSTK set.  The note is
 * emitted unconditionally although the endbr32 bytes in this file are
 * guarded by __CET__ -- confirm this is intended.
 */
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
/*
 * _mul_1x1_mmx (non-PIC build): carry-less (XOR-accumulate) 32x32 ->
 * 64-bit multiply using MMX registers.
 * Register interface (not a C calling convention):
 *   in:  %eax = a, %ebx = b
 *   out: %mm0 = 64-bit product
 *   overwritten: %eax is read only; %ebx, %ecx, %edx, %ebp, %esi, %edi,
 *   %mm1-%mm5 and the flags are modified; 36 bytes of stack are used.
 * Method: an 8-entry dword table of (a with its top two bits masked)
 * times 0..7 is built at (%esp); b is then consumed three bits at a time
 * to index the table.  The two masked-off top bits of a are handled
 * separately through %mm5 (bit 31) and %mm4 (bit 30).
 */
.type _mul_1x1_mmx,@function
.align 16
_mul_1x1_mmx:
/* Bytes F3 0F 1E FB: the endbr32 landing pad, emitted only for CET builds. */
#ifdef __CET__
.byte 243,15,30,251
#endif
subl $36,%esp
/* %ecx = a & 0x3fffffff, %edx = (a<<1) & 0x7fffffff, %ebp = a<<2. */
movl %eax,%ecx
leal (%eax,%eax,1),%edx
andl $1073741823,%ecx
leal (%edx,%edx,1),%ebp
movl $0,(%esp)
andl $2147483647,%edx
movd %eax,%mm2
movd %ebx,%mm3
movl %ecx,4(%esp)
xorl %edx,%ecx
pxor %mm5,%mm5
pxor %mm4,%mm4
movl %edx,8(%esp)
xorl %ebp,%edx
movl %ecx,12(%esp)
/* %mm5 = all-ones when a is negative as a signed dword (bit 31 set). */
pcmpgtd %mm2,%mm5
paddd %mm2,%mm2
xorl %edx,%ecx
movl %ebp,16(%esp)
xorl %edx,%ebp
pand %mm3,%mm5
/* After doubling, the same test on %mm2 picks out bit 30 of a. */
pcmpgtd %mm2,%mm4
movl %ecx,20(%esp)
xorl %ecx,%ebp
psllq $31,%mm5
pand %mm3,%mm4
movl %edx,24(%esp)
/* %ebp keeps the 3-bit mask (7); %esi/%edi alternate as table indices. */
movl $7,%esi
movl %ebp,28(%esp)
movl %esi,%ebp
andl %ebx,%esi
shrl $3,%ebx
movl %ebp,%edi
psllq $30,%mm4
andl %ebx,%edi
shrl $3,%ebx
/* Accumulate table entries shifted by 0, 3, 6, ... 30 bits into %mm0. */
movd (%esp,%esi,4),%mm0
movl %ebp,%esi
andl %ebx,%esi
shrl $3,%ebx
movd (%esp,%edi,4),%mm2
movl %ebp,%edi
psllq $3,%mm2
andl %ebx,%edi
shrl $3,%ebx
pxor %mm2,%mm0
movd (%esp,%esi,4),%mm1
movl %ebp,%esi
psllq $6,%mm1
andl %ebx,%esi
shrl $3,%ebx
pxor %mm1,%mm0
movd (%esp,%edi,4),%mm2
movl %ebp,%edi
psllq $9,%mm2
andl %ebx,%edi
shrl $3,%ebx
pxor %mm2,%mm0
movd (%esp,%esi,4),%mm1
movl %ebp,%esi
psllq $12,%mm1
andl %ebx,%esi
shrl $3,%ebx
pxor %mm1,%mm0
movd (%esp,%edi,4),%mm2
movl %ebp,%edi
psllq $15,%mm2
andl %ebx,%edi
shrl $3,%ebx
pxor %mm2,%mm0
movd (%esp,%esi,4),%mm1
movl %ebp,%esi
psllq $18,%mm1
andl %ebx,%esi
shrl $3,%ebx
pxor %mm1,%mm0
movd (%esp,%edi,4),%mm2
movl %ebp,%edi
psllq $21,%mm2
andl %ebx,%edi
shrl $3,%ebx
pxor %mm2,%mm0
movd (%esp,%esi,4),%mm1
movl %ebp,%esi
psllq $24,%mm1
andl %ebx,%esi
shrl $3,%ebx
pxor %mm1,%mm0
movd (%esp,%edi,4),%mm2
/* Fold in the contributions of the two top bits of a. */
pxor %mm4,%mm0
psllq $27,%mm2
pxor %mm2,%mm0
movd (%esp,%esi,4),%mm1
pxor %mm5,%mm0
psllq $30,%mm1
addl $36,%esp
pxor %mm1,%mm0
ret
.size _mul_1x1_mmx,.-_mul_1x1_mmx
/*
 * _mul_1x1_ialu (non-PIC build): carry-less (XOR-accumulate) 32x32 ->
 * 64-bit multiply using only integer registers.
 * Register interface (not a C calling convention):
 *   in:  %eax = a, %ebx = b
 *   out: %edx:%eax = 64-bit product (high:low)
 *   overwritten: %ebx, %ecx, %ebp, %esi, %edi and the flags;
 *   36 bytes of stack are used.
 * Method: an 8-entry dword table of (a with its top two bits masked)
 * times 0..7 is built at (%esp); b is consumed three bits at a time.
 * Each table entry is split into a left-shifted part XORed into %eax and
 * a right-shifted part XORed into %edx.
 */
.type _mul_1x1_ialu,@function
.align 16
_mul_1x1_ialu:
/* Bytes F3 0F 1E FB: the endbr32 landing pad, emitted only for CET builds. */
#ifdef __CET__
.byte 243,15,30,251
#endif
subl $36,%esp
/* %ecx = a & 0x3fffffff, %edx = (a<<1) & 0x7fffffff, %ebp = a<<2. */
movl %eax,%ecx
leal (%eax,%eax,1),%edx
leal (,%eax,4),%ebp
andl $1073741823,%ecx
leal (%eax,%eax,1),%edi
/* %eax becomes an all-ones/zero mask of bit 31 of a; %edi (below) of bit 30. */
sarl $31,%eax
movl $0,(%esp)
andl $2147483647,%edx
movl %ecx,4(%esp)
xorl %edx,%ecx
movl %edx,8(%esp)
xorl %ebp,%edx
movl %ecx,12(%esp)
xorl %edx,%ecx
movl %ebp,16(%esp)
xorl %edx,%ebp
movl %ecx,20(%esp)
xorl %ecx,%ebp
sarl $31,%edi
andl %ebx,%eax
movl %edx,24(%esp)
andl %ebx,%edi
movl %ebp,28(%esp)
/* Seed the result with b<<31 and b<<30 where the matching top bit of a is set. */
movl %eax,%edx
shll $31,%eax
movl %edi,%ecx
shrl $1,%edx
movl $7,%esi
shll $30,%edi
andl %ebx,%esi
shrl $2,%ecx
xorl %edi,%eax
shrl $3,%ebx
movl $7,%edi
andl %ebx,%edi
shrl $3,%ebx
xorl %ecx,%edx
/* Table lookups for successive 3-bit groups of b, shifted by 0, 3, 6, ... 30. */
xorl (%esp,%esi,4),%eax
movl $7,%esi
andl %ebx,%esi
shrl $3,%ebx
movl (%esp,%edi,4),%ebp
movl $7,%edi
movl %ebp,%ecx
shll $3,%ebp
andl %ebx,%edi
shrl $29,%ecx
xorl %ebp,%eax
shrl $3,%ebx
xorl %ecx,%edx
movl (%esp,%esi,4),%ecx
movl $7,%esi
movl %ecx,%ebp
shll $6,%ecx
andl %ebx,%esi
shrl $26,%ebp
xorl %ecx,%eax
shrl $3,%ebx
xorl %ebp,%edx
movl (%esp,%edi,4),%ebp
movl $7,%edi
movl %ebp,%ecx
shll $9,%ebp
andl %ebx,%edi
shrl $23,%ecx
xorl %ebp,%eax
shrl $3,%ebx
xorl %ecx,%edx
movl (%esp,%esi,4),%ecx
movl $7,%esi
movl %ecx,%ebp
shll $12,%ecx
andl %ebx,%esi
shrl $20,%ebp
xorl %ecx,%eax
shrl $3,%ebx
xorl %ebp,%edx
movl (%esp,%edi,4),%ebp
movl $7,%edi
movl %ebp,%ecx
shll $15,%ebp
andl %ebx,%edi
shrl $17,%ecx
xorl %ebp,%eax
shrl $3,%ebx
xorl %ecx,%edx
movl (%esp,%esi,4),%ecx
movl $7,%esi
movl %ecx,%ebp
shll $18,%ecx
andl %ebx,%esi
shrl $14,%ebp
xorl %ecx,%eax
shrl $3,%ebx
xorl %ebp,%edx
movl (%esp,%edi,4),%ebp
movl $7,%edi
movl %ebp,%ecx
shll $21,%ebp
andl %ebx,%edi
shrl $11,%ecx
xorl %ebp,%eax
shrl $3,%ebx
xorl %ecx,%edx
movl (%esp,%esi,4),%ecx
movl $7,%esi
movl %ecx,%ebp
shll $24,%ecx
andl %ebx,%esi
shrl $8,%ebp
xorl %ecx,%eax
shrl $3,%ebx
xorl %ebp,%edx
movl (%esp,%edi,4),%ebp
movl %ebp,%ecx
shll $27,%ebp
movl (%esp,%esi,4),%edi
shrl $5,%ecx
movl %edi,%esi
xorl %ebp,%eax
shll $30,%edi
xorl %ecx,%edx
shrl $2,%esi
xorl %edi,%eax
xorl %esi,%edx
addl $36,%esp
ret
.size _mul_1x1_ialu,.-_mul_1x1_ialu
/*
 * bn_GF2m_mul_2x2 (non-PIC build): carry-less multiply of two 64-bit
 * values, each passed as two dwords, giving a 128-bit result.
 * Stack arguments at entry: 4(%esp) = result pointer (4 dwords written),
 * then four dword operands at 8, 12, 16 and 20(%esp); the code pairs
 * 8 with 16 and 12 with 20 as the high and low halves of the two inputs.
 * Three code paths are selected from OPENSSL_ia32cap_P:
 *   word 0 bit 23 clear                   -> integer path (.L000ialu)
 *   word 0 bit 24 or word 1 bit 1 clear   -> MMX path (.L001mmx)
 *   otherwise                             -> single pclmulqdq
 * NOTE(review): per the OPENSSL_ia32cap documentation these bits are
 * presumably MMX, FXSR and PCLMULQDQ -- confirm.
 * The MMX and integer paths use three half-size products (Karatsuba).
 */
.globl bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,@function
.align 16
bn_GF2m_mul_2x2:
.L_bn_GF2m_mul_2x2_begin:
/* Bytes F3 0F 1E FB: the endbr32 landing pad, emitted only for CET builds. */
#ifdef __CET__
.byte 243,15,30,251
#endif
/* Absolute address of the capability words (this is the non-PIC variant). */
leal OPENSSL_ia32cap_P,%edx
movl (%edx),%eax
movl 4(%edx),%edx
testl $8388608,%eax
jz .L000ialu
testl $16777216,%eax
jz .L001mmx
testl $2,%edx
jz .L001mmx
/* Load all four operand dwords, swap the dwords within each pair ... */
movups 8(%esp),%xmm0
shufps $177,%xmm0,%xmm0
/* ... and multiply: these bytes encode pclmulqdq $1,%xmm0,%xmm0. */
.byte 102,15,58,68,192,1
movl 4(%esp),%eax
movups %xmm0,(%eax)
ret
.align 16
/* MMX path.  After the four pushes the arguments sit 16 bytes higher. */
.L001mmx:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
/* %mm7 = product of the first operand pair (24 and 32(%esp)). */
movl 24(%esp),%eax
movl 32(%esp),%ebx
call _mul_1x1_mmx
movq %mm0,%mm7
/* %mm6 = product of the second operand pair (28 and 36(%esp)). */
movl 28(%esp),%eax
movl 36(%esp),%ebx
call _mul_1x1_mmx
movq %mm0,%mm6
/* Middle term: product of the XORed halves, then XOR out both products. */
movl 24(%esp),%eax
movl 32(%esp),%ebx
xorl 28(%esp),%eax
xorl 36(%esp),%ebx
call _mul_1x1_mmx
pxor %mm7,%mm0
movl 20(%esp),%eax
pxor %mm6,%mm0
/* Split the middle term across the low and high result qwords. */
movq %mm0,%mm2
psllq $32,%mm0
popl %edi
psrlq $32,%mm2
popl %esi
pxor %mm6,%mm0
popl %ebx
pxor %mm7,%mm2
movq %mm0,(%eax)
popl %ebp
movq %mm2,8(%eax)
emms
ret
.align 16
/* Integer path: 20 bytes of locals hold two of the three partial products. */
.L000ialu:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
subl $20,%esp
movl 44(%esp),%eax
movl 52(%esp),%ebx
call _mul_1x1_ialu
movl %eax,8(%esp)
movl %edx,12(%esp)
movl 48(%esp),%eax
movl 56(%esp),%ebx
call _mul_1x1_ialu
movl %eax,(%esp)
movl %edx,4(%esp)
movl 44(%esp),%eax
movl 52(%esp),%ebx
xorl 48(%esp),%eax
xorl 56(%esp),%ebx
call _mul_1x1_ialu
/* %ebp = result pointer; combine the three products into four dwords. */
movl 40(%esp),%ebp
movl (%esp),%ebx
movl 4(%esp),%ecx
movl 8(%esp),%edi
movl 12(%esp),%esi
xorl %edx,%eax
xorl %ecx,%edx
xorl %ebx,%eax
movl %ebx,(%ebp)
xorl %edi,%edx
movl %esi,12(%ebp)
xorl %esi,%eax
addl $20,%esp
xorl %esi,%edx
popl %edi
xorl %edx,%eax
popl %esi
movl %edx,8(%ebp)
popl %ebx
movl %eax,4(%ebp)
popl %ebp
ret
.size bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin
/* ASCII banner: "GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated. */
.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
.byte 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
/* Common symbol for the CPU capability words: 16 bytes, 4-byte aligned. */
.comm OPENSSL_ia32cap_P,16,4
/*
 * ELF note in .note.gnu.property.  Layout emitted below: name size
 * (1f - 0f), descriptor size (4f - 1f), note type 5, the name "GNU",
 * then one property record: type 0xc0000002, data size (3f - 2f) and a
 * 4-byte value of 3.
 * NOTE(review): per the x86 psABI these appear to be NT_GNU_PROPERTY_TYPE_0
 * and GNU_PROPERTY_X86_FEATURE_1_AND with IBT|SHSTK set.  The note is
 * emitted unconditionally although the endbr32 bytes in this file are
 * guarded by __CET__ -- confirm this is intended.
 */
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif
-995
View File
@@ -1,995 +0,0 @@
/* Do not modify. This file is auto-generated from x86-mont.pl. */
#ifdef PIC
.text
/*
 * bn_mul_mont (PIC build): word-serial Montgomery multiplication on
 * arrays of 32-bit words.
 * Stack arguments, as seen after the four register pushes below:
 *   20(%esp) = result pointer (rp), 24(%esp) = first operand (ap),
 *   28(%esp) = second operand (bp), 32(%esp) = modulus (np),
 *   36(%esp) = pointer to the Montgomery constant (n0),
 *   40(%esp) = word count (num).
 * Returns %eax = 0 without doing any work when num < 4, else %eax = 1.
 * A private frame is carved out below the caller stack:
 *   4(%esp)=rp, 8(%esp)=ap, 12(%esp)=bp, 16(%esp)=np, 20(%esp)=*n0,
 *   24(%esp)=saved caller %esp, 32(%esp)...=temporary vector (tp).
 * (%esp), 12(%esp) and 28(%esp) are reused as loop state by the integer
 * paths.  %ebx holds num-1 for most of the routine.
 * Bit 26 of the first OPENSSL_ia32cap_P word selects the pmuludq code
 * (presumably the SSE2 flag); otherwise integer mull is used, with a
 * dedicated squaring path when ap == bp and num is even.
 */
.globl bn_mul_mont
.type bn_mul_mont,@function
.align 16
bn_mul_mont:
.L_bn_mul_mont_begin:
/* Bytes F3 0F 1E FB: the endbr32 landing pad, emitted only for CET builds. */
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %eax,%eax
movl 40(%esp),%edi
/* Signed compare: num below 4 returns 0 immediately. */
cmpl $4,%edi
jl .L000just_leave
leal 20(%esp),%esi
leal 24(%esp),%edx
/* %ebp = candidate frame base: room for num+2 words plus 32 bytes below %esp. */
addl $2,%edi
negl %edi
leal -32(%esp,%edi,4),%ebp
negl %edi
/*
 * Adjust the frame base relative to the argument block using the
 * 2047/2048 masks, then align it down to 64 bytes.
 * NOTE(review): presumably this placement reduces cache aliasing between
 * the frame and the arguments -- confirm against x86-mont.pl.
 */
movl %ebp,%eax
subl %edx,%eax
andl $2047,%eax
subl %eax,%ebp
xorl %ebp,%edx
andl $2048,%edx
xorl $2048,%edx
subl %edx,%ebp
andl $-64,%ebp
/* Move %esp down in 4096-byte steps, touching each page, until it reaches %ebp. */
movl %esp,%eax
subl %ebp,%eax
andl $-4096,%eax
movl %esp,%edx
leal (%ebp,%eax,1),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja .L001page_walk
jmp .L002page_walk_done
.align 16
.L001page_walk:
leal -4096(%esp),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja .L001page_walk
.L002page_walk_done:
/* Copy the arguments into the new frame; %edx still holds the caller %esp. */
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%ebp
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
movl %ebp,16(%esp)
movl %esi,20(%esp)
/* %edi is num+2 here, so %ebx = num-1. */
leal -3(%edi),%ebx
movl %edx,24(%esp)
/* call/pop yields the current address so the capability word is reached PC-relatively. */
call .L003PIC_me_up
.L003PIC_me_up:
popl %eax
leal OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc .L004non_sse2
/*
 * pmuludq path.  %mm7 = 0xffffffff mask, %mm4 = current bp word,
 * %mm5 = per-row multiplier for np, %mm2/%mm3 = running carries of the
 * ap*bp and np*m sums, %ecx = column index, %edx = row index.
 */
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
movl 12(%esp),%edi
movl 16(%esp),%ebp
xorl %edx,%edx
xorl %ecx,%ecx
movd (%edi),%mm4
movd (%esi),%mm5
movd (%ebp),%mm3
pmuludq %mm4,%mm5
movq %mm5,%mm2
movq %mm5,%mm0
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
incl %ecx
.align 16
/* First row: tp = (ap * bp[0] + np * m) shifted down one word. */
.L0051st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
jl .L0051st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
/* Remaining rows: same as above but also adding the previous tp words (%mm6). */
.L006outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
movd 32(%esp),%mm6
movd (%ebp),%mm3
pmuludq %mm4,%mm5
paddq %mm6,%mm5
movq %mm5,%mm0
movq %mm5,%mm2
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 36(%esp),%mm6
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm6,%mm2
incl %ecx
decl %ebx
.L007inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
movd 36(%esp,%ecx,4),%mm6
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
paddq %mm6,%mm2
/* decl sets ZF for the jnz; leal advances %ecx without disturbing it. */
decl %ebx
leal 1(%ecx),%ecx
jnz .L007inner
/* %ebx was used as the countdown; restore it to num-1 from %ecx. */
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
movd 36(%esp,%ebx,4),%mm6
paddq %mm2,%mm3
paddq %mm6,%mm3
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
jle .L006outer
emms
jmp .L008common_tail
.align 16
/* Integer path. */
.L004non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
xorl %ecx,%ecx
movl %esi,%edx
/* %ebp = (num & 1) | (ap - bp): zero only when ap == bp and num is even. */
andl $1,%ebp
subl %edi,%edx
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
/* mov leaves the flags from the orl intact for the jz. */
movl (%edi),%edi
jz .L009bn_sqr_mont
/* 28(%esp) = address one past the last bp word (row loop limit). */
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 16
/* tp = ap * bp[0]. */
.L010mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L010mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
addl %ebp,%eax
movl 16(%esp),%esi
adcl $0,%edx
/* %edi = tp[0] * n0: the multiplier for np in the reduction pass. */
imull 32(%esp),%edi
movl %eax,32(%esp,%ebx,4)
xorl %ecx,%ecx
movl %edx,36(%esp,%ebx,4)
movl %ecx,40(%esp,%ebx,4)
movl (%esi),%eax
mull %edi
addl 32(%esp),%eax
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
jmp .L0112ndmadd
.align 16
/* tp += ap * bp[i]. */
.L0121stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L0121stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
movl 20(%esp),%edi
adcl $0,%edx
movl 16(%esp),%esi
addl %eax,%ebp
adcl $0,%edx
imull 32(%esp),%edi
xorl %ecx,%ecx
addl 36(%esp,%ebx,4),%edx
movl %ebp,32(%esp,%ebx,4)
adcl $0,%ecx
movl (%esi),%eax
movl %edx,36(%esp,%ebx,4)
movl %ecx,40(%esp,%ebx,4)
mull %edi
addl 32(%esp),%eax
movl 4(%esi),%eax
adcl $0,%edx
movl $1,%ecx
.align 16
/* tp = (tp + np * m) shifted down one word (stores go to index-1 slots). */
.L0112ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0112ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
adcl $0,%edx
movl %ebp,28(%esp,%ebx,4)
xorl %eax,%eax
movl 12(%esp),%ecx
addl 36(%esp,%ebx,4),%edx
adcl 40(%esp,%ebx,4),%eax
/* Advance the bp cursor kept in 12(%esp) and stop at the saved end address. */
leal 4(%ecx),%ecx
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
je .L008common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
jmp .L0121stmadd
.align 16
/*
 * Squaring path (ap == bp, num even).  (%esp) holds num-1 and 12(%esp)
 * the current row index; cross products are doubled via the
 * lea (..,2) / shr $31 pairs with the carried-out bit kept in %ebx.
 */
.L009bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
mull %edi
movl %eax,32(%esp)
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
incl %ecx
.align 16
.L013sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
leal 1(%ecx),%ecx
adcl $0,%edx
leal (%ebx,%eax,2),%ebp
shrl $31,%eax
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
jl .L013sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
movl 20(%esp),%edi
adcl $0,%edx
movl 16(%esp),%esi
leal (%ebx,%eax,2),%ebp
imull 32(%esp),%edi
shrl $31,%eax
movl %ebp,32(%esp,%ecx,4)
leal (%eax,%edx,2),%ebp
movl (%esi),%eax
shrl $31,%edx
movl %ebp,36(%esp,%ecx,4)
movl %edx,40(%esp,%ecx,4)
mull %edi
addl 32(%esp),%eax
movl %ecx,%ebx
adcl $0,%edx
movl 4(%esi),%eax
movl $1,%ecx
.align 16
/* Reduction pass for the squaring path, two words per iteration. */
.L0143rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
movl 4(%esi,%ecx,4),%eax
adcl $0,%edx
movl %ebp,28(%esp,%ecx,4)
movl %edx,%ebp
mull %edi
addl 36(%esp,%ecx,4),%ebp
leal 2(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0143rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
adcl $0,%edx
movl %ebp,28(%esp,%ebx,4)
movl 12(%esp),%ecx
xorl %eax,%eax
movl 8(%esp),%esi
addl 36(%esp,%ebx,4),%edx
adcl 40(%esp,%ebx,4),%eax
movl %edx,32(%esp,%ebx,4)
/* All rows done once the row index equals num-1. */
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
je .L008common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
movl %ecx,12(%esp)
mull %edi
addl 32(%esp,%ecx,4),%eax
adcl $0,%edx
movl %eax,32(%esp,%ecx,4)
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
je .L015sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 16
.L016sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
leal (%eax,%eax,1),%ebp
adcl $0,%edx
shrl $31,%eax
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%eax
addl %ebx,%ebp
adcl $0,%eax
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
jle .L016sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
.L015sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
addl 32(%esp,%ecx,4),%edx
movl (%esi),%eax
adcl $0,%ebp
movl %edx,32(%esp,%ecx,4)
movl %ebp,36(%esp,%ecx,4)
mull %edi
addl 32(%esp),%eax
leal -1(%ecx),%ebx
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
jmp .L0143rdmadd
.align 16
/*
 * Final step shared by all paths: rp = tp - np, then pick per word
 * between tp and rp with masks so that no data-dependent branch is
 * taken.
 */
.L008common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
movl (%esi),%eax
movl %ebx,%ecx
/* xorl also clears CF, which seeds the sbbl borrow chain. */
xorl %edx,%edx
.align 16
.L017sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
/* decl, movl and leal all leave CF alone, so the borrow survives to the next sbbl. */
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
jge .L017sub
/* %eax = top tp word minus the final borrow; %edx = its complement. */
sbbl $0,%eax
movl $-1,%edx
xorl %eax,%edx
jmp .L018copy
.align 16
/* rp[i] = (tp[i] & %eax) | (rp[i] & %edx); each tp word is overwritten with %ecx. */
.L018copy:
movl 32(%esp,%ebx,4),%esi
movl (%edi,%ebx,4),%ebp
movl %ecx,32(%esp,%ebx,4)
andl %eax,%esi
andl %edx,%ebp
orl %esi,%ebp
movl %ebp,(%edi,%ebx,4)
decl %ebx
jge .L018copy
/* Restore the caller stack pointer saved in the frame and return 1. */
movl 24(%esp),%esp
movl $1,%eax
.L000just_leave:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_mul_mont,.-.L_bn_mul_mont_begin
/* ASCII banner: "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated. */
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
/* Common symbol for the CPU capability words: 16 bytes, 4-byte aligned. */
.comm OPENSSL_ia32cap_P,16,4
/*
 * ELF note in .note.gnu.property.  Layout emitted below: name size
 * (1f - 0f), descriptor size (4f - 1f), note type 5, the name "GNU",
 * then one property record: type 0xc0000002, data size (3f - 2f) and a
 * 4-byte value of 3.
 * NOTE(review): per the x86 psABI these appear to be NT_GNU_PROPERTY_TYPE_0
 * and GNU_PROPERTY_X86_FEATURE_1_AND with IBT|SHSTK set.  The note is
 * emitted unconditionally although the endbr32 bytes in this file are
 * guarded by __CET__ -- confirm this is intended.
 */
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
/*
 * bn_mul_mont -- word-wise Montgomery multiplication for 32-bit x86
 * (this copy is the #else branch of the file's conditional; it addresses
 * OPENSSL_ia32cap_P absolutely).
 *
 * Stack arguments (after the four register pushes the first one sits at
 * 20(%esp)): arg1..arg4 are pointers, arg5 is a pointer whose first word
 * is read, arg6 is the word count.
 * NOTE(review): presumably this is OpenSSL's
 *   int bn_mul_mont(rp, ap, bp, np, n0, num) -- confirm against the caller.
 *
 * Returns: %eax = 0 if the word count is < 4 (nothing is written),
 *          %eax = 1 otherwise.
 * Preserves %ebp, %ebx, %esi, %edi; uses MMX registers on the SSE2 path
 * (cleared with emms before the common tail).
 *
 * Frame layout once %esp has been moved to the scratch area:
 *    0(%esp) loop bound (squaring path)   4(%esp) arg1 (result pointer)
 *    8(%esp) arg2                        12(%esp) arg3 / current index
 *   16(%esp) arg4                        20(%esp) *arg5
 *   24(%esp) caller's %esp               28(%esp) end-of-arg3 pointer
 *   32(%esp) ... temporary vector
 */
.globl bn_mul_mont
.type bn_mul_mont,@function
.align 16
bn_mul_mont:
.L_bn_mul_mont_begin:
#ifdef __CET__
/* Bytes F3 0F 1E FB: the endbr32 landing pad, emitted as raw data. */
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
/* Default return value 0; bail out when the word count (arg6) is below 4. */
xorl %eax,%eax
movl 40(%esp),%edi
cmpl $4,%edi
jl .L000just_leave
/* %esi -> argument block, %edx -> second argument slot. */
leal 20(%esp),%esi
leal 24(%esp),%edx
/*
 * %ebp = %esp - 32 - 4*(count+2), then nudged using its distance from
 * %edx modulo 2048 and bit 11 of (%ebp ^ %edx), and rounded down to a
 * 64-byte boundary. NOTE(review): presumably to keep the scratch vector
 * from aliasing the argument area -- confirm against the generator.
 */
addl $2,%edi
negl %edi
leal -32(%esp,%edi,4),%ebp
negl %edi
movl %ebp,%eax
subl %edx,%eax
andl $2047,%eax
subl %eax,%ebp
xorl %ebp,%edx
andl $2048,%edx
xorl $2048,%edx
subl %edx,%ebp
andl $-64,%ebp
/*
 * Lower %esp to the new frame in 4096-byte steps, reading one word at
 * every step so each page on the way down is touched. %edx keeps the
 * caller's %esp.
 */
movl %esp,%eax
subl %ebp,%eax
andl $-4096,%eax
movl %esp,%edx
leal (%ebp,%eax,1),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja .L001page_walk
jmp .L002page_walk_done
.align 16
.L001page_walk:
leal -4096(%esp),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja .L001page_walk
.L002page_walk_done:
/* Copy arg1..arg4 and the word *arg5 into the frame; %ebx = count-1. */
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%ebp
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
movl %ebp,16(%esp)
movl %esi,20(%esp)
leal -3(%edi),%ebx
movl %edx,24(%esp)
/* Bit 26 of the first capability word selects the pmuludq path below. */
leal OPENSSL_ia32cap_P,%eax
btl $26,(%eax)
jnc .L003non_sse2
/* ---- pmuludq path: %mm7 = 0x00000000ffffffff (low-word mask). ---- */
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
movl 12(%esp),%edi
movl 16(%esp),%ebp
xorl %edx,%edx
xorl %ecx,%ecx
/* First pass: multiply by word 0 of arg3, no previous accumulator. */
movd (%edi),%mm4
movd (%esi),%mm5
movd (%ebp),%mm3
pmuludq %mm4,%mm5
movq %mm5,%mm2
movq %mm5,%mm0
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
incl %ecx
.align 16
.L0041st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
jl .L0041st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
/* Remaining passes: %edx indexes arg3, the temporary vector is added in. */
.L005outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
movd 32(%esp),%mm6
movd (%ebp),%mm3
pmuludq %mm4,%mm5
paddq %mm6,%mm5
movq %mm5,%mm0
movq %mm5,%mm2
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 36(%esp),%mm6
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm6,%mm2
incl %ecx
decl %ebx
.L006inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
movd 36(%esp,%ecx,4),%mm6
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
jnz .L006inner
/* %ebx was counted down to zero by the inner loop; restore it from %ecx. */
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
movd 36(%esp,%ebx,4),%mm6
paddq %mm2,%mm3
paddq %mm6,%mm3
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
jle .L005outer
/* Leave MMX state before the integer-only tail. */
emms
jmp .L007common_tail
.align 16
/* ---- integer path ---- */
.L003non_sse2:
/*
 * %ebp = (count & 1) | (arg2 - arg3): zero only when both inputs are the
 * same pointer and the count is even, in which case the dedicated
 * squaring code is used. The movl between orl and jz leaves flags alone.
 */
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
xorl %ecx,%ecx
movl %esi,%edx
andl $1,%ebp
subl %edi,%edx
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
jz .L008bn_sqr_mont
/* 28(%esp) = address one past the last word of arg3 (loop terminator). */
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 16
/* Temporary vector = arg2[] * arg3[0]; carry travels in %edx -> %ebp. */
.L009mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L009mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
addl %ebp,%eax
movl 16(%esp),%esi
adcl $0,%edx
/* %edi = (*arg5) * low word of the temporary vector: the reduction multiplier. */
imull 32(%esp),%edi
movl %eax,32(%esp,%ebx,4)
xorl %ecx,%ecx
movl %edx,36(%esp,%ebx,4)
movl %ecx,40(%esp,%ebx,4)
movl (%esi),%eax
mull %edi
addl 32(%esp),%eax
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
jmp .L0102ndmadd
.align 16
/* Temporary vector += arg2[] * (current word of arg3). */
.L0111stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L0111stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
movl 20(%esp),%edi
adcl $0,%edx
movl 16(%esp),%esi
addl %eax,%ebp
adcl $0,%edx
imull 32(%esp),%edi
xorl %ecx,%ecx
addl 36(%esp,%ebx,4),%edx
movl %ebp,32(%esp,%ebx,4)
adcl $0,%ecx
movl (%esi),%eax
movl %edx,36(%esp,%ebx,4)
movl %ecx,40(%esp,%ebx,4)
mull %edi
addl 32(%esp),%eax
movl 4(%esi),%eax
adcl $0,%edx
movl $1,%ecx
.align 16
/*
 * Temporary vector += arg4[] * multiplier, stored one word lower
 * (24(%esp,%ecx,4) after the increment), i.e. shifted right by a word.
 */
.L0102ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0102ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
adcl $0,%edx
movl %ebp,28(%esp,%ebx,4)
xorl %eax,%eax
/* Advance the arg3 cursor; finish when it reaches the saved end pointer. */
movl 12(%esp),%ecx
addl 36(%esp,%ebx,4),%edx
adcl 40(%esp,%ebx,4),%eax
leal 4(%ecx),%ecx
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
je .L007common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
jmp .L0111stmadd
.align 16
/* ---- squaring path (same input pointer, even count) ---- */
.L008bn_sqr_mont:
/* 0(%esp) = count-1 (loop bound), 12(%esp) = current index (starts at 0). */
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
mull %edi
movl %eax,32(%esp)
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
incl %ecx
.align 16
/* Cross products are doubled on the fly: leal (%ebx,%eax,2) with the shifted-out bit kept in %ebx. */
.L012sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
leal 1(%ecx),%ecx
adcl $0,%edx
leal (%ebx,%eax,2),%ebp
shrl $31,%eax
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
jl .L012sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
movl 20(%esp),%edi
adcl $0,%edx
movl 16(%esp),%esi
leal (%ebx,%eax,2),%ebp
imull 32(%esp),%edi
shrl $31,%eax
movl %ebp,32(%esp,%ecx,4)
leal (%eax,%edx,2),%ebp
movl (%esi),%eax
shrl $31,%edx
movl %ebp,36(%esp,%ecx,4)
movl %edx,40(%esp,%ecx,4)
mull %edi
addl 32(%esp),%eax
movl %ecx,%ebx
adcl $0,%edx
movl 4(%esi),%eax
movl $1,%ecx
.align 16
/* Reduction step for the squaring path, two words per iteration. */
.L0133rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
movl 4(%esi,%ecx,4),%eax
adcl $0,%edx
movl %ebp,28(%esp,%ecx,4)
movl %edx,%ebp
mull %edi
addl 36(%esp,%ecx,4),%ebp
leal 2(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0133rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
adcl $0,%edx
movl %ebp,28(%esp,%ebx,4)
movl 12(%esp),%ecx
xorl %eax,%eax
movl 8(%esp),%esi
addl 36(%esp,%ebx,4),%edx
adcl 40(%esp,%ebx,4),%eax
movl %edx,32(%esp,%ebx,4)
/* Done once the current index equals count-1. */
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
je .L007common_tail
/* Next diagonal term: square the next input word and add it in. */
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
movl %ecx,12(%esp)
mull %edi
addl 32(%esp,%ecx,4),%eax
adcl $0,%edx
movl %eax,32(%esp,%ecx,4)
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
je .L014sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 16
.L015sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
leal (%eax,%eax,1),%ebp
adcl $0,%edx
shrl $31,%eax
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%eax
addl %ebx,%ebp
adcl $0,%eax
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
jle .L015sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
.L014sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
addl 32(%esp,%ecx,4),%edx
movl (%esi),%eax
adcl $0,%ebp
movl %edx,32(%esp,%ecx,4)
movl %ebp,36(%esp,%ecx,4)
mull %edi
addl 32(%esp),%eax
leal -1(%ecx),%ebx
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
jmp .L0133rdmadd
.align 16
/* ---- final reduction, shared by all paths ---- */
.L007common_tail:
/* %ebp = arg4, %edi = arg1 (result), %esi -> temporary vector. */
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
movl (%esi),%eax
movl %ebx,%ecx
/* xorl also clears CF, so the sbbl chain starts without a borrow. */
xorl %edx,%edx
.align 16
/*
 * result[] = temporary[] - arg4[] with borrow propagation. decl, movl
 * and leal leave CF untouched, so the borrow survives each iteration.
 */
.L016sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
jge .L016sub
/* %eax = top word minus final borrow, %edx = its complement: select masks. */
sbbl $0,%eax
movl $-1,%edx
xorl %eax,%edx
jmp .L017copy
.align 16
/*
 * Branch-free select per word: result = (temporary & %eax) | (result & %edx).
 * Each temporary word is overwritten with the value left in %ecx.
 */
.L017copy:
movl 32(%esp,%ebx,4),%esi
movl (%edi,%ebx,4),%ebp
movl %ecx,32(%esp,%ebx,4)
andl %eax,%esi
andl %edx,%ebp
orl %esi,%ebp
movl %ebp,(%edi,%ebx,4)
decl %ebx
jge .L017copy
/* Restore the caller's stack pointer and report success. */
movl 24(%esp),%esp
movl $1,%eax
.L000just_leave:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_mul_mont,.-.L_bn_mul_mont_begin
/* NUL-terminated ASCII tag: "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>". */
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
/* Common symbol (16 bytes, 4-byte alignment); bn_mul_mont tests bit 26 of its first word. */
.comm OPENSSL_ia32cap_P,16,4
/*
 * ELF note with owner name "GNU": name size, descriptor size, note type 5.
 * The descriptor holds one property record: type 0xc0000002, data size,
 * and the value 3.
 * NOTE(review): type 5 / 0xc0000002 / value 3 look like
 * NT_GNU_PROPERTY_TYPE_0 / GNU_PROPERTY_X86_FEATURE_1_AND / IBT|SHSTK --
 * confirm against the GNU property note specification.
 */
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1,569 +0,0 @@
/* Do not modify. This file is auto-generated from ghashp8-ppc.pl. */
.machine "any"
.text
/*
 * gcm_init_p8 -- builds the GHASH key table (32-bit PowerPC, vector unit).
 *
 * Registers on entry: r3 = table to fill (used as store base with index
 * registers r0/r8/r9/r10), r4 = source of the 16-byte hash key.
 * NOTE(review): presumably gcm_init_p8(Htable, H) -- confirm against the
 * C caller.
 *
 * SPR 256 is saved in r12, replaced while the routine runs and restored
 * before returning. The .long words are pre-encoded instructions; by
 * their opcode fields they look like lxvd2x/stxvd2x (0x7C......99) and
 * vpmsumd (0x1.....C8) -- NOTE(review): confirm against ghashp8-ppc.pl.
 * Stores land at table offsets 0x00..0xc0.
 */
.globl gcm_init_p8
.type gcm_init_p8,@function
.align 5
gcm_init_p8:
li 0,-4096
li 8,0x10
mfspr 12,256
li 9,0x20
mtspr 256,0
li 10,0x30
.long 0x7D202699
/* Build the reduction constant in v8 and a conditional mask in v6 from the key's top bit. */
vspltisb 8,-16
vspltisb 5,1
vaddubm 8,8,8
vxor 4,4,4
vor 8,8,5
vsldoi 8,8,4,15
vsldoi 6,4,5,1
vaddubm 8,8,8
vspltisb 7,7
vor 8,8,6
vspltb 6,9,0
vsl 9,9,5
vsrab 6,6,7
vand 6,6,8
vxor 3,9,6
/* Split the adjusted key into low/high halves (v11, v10) plus the swapped form (v9). */
vsldoi 9,3,3,8
vsldoi 8,4,8,8
vsldoi 11,4,9,8
vsldoi 10,9,4,8
/* Store the constant and the first key triple, then advance the offsets. */
.long 0x7D001F99
.long 0x7D681F99
li 8,0x40
.long 0x7D291F99
li 9,0x50
.long 0x7D4A1F99
li 10,0x60
/* Multiply and reduce to obtain the next power (result in v16). */
.long 0x10035CC8
.long 0x10234CC8
.long 0x104354C8
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x100044C8
vxor 6,6,2
vxor 16,0,6
vsldoi 17,16,16,8
vsldoi 19,4,17,8
vsldoi 18,17,4,8
.long 0x7E681F99
li 8,0x70
.long 0x7E291F99
li 9,0x80
.long 0x7E4A1F99
li 10,0x90
/* Two more products are computed side by side (accumulators v0..v2 and v13..v15). */
.long 0x10039CC8
.long 0x11B09CC8
.long 0x10238CC8
.long 0x11D08CC8
.long 0x104394C8
.long 0x11F094C8
.long 0x10E044C8
.long 0x114D44C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vsldoi 11,14,4,8
vsldoi 9,4,14,8
vxor 0,0,5
vxor 2,2,6
vxor 13,13,11
vxor 15,15,9
vsldoi 0,0,0,8
vsldoi 13,13,13,8
vxor 0,0,7
vxor 13,13,10
vsldoi 6,0,0,8
vsldoi 9,13,13,8
.long 0x100044C8
.long 0x11AD44C8
vxor 6,6,2
vxor 9,9,15
vxor 0,0,6
vxor 13,13,9
vsldoi 9,0,0,8
vsldoi 17,13,13,8
vsldoi 11,4,9,8
vsldoi 10,9,4,8
vsldoi 19,4,17,8
vsldoi 18,17,4,8
/* Store the last two key triples at offsets 0x70..0x90 and 0xa0..0xc0. */
.long 0x7D681F99
li 8,0xa0
.long 0x7D291F99
li 9,0xb0
.long 0x7D4A1F99
li 10,0xc0
.long 0x7E681F99
.long 0x7E291F99
.long 0x7E4A1F99
/* Restore SPR 256 and return. */
mtspr 256,12
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size gcm_init_p8,.-gcm_init_p8
/*
 * gcm_gmult_p8 -- one GHASH multiplication in place (32-bit PowerPC).
 *
 * Registers on entry: r3 = 16-byte value, read at the start and written
 * back at the end; r4 = key table, read at offsets 0x00..0x30.
 * NOTE(review): presumably gcm_gmult_p8(Xi, Htable) -- confirm against
 * the C caller.
 *
 * SPR 256 is saved in r12 and restored before returning. The .long words
 * are pre-encoded instructions (loads/stores and carry-less multiplies by
 * their opcode fields -- NOTE(review): confirm against ghashp8-ppc.pl).
 */
.globl gcm_gmult_p8
.type gcm_gmult_p8,@function
.align 5
gcm_gmult_p8:
lis 0,0xfff8
li 8,0x10
mfspr 12,256
li 9,0x20
mtspr 256,0
li 10,0x30
/* Load the value (base r3) and four table entries (base r4). */
.long 0x7C601E99
.long 0x7D682699
.long 0x7D292699
.long 0x7D4A2699
.long 0x7D002699
vxor 4,4,4
/* Three partial products, then a two-phase reduction using v8. */
.long 0x10035CC8
.long 0x10234CC8
.long 0x104354C8
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x100044C8
vxor 6,6,2
vxor 0,0,6
/* Store the result back through r3. */
.long 0x7C001F99
mtspr 256,12
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size gcm_gmult_p8,.-gcm_gmult_p8
/*
 * gcm_ghash_p8 -- GHASH over a buffer (32-bit PowerPC, vector unit).
 *
 * Registers on entry: r3 = 16-byte running hash (loaded first, stored
 * last), r4 = key table, r5 = input pointer (advanced as data is
 * consumed), r6 = byte count.
 * NOTE(review): presumably gcm_ghash_p8(Xi, Htable, inp, len) with len a
 * multiple of 16 -- confirm against the C caller.
 *
 * Inputs of 64 bytes or more take the four-blocks-per-iteration path at
 * .Lgcm_ghash_p8_4x, which allocates a 232-byte frame to preserve
 * v20..v31. Shorter inputs use the two-block loop. SPR 256 is saved in
 * r12 and restored on both exits. The .long words are pre-encoded
 * instructions (NOTE(review): confirm against ghashp8-ppc.pl).
 */
.globl gcm_ghash_p8
.type gcm_ghash_p8,@function
.align 5
gcm_ghash_p8:
li 0,-4096
li 8,0x10
mfspr 12,256
li 9,0x20
mtspr 256,0
li 10,0x30
.long 0x7C001E99
.long 0x7D682699
li 8,0x40
.long 0x7D292699
li 9,0x50
.long 0x7D4A2699
li 10,0x60
.long 0x7D002699
vxor 4,4,4
/* 64 bytes or more: use the 4x code. */
cmplwi 6,64
bge .Lgcm_ghash_p8_4x
/* Load the first block and fold it into the hash; one block only -> .Lshort. */
.long 0x7C602E99
addi 5,5,16
subic. 6,6,16
vxor 3,3,0
beq .Lshort
.long 0x7E682699
li 8,16
.long 0x7E292699
/* r9 = end of input (r5 + remaining length). */
add 9,5,6
.long 0x7E4A2699
b .Loop_2x
.align 5
.Loop_2x:
.long 0x7E002E99
/*
 * r6 -= 32; subfe turns the carry-out into a 0/-1 mask which, ANDed with
 * r6 and added to r5, pulls the pointer back when fewer than 32 bytes
 * were left. NOTE(review): presumably so the look-ahead load stays
 * inside the buffer.
 */
subic 6,6,32
.long 0x10039CC8
.long 0x11B05CC8
subfe 0,0,0
.long 0x10238CC8
.long 0x11D04CC8
and 0,0,6
.long 0x104394C8
.long 0x11F054C8
add 5,5,0
vxor 0,0,13
vxor 1,1,14
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 2,2,15
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
.long 0x7C682E99
addi 5,5,32
vsldoi 6,0,0,8
.long 0x100044C8
vxor 6,6,2
vxor 3,3,6
vxor 3,3,0
/* Encoded compare of r9 with r5 (cmplw by its opcode fields); loop while r9 > r5. */
.long 0x7c092840
bgt .Loop_2x
cmplwi 6,0
bne .Leven
/* One trailing block: single multiply and reduce. */
.Lshort:
.long 0x10035CC8
.long 0x10234CC8
.long 0x104354C8
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x100044C8
vxor 6,6,2
.Leven:
vxor 0,0,6
/* Store the hash back through r3, restore SPR 256, return. */
.long 0x7C001F99
mtspr 256,12
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,0x14,0,0,0,4,0
.long 0
.align 5
/* ---- four-blocks-per-iteration path ---- */
.gcm_ghash_p8_4x:
.Lgcm_ghash_p8_4x:
/*
 * Allocate a 232-byte frame and save v20..v31 in it, alternating between
 * index registers r10 and r11; the saved SPR 256 value goes to 228(r1).
 */
stwu 1,-232(1)
li 10,39
li 11,55
stvx 20,10,1
addi 10,10,32
stvx 21,11,1
addi 11,11,32
stvx 22,10,1
addi 10,10,32
stvx 23,11,1
addi 11,11,32
stvx 24,10,1
addi 10,10,32
stvx 25,11,1
addi 11,11,32
stvx 26,10,1
addi 10,10,32
stvx 27,11,1
addi 11,11,32
stvx 28,10,1
addi 10,10,32
stvx 29,11,1
addi 11,11,32
stvx 30,10,1
li 10,0x60
stvx 31,11,1
li 0,-1
stw 12,228(1)
mtspr 256,0
/* Load further table entries (offsets 0x60..0xc0) and build permute masks in v18/v19. */
lvsl 5,0,8
li 8,0x70
.long 0x7E292699
li 9,0x80
vspltisb 6,8
li 10,0x90
.long 0x7EE82699
li 8,0xa0
.long 0x7F092699
li 9,0xb0
.long 0x7F2A2699
li 10,0xc0
.long 0x7FA82699
li 8,0x10
.long 0x7FC92699
li 9,0x20
.long 0x7FEA2699
li 10,0x30
vsldoi 7,4,6,8
vaddubm 18,5,7
vaddubm 19,6,18
/* r6 = number of 16-byte blocks; load the first four and pre-subtract 8. */
srwi 6,6,4
.long 0x7C602E99
.long 0x7E082E99
subic. 6,6,8
.long 0x7EC92E99
.long 0x7F8A2E99
addi 5,5,0x40
vxor 2,3,0
.long 0x11B0BCC8
.long 0x11D0C4C8
.long 0x11F0CCC8
vperm 11,17,9,18
vperm 5,22,28,19
vperm 10,17,9,19
vperm 6,22,28,18
.long 0x12B68CC8
.long 0x12855CC8
.long 0x137C4CC8
.long 0x134654C8
vxor 21,21,14
vxor 20,20,13
vxor 27,27,21
vxor 26,26,15
blt .Ltail_4x
/* Main loop: consumes 0x40 bytes and decrements the block counter by 4 per pass. */
.Loop_4x:
.long 0x7C602E99
.long 0x7E082E99
subic. 6,6,4
.long 0x7EC92E99
.long 0x7F8A2E99
addi 5,5,0x40
.long 0x1002ECC8
.long 0x1022F4C8
.long 0x1042FCC8
.long 0x11B0BCC8
.long 0x11D0C4C8
.long 0x11F0CCC8
vxor 0,0,20
vxor 1,1,27
vxor 2,2,26
vperm 5,22,28,19
vperm 6,22,28,18
.long 0x10E044C8
.long 0x12855CC8
.long 0x134654C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x12B68CC8
.long 0x137C4CC8
.long 0x100044C8
vxor 20,20,13
vxor 26,26,15
vxor 2,2,3
vxor 21,21,14
vxor 2,2,6
vxor 27,27,21
vxor 2,2,0
bge .Loop_4x
/* Finish the products in flight, then handle 0..3 leftover blocks. */
.Ltail_4x:
.long 0x1002ECC8
.long 0x1022F4C8
.long 0x1042FCC8
vxor 0,0,20
vxor 1,1,27
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 2,2,26
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x100044C8
vxor 6,6,2
vxor 0,0,6
/* r6 + 4 = blocks still pending; zero means done. */
addic. 6,6,4
beq .Ldone_4x
.long 0x7C602E99
cmplwi 6,2
/* r6 = -4 so the next pass through .Ltail_4x ends at .Ldone_4x. */
li 6,-4
blt .Lone
.long 0x7E082E99
beq .Ltwo
.Lthree:
.long 0x7EC92E99
vxor 2,3,0
vor 29,23,23
vor 30,24,24
vor 31,25,25
vperm 5,16,22,19
vperm 6,16,22,18
.long 0x12B08CC8
.long 0x13764CC8
.long 0x12855CC8
.long 0x134654C8
vxor 27,27,21
b .Ltail_4x
.align 4
.Ltwo:
vxor 2,3,0
vperm 5,4,16,19
vperm 6,4,16,18
vsldoi 29,4,17,8
vor 30,17,17
vsldoi 31,17,4,8
.long 0x12855CC8
.long 0x13704CC8
.long 0x134654C8
b .Ltail_4x
.align 4
.Lone:
vsldoi 29,4,9,8
vor 30,9,9
vsldoi 31,9,4,8
vxor 2,3,0
vxor 20,20,20
vxor 27,27,27
vxor 26,26,26
b .Ltail_4x
/* Store the hash, restore SPR 256 and v20..v31, release the frame. */
.Ldone_4x:
.long 0x7C001F99
li 10,39
li 11,55
mtspr 256,12
lvx 20,10,1
addi 10,10,32
lvx 21,11,1
addi 11,11,32
lvx 22,10,1
addi 10,10,32
lvx 23,11,1
addi 11,11,32
lvx 24,10,1
addi 10,10,32
lvx 25,11,1
addi 11,11,32
lvx 26,10,1
addi 10,10,32
lvx 27,11,1
addi 11,11,32
lvx 28,10,1
addi 10,10,32
lvx 29,11,1
addi 11,11,32
lvx 30,10,1
lvx 31,11,1
addi 1,1,232
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,0x04,0,0x80,0,4,0
.long 0
.size gcm_ghash_p8,.-gcm_ghash_p8
/* NUL-terminated ASCII tag: "GHASH for PowerISA 2.07,CRYPTOGAMS by <appro@openssl.org>". */
.byte 71,72,65,83,72,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
/* The generator emits the alignment directive twice; the second is a no-op. */
.align 2
.align 2
File diff suppressed because it is too large Load Diff
@@ -1,586 +0,0 @@
/* Do not modify. This file is auto-generated from poly1305-ppcfp.pl. */
.machine "any"
.text
/*
 * poly1305_init_fpu -- initialise the floating-point Poly1305 state
 * (32-bit PowerPC).
 *
 * Registers on entry: r3 = state block (written as an array of doubles
 * and words), r4 = 16-byte key material, may be zero (compared against
 * r0 = 0; if equal, only the first four state doubles are set).
 * NOTE(review): presumably poly1305_init_fpu(ctx, key) -- confirm against
 * the C caller.
 *
 * Returns r3 = 0. Uses a 24-byte frame; LR is parked in r6 around the
 * call to .LPICmeup, which returns the constant-table address in r5.
 * FPSCR is saved in f7, replaced by table entry 13 during the
 * computation, and finally reloaded from state slot 15, where f7 was
 * stored.
 */
.globl poly1305_init_fpu
.type poly1305_init_fpu,@function
.align 6
poly1305_init_fpu:
stwu 1,-24(1)
mflr 6
stw 6,28(1)
bl .LPICmeup
xor 0,0,0
mtlr 6
/* Load table entries 0..5 into f8..f13 and seed state slots 0..3 with the first four. */
lfd 8,8*0(5)
lfd 9,8*1(5)
lfd 10,8*2(5)
lfd 11,8*3(5)
lfd 12,8*4(5)
lfd 13,8*5(5)
stfd 8,8*0(3)
stfd 9,8*1(3)
stfd 10,8*2(3)
stfd 11,8*3(3)
/* Encoded compare of r4 with r0 (cmplw by its opcode fields); skip key setup when r4 == 0. */
.long 0x7c040040
beq- .Lno_key
lfd 6,8*13(5)
mffs 7
/* Pre-fill slots 4..7 with the same constants; their low words are overwritten with key words below. */
stfd 8,8*4(3)
stfd 9,8*5(3)
stfd 10,8*6(3)
stfd 11,8*7(3)
/* Read four key words byte-reversed and clear bits with masks 0xf0000000 / 0xf0000003. */
li 8,4
li 9,8
li 10,12
lwbrx 7,0,4
lwbrx 8,8,4
lwbrx 9,9,4
lwbrx 10,10,4
lis 11,0xf000
ori 12,11,3
andc 7,7,11
andc 8,8,12
andc 9,9,12
andc 10,10,12
stw 7,36(3)
stw 8,44(3)
stw 9,52(3)
stw 10,60(3)
/* Switch FPSCR to the table-supplied value for the arithmetic below. */
mtfsf 255,6
/* Keep copies of the six constants in state slots 18..23. */
stfd 8,8*18(3)
stfd 9,8*19(3)
stfd 10,8*20(3)
stfd 11,8*21(3)
stfd 12,8*22(3)
stfd 13,8*23(3)
/* Reload slots 4..7 as doubles and subtract the bias constants to get the key limbs. */
lfd 0,8*4(3)
lfd 2,8*5(3)
lfd 4,8*6(3)
lfd 6,8*7(3)
fsub 0,0,8
fsub 2,2,9
fsub 4,4,10
fsub 6,6,11
lfd 8,8*6(5)
lfd 9,8*7(5)
lfd 10,8*8(5)
lfd 11,8*9(5)
/* Scaled copies of limbs 1..3 (times f13); f7 (old FPSCR) is parked in slot 15. */
fmul 3,2,13
fmul 5,4,13
stfd 7,8*15(3)
fmul 7,6,13
/* Add-then-subtract a large constant to split each limb into high and low parts. */
fadd 1,0,8
stfd 3,8*12(3)
fadd 3,2,9
stfd 5,8*13(3)
fadd 5,4,10
stfd 7,8*14(3)
fadd 7,6,11
fsub 1,1,8
fsub 3,3,9
fsub 5,5,10
fsub 7,7,11
lfd 8,8*10(5)
lfd 9,8*11(5)
lfd 10,8*12(5)
fsub 0,0,1
fsub 2,2,3
fsub 4,4,5
fsub 6,6,7
stfd 1,8*5(3)
stfd 3,8*7(3)
stfd 5,8*9(3)
stfd 7,8*11(3)
stfd 0,8*4(3)
stfd 2,8*6(3)
stfd 4,8*8(3)
stfd 6,8*10(3)
/* Same split for the scaled limbs; f0 picks up the saved FPSCR from slot 15 before it is overwritten. */
lfd 2,8*12(3)
lfd 4,8*13(3)
lfd 6,8*14(3)
lfd 0,8*15(3)
fadd 3,2,8
fadd 5,4,9
fadd 7,6,10
fsub 3,3,8
fsub 5,5,9
fsub 7,7,10
fsub 2,2,3
fsub 4,4,5
fsub 6,6,7
stfd 3,8*13(3)
stfd 5,8*15(3)
stfd 7,8*17(3)
stfd 2,8*12(3)
stfd 4,8*14(3)
stfd 6,8*16(3)
/* Restore the caller's FPSCR. */
mtfsf 255,0
.Lno_key:
xor 3,3,3
addi 1,1,24
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,4,1,0x80,0,2,0
.size poly1305_init_fpu,.-poly1305_init_fpu
/*
 * poly1305_blocks_fpu -- absorb 16-byte blocks into the floating-point
 * Poly1305 state (32-bit PowerPC).
 *
 * Registers on entry: r3 = state block (doubles at 8*0 .. 8*23),
 * r4 = input pointer, r5 = byte count (shifted right by 4 to get the
 * block count; returns immediately when that is zero), r6 = value ORed
 * into the high word of the fourth input limb.
 * NOTE(review): presumably poly1305_blocks_fpu(ctx, inp, len, padbit) --
 * confirm against the C caller.
 *
 * Allocates a 216-byte frame, saves f14..f31 in it and LR at 220(r1).
 * Frame scratch: 24..55(r1) hold the four input limbs as doubles (high
 * word = exponent pattern, low word = byte-reversed input word);
 * 56(r1) first holds the FPSCR image {0,1} and then the caller's FPSCR.
 */
.globl poly1305_blocks_fpu
.type poly1305_blocks_fpu,@function
.align 4
poly1305_blocks_fpu:
srwi. 5,5,4
beq- .Labort
stwu 1,-216(1)
mflr 0
stfd 14,72(1)
stfd 15,80(1)
stfd 16,88(1)
stfd 17,96(1)
stfd 18,104(1)
stfd 19,112(1)
stfd 20,120(1)
stfd 21,128(1)
stfd 22,136(1)
stfd 23,144(1)
stfd 24,152(1)
stfd 25,160(1)
stfd 26,168(1)
stfd 27,176(1)
stfd 28,184(1)
stfd 29,192(1)
stfd 30,200(1)
stfd 31,208(1)
stw 0,220(1)
/* CTR = block count, r5 = -count; 56(r1) = words {0,1}, later loaded as an FPSCR image. */
xor 0,0,0
li 10,1
mtctr 5
neg 5,5
stw 0,56(1)
stw 10,60(1)
/* f8..f13 = constants kept in state slots 18..23; f0,f2,f4,f6 = accumulator slots 0..3. */
lfd 8,8*18(3)
lfd 9,8*19(3)
lfd 10,8*20(3)
lfd 11,8*21(3)
lfd 12,8*22(3)
lfd 13,8*23(3)
lfd 0,8*0(3)
lfd 2,8*1(3)
lfd 4,8*2(3)
lfd 6,8*3(3)
/* Prepare the exponent words of the input limbs; the fourth one gets r6 ORed into 0x49300000. */
stfd 8,24(1)
oris 10,6,18736
stfd 9,32(1)
stfd 10,40(1)
stw 10,48(1)
/* Load the first block byte-reversed into the low words of the limb slots. */
li 11,4
li 12,8
li 6,12
lwbrx 7,0,4
lwbrx 8,11,4
lwbrx 9,12,4
lwbrx 10,6,4
addi 4,4,16
stw 7,28(1)
stw 8,36(1)
stw 9,44(1)
stw 10,52(1)
/* Save the caller's FPSCR (f28 -> 56(r1)) and install the image loaded into f29. */
mffs 28
lfd 29,56(1)
/* f14..f27 = key-derived values from state slots 4..17. */
lfd 14,8*4(3)
lfd 15,8*5(3)
lfd 16,8*6(3)
lfd 17,8*7(3)
lfd 18,8*8(3)
lfd 19,8*9(3)
lfd 24,8*10(3)
lfd 25,8*11(3)
lfd 26,8*12(3)
lfd 27,8*13(3)
lfd 20,8*14(3)
lfd 21,8*15(3)
lfd 22,8*16(3)
lfd 23,8*17(3)
stfd 28,56(1)
mtfsf 255,29
/*
 * r5 counts up towards zero; the carry out of the increment (set when r5
 * reaches 0) becomes 16 in r0 and is subtracted from r4.
 * NOTE(review): presumably so the look-ahead load for the "next" block
 * re-reads the last block instead of running past the input.
 */
addic 5,5,1
addze 0,0
slwi. 0,0,4
sub 4,4,0
lfd 28,24(1)
lfd 29,32(1)
lfd 30,40(1)
lfd 31,48(1)
/* Remove the bias from accumulator and input limbs while loading the next block. */
fsub 0,0,8
lwbrx 7,0,4
fsub 2,2,9
lwbrx 8,11,4
fsub 4,4,10
lwbrx 9,12,4
fsub 6,6,11
lwbrx 10,6,4
fsub 28,28,8
addi 4,4,16
fsub 29,29,9
fsub 30,30,10
fsub 31,31,11
fadd 28,28,0
stw 7,28(1)
fadd 29,29,2
stw 8,36(1)
fadd 30,30,4
stw 9,44(1)
fadd 31,31,6
stw 10,52(1)
b .Lentry
.align 4
/* Per-block loop: add the pending input limbs, carry-propagate, then multiply at .Lentry. */
.Loop:
fsub 30,30,8
addic 5,5,1
fsub 31,31,9
addze 0,0
fsub 26,26,10
slwi. 0,0,4
fsub 27,27,11
sub 4,4,0
fadd 0,0,30
fadd 1,1,31
fadd 4,4,26
fadd 5,5,27
fadd 26,2,10
lwbrx 7,0,4
fadd 27,3,10
lwbrx 8,11,4
fadd 30,6,12
lwbrx 9,12,4
fadd 31,7,12
lwbrx 10,6,4
fadd 24,0,9
addi 4,4,16
fadd 25,1,9
fadd 28,4,11
fadd 29,5,11
fsub 26,26,10
stw 7,28(1)
fsub 27,27,10
stw 8,36(1)
fsub 30,30,12
stw 9,44(1)
fsub 31,31,12
stw 10,52(1)
fsub 24,24,9
fsub 25,25,9
fsub 28,28,11
fsub 29,29,11
fsub 2,2,26
fsub 3,3,27
fsub 6,6,30
fsub 7,7,31
fsub 4,4,28
fsub 5,5,29
fsub 0,0,24
fsub 1,1,25
fadd 2,2,24
fadd 3,3,25
fadd 6,6,28
fadd 7,7,29
fadd 4,4,26
fadd 5,5,27
fmadd 0,30,13,0
fmadd 1,31,13,1
/* f24..f27 were used as temporaries above; reload them from the state. */
fadd 29,2,3
lfd 26,8*12(3)
fadd 31,6,7
lfd 27,8*13(3)
fadd 30,4,5
lfd 24,8*10(3)
fadd 28,0,1
lfd 25,8*11(3)
/* Multiply the four accumulator limbs (f28..f31) by the key values; products accumulate in f0..f7. */
.Lentry:
fmul 0,22,29
fmul 1,23,29
fmul 4,16,29
fmul 5,17,29
fmul 2,14,29
fmul 3,15,29
fmul 6,18,29
fmul 7,19,29
fmadd 0,26,31,0
fmadd 1,27,31,1
fmadd 4,22,31,4
fmadd 5,23,31,5
fmadd 2,20,31,2
fmadd 3,21,31,3
fmadd 6,14,31,6
fmadd 7,15,31,7
fmadd 0,20,30,0
fmadd 1,21,30,1
fmadd 4,14,30,4
fmadd 5,15,30,5
fmadd 2,22,30,2
fmadd 3,23,30,3
fmadd 6,16,30,6
fmadd 7,17,30,7
/* Interleave the loads of the next block's limbs with the last multiply-adds. */
fmadd 0,14,28,0
lfd 30,24(1)
fmadd 1,15,28,1
lfd 31,32(1)
fmadd 4,18,28,4
lfd 26,40(1)
fmadd 5,19,28,5
lfd 27,48(1)
fmadd 2,16,28,2
fmadd 3,17,28,3
fmadd 6,24,28,6
fmadd 7,25,28,7
bdnz .Loop
/* Final carry propagation (same add/subtract-constant pattern as in the loop). */
fadd 24,0,9
fadd 25,1,9
fadd 28,4,11
fadd 29,5,11
fadd 26,2,10
fadd 27,3,10
fadd 30,6,12
fadd 31,7,12
fsub 24,24,9
fsub 25,25,9
fsub 28,28,11
fsub 29,29,11
fsub 26,26,10
fsub 27,27,10
fsub 30,30,12
fsub 31,31,12
fsub 2,2,26
fsub 3,3,27
fsub 6,6,30
fsub 7,7,31
fsub 4,4,28
fsub 5,5,29
fsub 0,0,24
fsub 1,1,25
fadd 2,2,24
fadd 3,3,25
fadd 6,6,28
fadd 7,7,29
fadd 4,4,26
fadd 5,5,27
fmadd 0,30,13,0
fmadd 1,31,13,1
fadd 29,2,3
fadd 31,6,7
fadd 30,4,5
fadd 28,0,1
/* Re-apply the bias, store the accumulator to state slots 0..3, restore FPSCR from 56(r1). */
lfd 0,56(1)
fadd 29,29,9
fadd 31,31,11
fadd 30,30,10
fadd 28,28,8
stfd 29,8*1(3)
stfd 31,8*3(3)
stfd 30,8*2(3)
stfd 28,8*0(3)
mtfsf 255,0
lfd 14,72(1)
lfd 15,80(1)
lfd 16,88(1)
lfd 17,96(1)
lfd 18,104(1)
lfd 19,112(1)
lfd 20,120(1)
lfd 21,128(1)
lfd 22,136(1)
lfd 23,144(1)
lfd 24,152(1)
lfd 25,160(1)
lfd 26,168(1)
lfd 27,176(1)
lfd 28,184(1)
lfd 29,192(1)
lfd 30,200(1)
lfd 31,208(1)
addi 1,1,216
.Labort:
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,4,1,0x80,0,4,0
.size poly1305_blocks_fpu,.-poly1305_blocks_fpu
/*
 * poly1305_emit_fpu -- convert the floating-point accumulator to the
 * final 16-byte tag (32-bit PowerPC, integer unit only).
 *
 * Registers on entry: r3 = state block (the first 32 bytes are read as
 * eight words), r4 = output buffer (four words stored byte-reversed),
 * r5 = four 32-bit words added to the result.
 * NOTE(review): presumably poly1305_emit_fpu(ctx, mac, nonce) -- confirm
 * against the C caller.
 *
 * Uses a 40-byte frame to preserve r28..r31; LR is stored at 44(r1) but
 * never modified.
 */
.globl poly1305_emit_fpu
.type poly1305_emit_fpu,@function
.align 4
poly1305_emit_fpu:
stwu 1,-40(1)
mflr 0
stw 28,24(1)
stw 29,28(1)
stw 30,32(1)
stw 31,36(1)
stw 0,44(1)
/* Read the four accumulator doubles as (high, low) word pairs. */
lwz 28,0(3)
lwz 7,4(3)
lwz 29,8(3)
lwz 8,12(3)
lwz 30,16(3)
lwz 9,20(3)
lwz 31,24(3)
lwz 10,28(3)
/* Clear the top 12 bits (mask 0xfff00000: sign and exponent) of each high word. */
lis 0,0xfff0
andc 28,28,0
andc 29,29,0
andc 30,30,0
andc 31,31,0
/* Split the top limb: low two bits stay in r11, the rest is folded back as x + (x >> 2). */
li 0,3
srwi 6,31,2
and 11,31,0
andc 31,31,0
add 31,31,6
/* Assemble the 130-bit value in r7..r11 with carry propagation. */
addc 7,7,31
adde 8,8,28
adde 9,9,29
adde 10,10,30
addze 11,11
/* r28..r31, r0 = value + 5. */
addic 28,7,5
addze 29,8
addze 30,9
addze 31,10
addze 0,11
/* r0 = all-ones if (value + 5) carried past bit 130, else zero. */
srwi 0,0,2
neg 0,0
srawi 0,0,31
/* Branch-free select between value and value + 5, interleaved with loading the r5 words. */
andc 7,7,0
and 28,28,0
andc 8,8,0
and 29,29,0
or 7,7,28
lwz 28,0(5)
andc 9,9,0
and 30,30,0
or 8,8,29
lwz 29,4(5)
andc 10,10,0
and 31,31,0
or 9,9,30
lwz 30,8(5)
or 10,10,31
lwz 31,12(5)
/* Add the four r5 words with carry and store the result byte-reversed. */
addc 7,7,28
adde 8,8,29
adde 9,9,30
adde 10,10,31
li 29,4
stwbrx 7,0,4
li 30,8
stwbrx 8,29,4
li 31,12
stwbrx 9,30,4
stwbrx 10,31,4
lwz 28,24(1)
lwz 29,28(1)
lwz 30,32(1)
lwz 31,36(1)
addi 1,1,40
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,4,1,0x80,4,3,0
.size poly1305_emit_fpu,.-poly1305_emit_fpu
/*
 * .LPICmeup -- position-independent lookup of the constant table below.
 *
 * The bcl to the very next instruction puts that instruction's address
 * in LR; r5 = that address + 56, which is the first table entry
 * (4 instructions + 12 tag bytes + 28 bytes of padding follow the mflr).
 * The caller's LR is preserved through r0. Returns the table address in r5.
 */
.align 6
.LPICmeup:
mflr 0
bcl 20,31,$+4
mflr 5
addi 5,5,56
mtlr 0
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
.space 28
/*
 * Table of fourteen 8-byte entries, high word first. poly1305_init_fpu
 * loads entries 0..12 as doubles and entry 13 as an FPSCR image.
 * NOTE(review): the patterns look like powers of two used as limb
 * biases -- confirm the exact values against poly1305-ppcfp.pl.
 */
.long 0x43300000,0x00000000
.long 0x45300000,0x00000000
.long 0x47300000,0x00000000
.long 0x49300000,0x00000000
.long 0x4b500000,0x00000000
.long 0x37f40000,0x00000000
.long 0x44300000,0x00000000
.long 0x46300000,0x00000000
.long 0x48300000,0x00000000
.long 0x4a300000,0x00000000
.long 0x3e300000,0x00000000
.long 0x40300000,0x00000000
.long 0x42300000,0x00000000
.long 0x00000000,0x00000001
/* NUL-terminated ASCII tag: "Poly1305 for PPC FPU,CRYPTOGAMS by <appro@openssl.org>". */
.byte 80,111,108,121,49,51,48,53,32,102,111,114,32,80,80,67,32,70,80,85,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 4
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1,356 +0,0 @@
/* Do not modify. This file is auto-generated from ppccpuid.pl. */
.machine "any"
.text
/*
 * OPENSSL_fpu_probe -- executes a single floating-point register move
 * (f0 to itself) and returns. It changes no state.
 * NOTE(review): presumably called under a fault handler to detect
 * whether an FPU is present -- confirm against the caller.
 */
.globl OPENSSL_fpu_probe
.type OPENSSL_fpu_probe,@function
.align 4
OPENSSL_fpu_probe:
fmr 0,0
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,0x14,0,0,0,0,0
.size OPENSSL_fpu_probe,.-OPENSSL_fpu_probe
/*
 * OPENSSL_ppc64_probe -- executes fcfid on f1 and a 64-bit rotate
 * (rldicl) on r0, then returns. f1 and r0 are modified.
 * NOTE(review): presumably called under a fault handler to detect
 * 64-bit instruction support -- confirm against the caller.
 */
.globl OPENSSL_ppc64_probe
.type OPENSSL_ppc64_probe,@function
.align 4
OPENSSL_ppc64_probe:
fcfid 1,1
rldicl 0,0,32,32
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,0x14,0,0,0,0,0
.size OPENSSL_ppc64_probe,.-OPENSSL_ppc64_probe
/*
 * OPENSSL_altivec_probe -- executes one pre-encoded instruction word and
 * returns.
 * NOTE(review): 0x10000484 decodes by its opcode fields as "vor 0,0,0";
 * presumably called under a fault handler to detect AltiVec -- confirm
 * against ppccpuid.pl and the caller.
 */
.globl OPENSSL_altivec_probe
.type OPENSSL_altivec_probe,@function
.align 4
OPENSSL_altivec_probe:
.long 0x10000484
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,0x14,0,0,0,0,0
.size OPENSSL_altivec_probe,.-OPENSSL_altivec_probe
/*
 * OPENSSL_crypto207_probe -- executes two pre-encoded instruction words
 * and returns.
 * NOTE(review): by their opcode fields the words look like a VSX load
 * through r1 (lxvd2x) and "vcipher 0,0,0"; presumably called under a
 * fault handler to detect the ISA 2.07 crypto extensions -- confirm
 * against ppccpuid.pl and the caller.
 */
.globl OPENSSL_crypto207_probe
.type OPENSSL_crypto207_probe,@function
.align 4
OPENSSL_crypto207_probe:
.long 0x7C000E99
.long 0x10000508
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,0x14,0,0,0,0,0
.size OPENSSL_crypto207_probe,.-OPENSSL_crypto207_probe
/*
 * OPENSSL_madd300_probe -- zeroes r0, executes two pre-encoded
 * instruction words that target r3, and returns.
 * NOTE(review): by their opcode fields the words look like
 * "maddld 3,0,0,0" and "maddhdu 3,0,0,0"; presumably called under a
 * fault handler to detect the ISA 3.0 multiply-add instructions --
 * confirm against ppccpuid.pl and the caller.
 *
 * Fix: every other function in this file ends with a .size directive;
 * this one did not, leaving the ELF symbol with size 0. The directive is
 * added below; no instruction is changed.
 */
.globl OPENSSL_madd300_probe
.type OPENSSL_madd300_probe,@function
.align 4
OPENSSL_madd300_probe:
xor 0,0,0
.long 0x10600033
.long 0x10600031
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,0x14,0,0,0,0,0
.size OPENSSL_madd300_probe,.-OPENSSL_madd300_probe
/*
 * OPENSSL_wipe_cpu -- overwrites scratch registers and returns the
 * current stack pointer.
 *
 * r0 and r4..r12 are zeroed with xor; f0..f13 are overwritten with a
 * copy of f31; r3 receives the value of r1 (the return value).
 * No memory is touched.
 */
.globl OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,@function
.align 4
OPENSSL_wipe_cpu:
xor 0,0,0
fmr 0,31
fmr 1,31
fmr 2,31
/* r3 = stack pointer: this is what the caller gets back. */
mr 3,1
fmr 3,31
xor 4,4,4
fmr 4,31
xor 5,5,5
fmr 5,31
xor 6,6,6
fmr 6,31
xor 7,7,7
fmr 7,31
xor 8,8,8
fmr 8,31
xor 9,9,9
fmr 9,31
xor 10,10,10
fmr 10,31
xor 11,11,11
fmr 11,31
xor 12,12,12
fmr 12,31
fmr 13,31
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,0x14,0,0,0,0,0
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
/*
 * OPENSSL_atomic_add -- atomically add r4 to the 32-bit word at the
 * address in r3 and return the new value in r3.
 *
 * Implemented as a load-reserve / store-conditional retry loop: the
 * store is retried until the reservation taken by lwarx is still held
 * when stwcx. executes. Clobbers r0, r5 and cr0.
 */
.globl OPENSSL_atomic_add
.type OPENSSL_atomic_add,@function
.align 4
OPENSSL_atomic_add:
.Latomic_add_retry:
/* r5 = current value (with reservation); r0 = r5 + increment. */
lwarx 5,0,3
add 0,5,4
/* Try to publish the sum; cr0.EQ is clear if the reservation was lost. */
stwcx. 0,0,3
bne- .Latomic_add_retry
/* Hand the stored sum back to the caller. */
mr 3,0
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
/*
 * OPENSSL_rdtsc_mftb -- returns in r3 the value read with the mftb
 * instruction (the time base register).
 */
.globl OPENSSL_rdtsc_mftb
.type OPENSSL_rdtsc_mftb,@function
.align 4
OPENSSL_rdtsc_mftb:
mftb 3
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,0x14,0,0,0,0,0
.size OPENSSL_rdtsc_mftb,.-OPENSSL_rdtsc_mftb
/*
 * OPENSSL_rdtsc_mfspr268 -- returns in r3 the contents of special-purpose
 * register 268, read with mfspr instead of mftb.
 * NOTE(review): SPR 268 is presumably the time base as seen through
 * mfspr, for CPUs where mftb is unsuitable -- confirm against the caller.
 */
.globl OPENSSL_rdtsc_mfspr268
.type OPENSSL_rdtsc_mfspr268,@function
.align 4
OPENSSL_rdtsc_mfspr268:
mfspr 3,268
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,0x14,0,0,0,0,0
.size OPENSSL_rdtsc_mfspr268,.-OPENSSL_rdtsc_mfspr268
/*
 * OPENSSL_cleanse -- fill r4 bytes starting at the address in r3 with
 * zero.
 *
 * Lengths below 7 are written byte by byte. Longer buffers are first
 * byte-stepped until r3 is 4-byte aligned, then cleared a word at a
 * time, and any remaining 1..3 bytes go through the byte loop again.
 * A length of zero returns immediately. Clobbers r0, r5, CTR and cr0.
 */
.globl OPENSSL_cleanse
.type OPENSSL_cleanse,@function
.align 4
OPENSSL_cleanse:
cmplwi 4,7
li 0,0
bge .Lot
cmplwi 4,0
/* Pre-encoded conditional return: leave if the length compared equal to zero (beqlr- by its opcode fields). */
.long 0x4DC20020
/* Byte loop: CTR = remaining bytes; "bdnz $-8" jumps back two instructions to the stb. */
.Little: mtctr 4
stb 0,0(3)
addi 3,3,1
bdnz $-8
blr
/* Long buffers: clear single bytes until the pointer's low two bits are zero. */
.Lot: andi. 5,3,3
beq .Laligned
stb 0,0(3)
subi 4,4,1
addi 3,3,1
b .Lot
.Laligned:
/* Word loop: CTR = length / 4; "bdnz $-8" jumps back to the stw. */
srwi 5,4,2
mtctr 5
stw 0,0(3)
addi 3,3,4
bdnz $-8
/* Leftover 1..3 bytes, if any, are handled by the byte loop. */
andi. 4,4,3
bne .Little
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size OPENSSL_cleanse,.-OPENSSL_cleanse
/*
 * CRYPTO_memcmp -- compare r5 bytes at the addresses in r3 and r4.
 *
 * Returns r3 = 0 when every byte pair is equal (or r5 is zero) and
 * r3 = 1 otherwise. All r5 byte pairs are always read: differences are
 * ORed into r0 and there is no early exit from the loop.
 * Clobbers r0, r6, r7, CTR and cr0.
 */
.globl CRYPTO_memcmp
.type CRYPTO_memcmp,@function
.align 4
CRYPTO_memcmp:
cmplwi 5,0
li 0,0
beq .Lmemcmp_done
mtctr 5
.Lmemcmp_next:
/* Fetch one byte from each buffer, then step both pointers. */
lbz 6,0(3)
lbz 7,0(4)
addi 3,3,1
addi 4,4,1
/* Accumulate any differing bits in r0. */
xor 6,6,7
or 0,0,6
bdnz .Lmemcmp_next
.Lmemcmp_done:
/* r0 is in 0..255; its negation has the sign bit set exactly when r0 != 0. */
neg 3,0
/* Move that sign bit down to bit 0: result is 0 or 1. */
srwi 3,3,31
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,0x14,0,0,0,3,0
.long 0
.size CRYPTO_memcmp,.-CRYPTO_memcmp
/*
 * OPENSSL_instrument_bus_mftb -- timing probe over an array of 32-bit
 * words.
 *
 * Registers on entry: r3 = array, r4 = number of elements (loaded into
 * CTR). For each element the routine flushes its cache line (dcbf),
 * adds the time-base delta since the previous sample to the word with
 * lwarx/stwcx., and then stores the same sum again with a plain stwx.
 * Returns r3 = r4 (the element count). Clobbers r6, r7, r8, CTR, cr0.
 */
.globl OPENSSL_instrument_bus_mftb
.type OPENSSL_instrument_bus_mftb,@function
.align 4
OPENSSL_instrument_bus_mftb:
mtctr 4
/* Warm-up access on element 0 with a zero delta; r7 = first timestamp. */
mftb 7
li 8,0
dcbf 0,3
lwarx 6,0,3
add 6,6,8
stwcx. 6,0,3
stwx 6,0,3
/* r8 = ticks since the previous sample; r7 = new reference timestamp. */
.Loop: mftb 6
sub 8,6,7
mr 7,6
dcbf 0,3
lwarx 6,0,3
add 6,6,8
stwcx. 6,0,3
stwx 6,0,3
addi 3,3,4
bdnz .Loop
mr 3,4
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size OPENSSL_instrument_bus_mftb,.-OPENSSL_instrument_bus_mftb
/*
 * OPENSSL_instrument_bus2_mftb -- timing probe that advances through the
 * array only when the measured delta changes.
 *
 * Registers on entry: r3 = array of 32-bit words, r4 = number of
 * elements, r5 = maximum number of probes. r0 keeps the element count,
 * r4 is turned into a remaining byte count, r9 holds the previous delta.
 * Returns r3 = element count minus elements left unvisited.
 * Clobbers r0, r4..r9 and condition registers.
 */
.globl OPENSSL_instrument_bus2_mftb
.type OPENSSL_instrument_bus2_mftb,@function
.align 4
OPENSSL_instrument_bus2_mftb:
mr 0,4
slwi 4,4,2
/* Warm-up access with a zero delta, then take the first real delta (r8, copied to r9). */
mftb 7
li 8,0
dcbf 0,3
lwarx 6,0,3
add 6,6,8
stwcx. 6,0,3
stwx 6,0,3
mftb 6
sub 8,6,7
mr 7,6
mr 9,8
.Loop2:
dcbf 0,3
lwarx 6,0,3
add 6,6,8
stwcx. 6,0,3
stwx 6,0,3
/* Stop once the probe budget in r5 is used up. */
addic. 5,5,-1
beq .Ldone2
mftb 6
sub 8,6,7
mr 7,6
/* Pre-encoded compare of r8 with r9 into cr7 (cmplw 7,8,9 by its opcode fields). */
.long 0x7f884840
mr 9,8
/* r6 = 4 when the new delta differs from the previous one, otherwise 0. */
mfcr 6
not 6,6
rlwinm 6,6,1,29,29
/* Consume r6 bytes of the array; finish when none remain. */
sub. 4,4,6
add 3,3,6
bne .Loop2
.Ldone2:
srwi 4,4,2
sub 3,0,4
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,0x14,0,0,0,3,0
.long 0
.size OPENSSL_instrument_bus2_mftb,.-OPENSSL_instrument_bus2_mftb
/*
 * OPENSSL_instrument_bus_mfspr268 -- same probe as
 * OPENSSL_instrument_bus_mftb, but timestamps are read with
 * "mfspr rN,268" instead of mftb.
 *
 * Registers on entry: r3 = array of 32-bit words, r4 = number of
 * elements (loaded into CTR). Each element has its cache line flushed
 * and the tick delta since the previous sample added to it.
 * Returns r3 = r4. Clobbers r6, r7, r8, CTR, cr0.
 */
.globl OPENSSL_instrument_bus_mfspr268
.type OPENSSL_instrument_bus_mfspr268,@function
.align 4
OPENSSL_instrument_bus_mfspr268:
mtctr 4
/* Warm-up access on element 0 with a zero delta; r7 = first timestamp. */
mfspr 7,268
li 8,0
dcbf 0,3
lwarx 6,0,3
add 6,6,8
stwcx. 6,0,3
stwx 6,0,3
/* r8 = ticks since the previous sample; r7 = new reference timestamp. */
.Loop3: mfspr 6,268
sub 8,6,7
mr 7,6
dcbf 0,3
lwarx 6,0,3
add 6,6,8
stwcx. 6,0,3
stwx 6,0,3
addi 3,3,4
bdnz .Loop3
mr 3,4
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size OPENSSL_instrument_bus_mfspr268,.-OPENSSL_instrument_bus_mfspr268
/*
 * OPENSSL_instrument_bus2_mfspr268 -- same probe as
 * OPENSSL_instrument_bus2_mftb, but timestamps are read with
 * "mfspr rN,268" instead of mftb.
 *
 * Registers on entry: r3 = array of 32-bit words, r4 = number of
 * elements, r5 = maximum number of probes. r0 keeps the element count,
 * r4 is turned into a remaining byte count, r9 holds the previous delta.
 * Returns r3 = element count minus elements left unvisited.
 * Clobbers r0, r4..r9 and condition registers.
 */
.globl OPENSSL_instrument_bus2_mfspr268
.type OPENSSL_instrument_bus2_mfspr268,@function
.align 4
OPENSSL_instrument_bus2_mfspr268:
mr 0,4
slwi 4,4,2
/* Warm-up access with a zero delta, then take the first real delta (r8, copied to r9). */
mfspr 7,268
li 8,0
dcbf 0,3
lwarx 6,0,3
add 6,6,8
stwcx. 6,0,3
stwx 6,0,3
mfspr 6,268
sub 8,6,7
mr 7,6
mr 9,8
.Loop4:
dcbf 0,3
lwarx 6,0,3
add 6,6,8
stwcx. 6,0,3
stwx 6,0,3
/* Stop once the probe budget in r5 is used up. */
addic. 5,5,-1
beq .Ldone4
mfspr 6,268
sub 8,6,7
mr 7,6
/* Pre-encoded compare of r8 with r9 into cr7 (cmplw 7,8,9 by its opcode fields). */
.long 0x7f884840
mr 9,8
/* r6 = 4 when the new delta differs from the previous one, otherwise 0. */
mfcr 6
not 6,6
rlwinm 6,6,1,29,29
/* Consume r6 bytes of the array; finish when none remain. */
sub. 4,4,6
add 3,3,6
bne .Loop4
.Ldone4:
srwi 4,4,2
sub 3,0,4
blr
/* Trailing tag words emitted by the generator after the return (never executed). */
.long 0
.byte 0,12,0x14,0,0,0,3,0
.long 0
.size OPENSSL_instrument_bus2_mfspr268,.-OPENSSL_instrument_bus2_mfspr268
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1,735 +0,0 @@
/* Do not modify. This file is auto-generated from sha512p8-ppc.pl. */
.machine "any"
.text
/*
 * sha256_block_p8
 *
 * SHA-256 block function using POWER8 vector crypto instructions
 * (32-bit ABI build: stwu/stw/lwz frame handling).
 *
 * Register usage as visible in the code below:
 *   r3      : address of the eight 32-bit state words; read at entry and
 *             written back at exit (presumably the hash context "ctx")
 *   r4      : input pointer, advanced by 16 after each 16-byte load,
 *             i.e. by 64 per block (presumably "inp")
 *   r5      : number of 64-byte blocks left; decremented once per .Loop
 *             pass (presumably "num").  The loop is bottom-tested, so a
 *             count of 0 is not handled here.
 *   r6      : base of the constant table, set up by .LPICmeup
 *   r7      : running pointer into the constant table
 *   r8      : saved LR
 *   r10,r26..r31 : constant byte offsets 0x10..0x70 for indexed vector
 *             loads and stores
 *   r11     : vector save-area pointer (r1+175 in prologue/epilogue,
 *             r1+47 for the per-block state snapshot)
 *   r12     : saved VRSAVE (SPR 256)
 *   v0..v7  : working variables a..h, one state word per register
 *   v8..v19, v24..v27 : the 16-entry message schedule window
 *   v28     : current round constant (a permute mask after the loop)
 *   v29,v30 : temporaries
 *
 * Raw encodings, decoded by hand from the .long values:
 *   0x7C001E19 / 0x7C8A1E19 : lxvw4x  v0,0,r3  / v4,r10,r3   (state load)
 *   0x7C001F19 / 0x7C8A1F19 : stxvw4x v0,0,r3  / v4,r10,r3   (state store)
 *   0x7D002699, 0x7D802699, 0x7E002699, 0x7F002699 :
 *                             lxvd2x  v8 / v12 / v16 / v24, 0, r4
 *   0x13CxFE82 : vshasigmaw v30,vX,1,15   (big Sigma1)
 *   0x13Cx8682 : vshasigmaw v30,vX,1,0    (big Sigma0)
 *   0x13xx0682 : vshasigmaw v30,vX,0,0    (small sigma0, schedule)
 *   0x13xx7E82 : vshasigmaw v30,vX,0,15   (small sigma1, schedule)
 *
 * Shape of one round (register roles rotate from round to round):
 *   h += X[i]; h += (precomputed K); v29 = vsel(g,f,e); h += v29;
 *   v30 = Sigma1(e); h += v30; v29 = vsel(b,c,a^b); d += h;
 *   v30 = Sigma0(a) + v29; h += v30; v28 = next K row.
 * The next round's K is added to g (which becomes h) one round early.
 *
 * Callee-saved v24..v31 and r26..r31 are saved in the 328-byte frame and
 * restored before returning.
 */
.globl sha256_block_p8
.type sha256_block_p8,@function
.align 6
sha256_block_p8:
/* Prologue: allocate a 328-byte frame and keep LR in r8. */
stwu 1,-328(1)
mflr 8
/* Save v24..v31 at r1+175 upward in 16-byte steps (stvx ignores the low four address bits). */
li 10,175
li 11,191
stvx 24,10,1
addi 10,10,32
/* r12 = caller's VRSAVE (SPR 256) */
mfspr 12,256
stvx 25,11,1
addi 11,11,32
stvx 26,10,1
addi 10,10,32
stvx 27,11,1
addi 11,11,32
stvx 28,10,1
addi 10,10,32
stvx 29,11,1
addi 11,11,32
stvx 30,10,1
stvx 31,11,1
/* r11 = 0xfffff0ff, the VRSAVE value installed below */
li 11,-4096+255
stw 12,300(1)
/* Save r26..r31 and reuse them (with r10) as the offsets 0x10..0x70. */
li 10,0x10
stw 26,304(1)
li 26,0x20
stw 27,308(1)
li 27,0x30
stw 28,312(1)
li 28,0x40
stw 29,316(1)
li 29,0x50
stw 30,320(1)
li 30,0x60
stw 31,324(1)
li 31,0x70
/* LR also goes to 332(r1), 4 bytes above this frame. */
stw 8,332(1)
mtspr 256,11
/* r6 = address of the constant table that follows .LPICmeup */
bl .LPICmeup
/* r11 = scratch area used to snapshot the state for each block */
addi 11,1,47
/* lxvw4x v0,0,r3 and lxvw4x v4,r10,r3: load the eight state words. */
.long 0x7C001E19
.long 0x7C8A1E19
/* Rotate so that v1..v3 and v5..v7 carry the remaining state words in their leading element. */
vsldoi 1,0,0,4
vsldoi 2,0,0,8
vsldoi 3,0,0,12
vsldoi 5,4,4,4
vsldoi 6,4,4,8
vsldoi 7,4,4,12
/* r0 = 3: iteration count later moved to CTR for .L16_xx */
li 0,3
b .Loop
.align 5
/* Per-block loop: rounds 0..15 are unrolled here, rounds 16..63 run in .L16_xx. */
.Loop:
/* v28 = first row of the constant table */
lvx 28,0,6
/* lxvd2x v8,0,r4: first 16 input bytes (schedule words 0..3) */
.long 0x7D002699
addi 4,4,16
/* r7 = running table pointer for this block */
mr 7,6
/* Snapshot the incoming state a..h; it is added back after round 63. */
stvx 0,0,11
stvx 1,10,11
stvx 2,26,11
stvx 3,27,11
stvx 4,28,11
stvx 5,29,11
stvx 6,30,11
stvx 7,31,11
/* Round 0 (the constant is added to h here because no earlier round could pre-add it). */
vadduwm 7,7,28
lvx 28,10,6
vadduwm 7,7,8
vsel 29,6,5,4
vadduwm 6,6,28
vadduwm 7,7,29
.long 0x13C4FE82
vadduwm 7,7,30
vxor 29,0,1
vsel 29,1,2,29
vadduwm 3,3,7
.long 0x13C08682
vadduwm 30,30,29
vadduwm 7,7,30
lvx 28,26,7
/* Round 1: v9 = v8 rotated by one word */
vsldoi 9,8,8,4
vadduwm 6,6,9
vsel 29,5,4,3
vadduwm 5,5,28
vadduwm 6,6,29
.long 0x13C3FE82
vadduwm 6,6,30
vxor 29,7,0
vsel 29,0,1,29
vadduwm 2,2,6
.long 0x13C78682
vadduwm 30,30,29
vadduwm 6,6,30
lvx 28,27,7
/* Round 2 */
vsldoi 10,9,9,4
vadduwm 5,5,10
vsel 29,4,3,2
vadduwm 4,4,28
vadduwm 5,5,29
.long 0x13C2FE82
vadduwm 5,5,30
vxor 29,6,7
vsel 29,7,0,29
vadduwm 1,1,5
.long 0x13C68682
vadduwm 30,30,29
vadduwm 5,5,30
lvx 28,28,7
/* lxvd2x v12,0,r4: next 16 input bytes (schedule words 4..7) */
.long 0x7D802699
addi 4,4,16
/* Round 3 */
vsldoi 11,10,10,4
vadduwm 4,4,11
vsel 29,3,2,1
vadduwm 3,3,28
vadduwm 4,4,29
.long 0x13C1FE82
vadduwm 4,4,30
vxor 29,5,6
vsel 29,6,7,29
vadduwm 0,0,4
.long 0x13C58682
vadduwm 30,30,29
vadduwm 4,4,30
lvx 28,29,7
/* Round 4 */
vadduwm 3,3,12
vsel 29,2,1,0
vadduwm 2,2,28
vadduwm 3,3,29
.long 0x13C0FE82
vadduwm 3,3,30
vxor 29,4,5
vsel 29,5,6,29
vadduwm 7,7,3
.long 0x13C48682
vadduwm 30,30,29
vadduwm 3,3,30
lvx 28,30,7
/* Round 5 */
vsldoi 13,12,12,4
vadduwm 2,2,13
vsel 29,1,0,7
vadduwm 1,1,28
vadduwm 2,2,29
.long 0x13C7FE82
vadduwm 2,2,30
vxor 29,3,4
vsel 29,4,5,29
vadduwm 6,6,2
.long 0x13C38682
vadduwm 30,30,29
vadduwm 2,2,30
lvx 28,31,7
/* Step the table pointer past the eight rows just consumed. */
addi 7,7,0x80
/* Round 6 */
vsldoi 14,13,13,4
vadduwm 1,1,14
vsel 29,0,7,6
vadduwm 0,0,28
vadduwm 1,1,29
.long 0x13C6FE82
vadduwm 1,1,30
vxor 29,2,3
vsel 29,3,4,29
vadduwm 5,5,1
.long 0x13C28682
vadduwm 30,30,29
vadduwm 1,1,30
lvx 28,0,7
/* lxvd2x v16,0,r4: next 16 input bytes (schedule words 8..11) */
.long 0x7E002699
addi 4,4,16
/* Round 7 */
vsldoi 15,14,14,4
vadduwm 0,0,15
vsel 29,7,6,5
vadduwm 7,7,28
vadduwm 0,0,29
.long 0x13C5FE82
vadduwm 0,0,30
vxor 29,1,2
vsel 29,2,3,29
vadduwm 4,4,0
.long 0x13C18682
vadduwm 30,30,29
vadduwm 0,0,30
lvx 28,10,7
/* Round 8 */
vadduwm 7,7,16
vsel 29,6,5,4
vadduwm 6,6,28
vadduwm 7,7,29
.long 0x13C4FE82
vadduwm 7,7,30
vxor 29,0,1
vsel 29,1,2,29
vadduwm 3,3,7
.long 0x13C08682
vadduwm 30,30,29
vadduwm 7,7,30
lvx 28,26,7
/* Round 9 */
vsldoi 17,16,16,4
vadduwm 6,6,17
vsel 29,5,4,3
vadduwm 5,5,28
vadduwm 6,6,29
.long 0x13C3FE82
vadduwm 6,6,30
vxor 29,7,0
vsel 29,0,1,29
vadduwm 2,2,6
.long 0x13C78682
vadduwm 30,30,29
vadduwm 6,6,30
lvx 28,27,7
/* Round 10 */
vsldoi 18,17,17,4
vadduwm 5,5,18
vsel 29,4,3,2
vadduwm 4,4,28
vadduwm 5,5,29
.long 0x13C2FE82
vadduwm 5,5,30
vxor 29,6,7
vsel 29,7,0,29
vadduwm 1,1,5
.long 0x13C68682
vadduwm 30,30,29
vadduwm 5,5,30
lvx 28,28,7
/* lxvd2x v24,0,r4: last 16 input bytes of the block (schedule words 12..15) */
.long 0x7F002699
addi 4,4,16
/* Round 11 */
vsldoi 19,18,18,4
vadduwm 4,4,19
vsel 29,3,2,1
vadduwm 3,3,28
vadduwm 4,4,29
.long 0x13C1FE82
vadduwm 4,4,30
vxor 29,5,6
vsel 29,6,7,29
vadduwm 0,0,4
.long 0x13C58682
vadduwm 30,30,29
vadduwm 4,4,30
lvx 28,29,7
/* Round 12 */
vadduwm 3,3,24
vsel 29,2,1,0
vadduwm 2,2,28
vadduwm 3,3,29
.long 0x13C0FE82
vadduwm 3,3,30
vxor 29,4,5
vsel 29,5,6,29
vadduwm 7,7,3
.long 0x13C48682
vadduwm 30,30,29
vadduwm 3,3,30
lvx 28,30,7
/* Round 13 */
vsldoi 25,24,24,4
vadduwm 2,2,25
vsel 29,1,0,7
vadduwm 1,1,28
vadduwm 2,2,29
.long 0x13C7FE82
vadduwm 2,2,30
vxor 29,3,4
vsel 29,4,5,29
vadduwm 6,6,2
.long 0x13C38682
vadduwm 30,30,29
vadduwm 2,2,30
lvx 28,31,7
/* Step the table pointer past the next eight rows. */
addi 7,7,0x80
/* Round 14 */
vsldoi 26,25,25,4
vadduwm 1,1,26
vsel 29,0,7,6
vadduwm 0,0,28
vadduwm 1,1,29
.long 0x13C6FE82
vadduwm 1,1,30
vxor 29,2,3
vsel 29,3,4,29
vadduwm 5,5,1
.long 0x13C28682
vadduwm 30,30,29
vadduwm 1,1,30
lvx 28,0,7
/* Round 15, interleaved with the first schedule update: v8 += sigma0(v9) + sigma1(v26) + v17. */
vsldoi 27,26,26,4
.long 0x13C90682
vadduwm 8,8,30
.long 0x13DA7E82
vadduwm 8,8,30
vadduwm 8,8,17
vadduwm 0,0,27
vsel 29,7,6,5
vadduwm 7,7,28
vadduwm 0,0,29
.long 0x13C5FE82
vadduwm 0,0,30
vxor 29,1,2
vsel 29,2,3,29
vadduwm 4,4,0
.long 0x13C18682
vadduwm 30,30,29
vadduwm 0,0,30
lvx 28,10,7
/* CTR = 3: the 16-round body below runs three times (rounds 16..63). */
mtctr 0
b .L16_xx
.align 5
/*
 * Each round in this loop first extends the schedule for the following
 * round (X[j] += sigma0(X[j+1]) + sigma1(X[j+14]) + X[j+9], indices taken
 * modulo 16 over the register window), then performs the round itself.
 */
.L16_xx:
.long 0x13CA0682
vadduwm 9,9,30
.long 0x13DB7E82
vadduwm 9,9,30
vadduwm 9,9,18
vadduwm 7,7,8
vsel 29,6,5,4
vadduwm 6,6,28
vadduwm 7,7,29
.long 0x13C4FE82
vadduwm 7,7,30
vxor 29,0,1
vsel 29,1,2,29
vadduwm 3,3,7
.long 0x13C08682
vadduwm 30,30,29
vadduwm 7,7,30
lvx 28,26,7
.long 0x13CB0682
vadduwm 10,10,30
.long 0x13C87E82
vadduwm 10,10,30
vadduwm 10,10,19
vadduwm 6,6,9
vsel 29,5,4,3
vadduwm 5,5,28
vadduwm 6,6,29
.long 0x13C3FE82
vadduwm 6,6,30
vxor 29,7,0
vsel 29,0,1,29
vadduwm 2,2,6
.long 0x13C78682
vadduwm 30,30,29
vadduwm 6,6,30
lvx 28,27,7
.long 0x13CC0682
vadduwm 11,11,30
.long 0x13C97E82
vadduwm 11,11,30
vadduwm 11,11,24
vadduwm 5,5,10
vsel 29,4,3,2
vadduwm 4,4,28
vadduwm 5,5,29
.long 0x13C2FE82
vadduwm 5,5,30
vxor 29,6,7
vsel 29,7,0,29
vadduwm 1,1,5
.long 0x13C68682
vadduwm 30,30,29
vadduwm 5,5,30
lvx 28,28,7
.long 0x13CD0682
vadduwm 12,12,30
.long 0x13CA7E82
vadduwm 12,12,30
vadduwm 12,12,25
vadduwm 4,4,11
vsel 29,3,2,1
vadduwm 3,3,28
vadduwm 4,4,29
.long 0x13C1FE82
vadduwm 4,4,30
vxor 29,5,6
vsel 29,6,7,29
vadduwm 0,0,4
.long 0x13C58682
vadduwm 30,30,29
vadduwm 4,4,30
lvx 28,29,7
.long 0x13CE0682
vadduwm 13,13,30
.long 0x13CB7E82
vadduwm 13,13,30
vadduwm 13,13,26
vadduwm 3,3,12
vsel 29,2,1,0
vadduwm 2,2,28
vadduwm 3,3,29
.long 0x13C0FE82
vadduwm 3,3,30
vxor 29,4,5
vsel 29,5,6,29
vadduwm 7,7,3
.long 0x13C48682
vadduwm 30,30,29
vadduwm 3,3,30
lvx 28,30,7
.long 0x13CF0682
vadduwm 14,14,30
.long 0x13CC7E82
vadduwm 14,14,30
vadduwm 14,14,27
vadduwm 2,2,13
vsel 29,1,0,7
vadduwm 1,1,28
vadduwm 2,2,29
.long 0x13C7FE82
vadduwm 2,2,30
vxor 29,3,4
vsel 29,4,5,29
vadduwm 6,6,2
.long 0x13C38682
vadduwm 30,30,29
vadduwm 2,2,30
lvx 28,31,7
/* Step the table pointer past eight rows. */
addi 7,7,0x80
.long 0x13D00682
vadduwm 15,15,30
.long 0x13CD7E82
vadduwm 15,15,30
vadduwm 15,15,8
vadduwm 1,1,14
vsel 29,0,7,6
vadduwm 0,0,28
vadduwm 1,1,29
.long 0x13C6FE82
vadduwm 1,1,30
vxor 29,2,3
vsel 29,3,4,29
vadduwm 5,5,1
.long 0x13C28682
vadduwm 30,30,29
vadduwm 1,1,30
lvx 28,0,7
.long 0x13D10682
vadduwm 16,16,30
.long 0x13CE7E82
vadduwm 16,16,30
vadduwm 16,16,9
vadduwm 0,0,15
vsel 29,7,6,5
vadduwm 7,7,28
vadduwm 0,0,29
.long 0x13C5FE82
vadduwm 0,0,30
vxor 29,1,2
vsel 29,2,3,29
vadduwm 4,4,0
.long 0x13C18682
vadduwm 30,30,29
vadduwm 0,0,30
lvx 28,10,7
.long 0x13D20682
vadduwm 17,17,30
.long 0x13CF7E82
vadduwm 17,17,30
vadduwm 17,17,10
vadduwm 7,7,16
vsel 29,6,5,4
vadduwm 6,6,28
vadduwm 7,7,29
.long 0x13C4FE82
vadduwm 7,7,30
vxor 29,0,1
vsel 29,1,2,29
vadduwm 3,3,7
.long 0x13C08682
vadduwm 30,30,29
vadduwm 7,7,30
lvx 28,26,7
.long 0x13D30682
vadduwm 18,18,30
.long 0x13D07E82
vadduwm 18,18,30
vadduwm 18,18,11
vadduwm 6,6,17
vsel 29,5,4,3
vadduwm 5,5,28
vadduwm 6,6,29
.long 0x13C3FE82
vadduwm 6,6,30
vxor 29,7,0
vsel 29,0,1,29
vadduwm 2,2,6
.long 0x13C78682
vadduwm 30,30,29
vadduwm 6,6,30
lvx 28,27,7
.long 0x13D80682
vadduwm 19,19,30
.long 0x13D17E82
vadduwm 19,19,30
vadduwm 19,19,12
vadduwm 5,5,18
vsel 29,4,3,2
vadduwm 4,4,28
vadduwm 5,5,29
.long 0x13C2FE82
vadduwm 5,5,30
vxor 29,6,7
vsel 29,7,0,29
vadduwm 1,1,5
.long 0x13C68682
vadduwm 30,30,29
vadduwm 5,5,30
lvx 28,28,7
.long 0x13D90682
vadduwm 24,24,30
.long 0x13D27E82
vadduwm 24,24,30
vadduwm 24,24,13
vadduwm 4,4,19
vsel 29,3,2,1
vadduwm 3,3,28
vadduwm 4,4,29
.long 0x13C1FE82
vadduwm 4,4,30
vxor 29,5,6
vsel 29,6,7,29
vadduwm 0,0,4
.long 0x13C58682
vadduwm 30,30,29
vadduwm 4,4,30
lvx 28,29,7
.long 0x13DA0682
vadduwm 25,25,30
.long 0x13D37E82
vadduwm 25,25,30
vadduwm 25,25,14
vadduwm 3,3,24
vsel 29,2,1,0
vadduwm 2,2,28
vadduwm 3,3,29
.long 0x13C0FE82
vadduwm 3,3,30
vxor 29,4,5
vsel 29,5,6,29
vadduwm 7,7,3
.long 0x13C48682
vadduwm 30,30,29
vadduwm 3,3,30
lvx 28,30,7
.long 0x13DB0682
vadduwm 26,26,30
.long 0x13D87E82
vadduwm 26,26,30
vadduwm 26,26,15
vadduwm 2,2,25
vsel 29,1,0,7
vadduwm 1,1,28
vadduwm 2,2,29
.long 0x13C7FE82
vadduwm 2,2,30
vxor 29,3,4
vsel 29,4,5,29
vadduwm 6,6,2
.long 0x13C38682
vadduwm 30,30,29
vadduwm 2,2,30
lvx 28,31,7
/* Step the table pointer past eight rows. */
addi 7,7,0x80
.long 0x13C80682
vadduwm 27,27,30
.long 0x13D97E82
vadduwm 27,27,30
vadduwm 27,27,16
vadduwm 1,1,26
vsel 29,0,7,6
vadduwm 0,0,28
vadduwm 1,1,29
.long 0x13C6FE82
vadduwm 1,1,30
vxor 29,2,3
vsel 29,3,4,29
vadduwm 5,5,1
.long 0x13C28682
vadduwm 30,30,29
vadduwm 1,1,30
lvx 28,0,7
.long 0x13C90682
vadduwm 8,8,30
.long 0x13DA7E82
vadduwm 8,8,30
vadduwm 8,8,17
vadduwm 0,0,27
vsel 29,7,6,5
vadduwm 7,7,28
vadduwm 0,0,29
.long 0x13C5FE82
vadduwm 0,0,30
vxor 29,1,2
vsel 29,2,3,29
vadduwm 4,4,0
.long 0x13C18682
vadduwm 30,30,29
vadduwm 0,0,30
/* On the final pass this load fetches the first permute-mask row that follows the zero row. */
lvx 28,10,7
bdnz .L16_xx
/* Add the snapshot taken at the top of .Loop back into a..h, and count down the blocks in r5. */
lvx 10,0,11
subic. 5,5,1
lvx 11,10,11
vadduwm 0,0,10
lvx 12,26,11
vadduwm 1,1,11
lvx 13,27,11
vadduwm 2,2,12
lvx 14,28,11
vadduwm 3,3,13
lvx 15,29,11
vadduwm 4,4,14
lvx 16,30,11
vadduwm 5,5,15
lvx 17,31,11
vadduwm 6,6,16
vadduwm 7,7,17
bne .Loop
/* All blocks done: load the two remaining permute masks and pack a..d into v0 and e..h into v4. */
lvx 8,26,7
vperm 0,0,1,28
lvx 9,27,7
vperm 4,4,5,28
vperm 0,0,2,8
vperm 4,4,6,8
vperm 0,0,3,9
vperm 4,4,7,9
/* stxvw4x v0,0,r3 and stxvw4x v4,r10,r3: write the eight state words back. */
.long 0x7C001F19
.long 0x7C8A1F19
/* Epilogue: restore LR, VRSAVE, v24..v31 and r26..r31, then release the frame. */
addi 11,1,175
mtlr 8
mtspr 256,12
lvx 24,0,11
lvx 25,10,11
lvx 26,26,11
lvx 27,27,11
lvx 28,28,11
lvx 29,29,11
lvx 30,30,11
lvx 31,31,11
lwz 26,304(1)
lwz 27,308(1)
lwz 28,312(1)
lwz 29,316(1)
lwz 30,320(1)
lwz 31,324(1)
addi 1,1,328
blr
/* Trailer emitted by the generator after the function body (looks like a traceback tag -- TODO confirm). */
.long 0
.byte 0,12,4,1,0x80,6,3,0
.long 0
.size sha256_block_p8,.-sha256_block_p8
/*
 * .LPICmeup
 *
 * Position-independent helper: returns in r6 the run-time address of the
 * constant table that follows this routine.  The caller's LR is parked in
 * r0 and restored before returning; r0 and r6 are the only registers
 * written.
 *
 * The "bcl 20,31,$+4" branches to the next instruction while setting LR
 * to that instruction's address, which "mflr 6" then reads.  The table
 * starts 56 bytes after that "mflr 6": 4 instructions (16 bytes), the
 * 12-byte trailer and the 28-byte pad.  With the 64-byte alignment
 * requested below, this places the table 64 bytes after .LPICmeup.
 * Do not insert or remove anything between the label and the table
 * without adjusting the "addi 6,6,56" offset.
 */
.align 6
.LPICmeup:
mflr 0
bcl 20,31,$+4
mflr 6
addi 6,6,56
mtlr 0
blr
/* Trailer emitted by the generator after the routine (looks like a traceback tag -- TODO confirm). */
.long 0
.byte 0,12,0x14,0,0,0,0,0
/* Pad so that the table below starts at the offset assumed by the addi above. */
.space 28
/*
 * Constant table: 64 rows of 16 bytes, each holding one 32-bit round
 * constant replicated into all four vector lanes (the values are the
 * standard SHA-256 K constants).  sha256_block_p8 reads one row per round.
 */
.long 0x428a2f98,0x428a2f98,0x428a2f98,0x428a2f98
.long 0x71374491,0x71374491,0x71374491,0x71374491
.long 0xb5c0fbcf,0xb5c0fbcf,0xb5c0fbcf,0xb5c0fbcf
.long 0xe9b5dba5,0xe9b5dba5,0xe9b5dba5,0xe9b5dba5
.long 0x3956c25b,0x3956c25b,0x3956c25b,0x3956c25b
.long 0x59f111f1,0x59f111f1,0x59f111f1,0x59f111f1
.long 0x923f82a4,0x923f82a4,0x923f82a4,0x923f82a4
.long 0xab1c5ed5,0xab1c5ed5,0xab1c5ed5,0xab1c5ed5
.long 0xd807aa98,0xd807aa98,0xd807aa98,0xd807aa98
.long 0x12835b01,0x12835b01,0x12835b01,0x12835b01
.long 0x243185be,0x243185be,0x243185be,0x243185be
.long 0x550c7dc3,0x550c7dc3,0x550c7dc3,0x550c7dc3
.long 0x72be5d74,0x72be5d74,0x72be5d74,0x72be5d74
.long 0x80deb1fe,0x80deb1fe,0x80deb1fe,0x80deb1fe
.long 0x9bdc06a7,0x9bdc06a7,0x9bdc06a7,0x9bdc06a7
.long 0xc19bf174,0xc19bf174,0xc19bf174,0xc19bf174
.long 0xe49b69c1,0xe49b69c1,0xe49b69c1,0xe49b69c1
.long 0xefbe4786,0xefbe4786,0xefbe4786,0xefbe4786
.long 0x0fc19dc6,0x0fc19dc6,0x0fc19dc6,0x0fc19dc6
.long 0x240ca1cc,0x240ca1cc,0x240ca1cc,0x240ca1cc
.long 0x2de92c6f,0x2de92c6f,0x2de92c6f,0x2de92c6f
.long 0x4a7484aa,0x4a7484aa,0x4a7484aa,0x4a7484aa
.long 0x5cb0a9dc,0x5cb0a9dc,0x5cb0a9dc,0x5cb0a9dc
.long 0x76f988da,0x76f988da,0x76f988da,0x76f988da
.long 0x983e5152,0x983e5152,0x983e5152,0x983e5152
.long 0xa831c66d,0xa831c66d,0xa831c66d,0xa831c66d
.long 0xb00327c8,0xb00327c8,0xb00327c8,0xb00327c8
.long 0xbf597fc7,0xbf597fc7,0xbf597fc7,0xbf597fc7
.long 0xc6e00bf3,0xc6e00bf3,0xc6e00bf3,0xc6e00bf3
.long 0xd5a79147,0xd5a79147,0xd5a79147,0xd5a79147
.long 0x06ca6351,0x06ca6351,0x06ca6351,0x06ca6351
.long 0x14292967,0x14292967,0x14292967,0x14292967
.long 0x27b70a85,0x27b70a85,0x27b70a85,0x27b70a85
.long 0x2e1b2138,0x2e1b2138,0x2e1b2138,0x2e1b2138
.long 0x4d2c6dfc,0x4d2c6dfc,0x4d2c6dfc,0x4d2c6dfc
.long 0x53380d13,0x53380d13,0x53380d13,0x53380d13
.long 0x650a7354,0x650a7354,0x650a7354,0x650a7354
.long 0x766a0abb,0x766a0abb,0x766a0abb,0x766a0abb
.long 0x81c2c92e,0x81c2c92e,0x81c2c92e,0x81c2c92e
.long 0x92722c85,0x92722c85,0x92722c85,0x92722c85
.long 0xa2bfe8a1,0xa2bfe8a1,0xa2bfe8a1,0xa2bfe8a1
.long 0xa81a664b,0xa81a664b,0xa81a664b,0xa81a664b
.long 0xc24b8b70,0xc24b8b70,0xc24b8b70,0xc24b8b70
.long 0xc76c51a3,0xc76c51a3,0xc76c51a3,0xc76c51a3
.long 0xd192e819,0xd192e819,0xd192e819,0xd192e819
.long 0xd6990624,0xd6990624,0xd6990624,0xd6990624
.long 0xf40e3585,0xf40e3585,0xf40e3585,0xf40e3585
.long 0x106aa070,0x106aa070,0x106aa070,0x106aa070
.long 0x19a4c116,0x19a4c116,0x19a4c116,0x19a4c116
.long 0x1e376c08,0x1e376c08,0x1e376c08,0x1e376c08
.long 0x2748774c,0x2748774c,0x2748774c,0x2748774c
.long 0x34b0bcb5,0x34b0bcb5,0x34b0bcb5,0x34b0bcb5
.long 0x391c0cb3,0x391c0cb3,0x391c0cb3,0x391c0cb3
.long 0x4ed8aa4a,0x4ed8aa4a,0x4ed8aa4a,0x4ed8aa4a
.long 0x5b9cca4f,0x5b9cca4f,0x5b9cca4f,0x5b9cca4f
.long 0x682e6ff3,0x682e6ff3,0x682e6ff3,0x682e6ff3
.long 0x748f82ee,0x748f82ee,0x748f82ee,0x748f82ee
.long 0x78a5636f,0x78a5636f,0x78a5636f,0x78a5636f
.long 0x84c87814,0x84c87814,0x84c87814,0x84c87814
.long 0x8cc70208,0x8cc70208,0x8cc70208,0x8cc70208
.long 0x90befffa,0x90befffa,0x90befffa,0x90befffa
.long 0xa4506ceb,0xa4506ceb,0xa4506ceb,0xa4506ceb
.long 0xbef9a3f7,0xbef9a3f7,0xbef9a3f7,0xbef9a3f7
.long 0xc67178f2,0xc67178f2,0xc67178f2,0xc67178f2
/* Zero row: loaded as the "next constant" during the final round. */
.long 0,0,0,0
/* Three vperm masks used after the last block to gather one word from each of four registers into a single vector. */
.long 0x00010203,0x10111213,0x10111213,0x10111213
.long 0x00010203,0x04050607,0x10111213,0x10111213
.long 0x00010203,0x04050607,0x08090a0b,0x10111213
/* NUL-terminated ASCII identification string: "SHA256 for PowerISA 2.07,CRYPTOGAMS by <appro@openssl.org>" */
.byte 83,72,65,50,53,54,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2

Some files were not shown because too many files have changed in this diff Show More