libcrypto: Switch back to the generated assembly in sys/crypto/openssl
Reviewed by: markj Differential Revision: https://reviews.freebsd.org/D41569
This commit is contained in:
@@ -618,12 +618,12 @@ buildasm cleanasm:
|
||||
PICFLAG+= -DOPENSSL_PIC
|
||||
|
||||
.if defined(ASM_${MACHINE_CPUARCH})
|
||||
.PATH: ${SRCTOP}/secure/lib/libcrypto/arch/${MACHINE_CPUARCH}
|
||||
.PATH: ${SRCTOP}/sys/crypto/openssl/${MACHINE_CPUARCH}
|
||||
.if defined(ASM_amd64)
|
||||
.PATH: ${LCRYPTO_SRC}/crypto/bn/asm
|
||||
.endif
|
||||
.elif defined(ASM_${MACHINE_ARCH})
|
||||
.PATH: ${SRCTOP}/secure/lib/libcrypto/arch/${MACHINE_ARCH}
|
||||
.PATH: ${SRCTOP}/sys/crypto/openssl/${MACHINE_ARCH}
|
||||
.endif
|
||||
|
||||
.PATH: ${LCRYPTO_SRC}/crypto \
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,130 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from arm64cpuid.pl. */
|
||||
#include "arm_arch.h"
|
||||
|
||||
.text
|
||||
.arch armv8-a+crypto
|
||||
|
||||
.align 5
|
||||
// _armv7_neon_probe: execute one Advanced SIMD instruction and return.
// The ORR of v15 with itself leaves the register value unchanged, so the
// routine has no visible effect beyond executing the instruction.
// NOTE(review): presumably invoked under a fault handler to detect SIMD
// support -- confirm against the caller.
.globl _armv7_neon_probe
|
||||
.type _armv7_neon_probe,%function
|
||||
_armv7_neon_probe:
|
||||
orr v15.16b, v15.16b, v15.16b
|
||||
ret
|
||||
.size _armv7_neon_probe,.-_armv7_neon_probe
|
||||
|
||||
// _armv7_tick: return a counter register value in x0.
// Reads CNTPCT_EL0 when __APPLE__ is defined, CNTVCT_EL0 otherwise.
.globl _armv7_tick
|
||||
.type _armv7_tick,%function
|
||||
_armv7_tick:
|
||||
#ifdef __APPLE__
|
||||
mrs x0, CNTPCT_EL0
|
||||
#else
|
||||
mrs x0, CNTVCT_EL0
|
||||
#endif
|
||||
ret
|
||||
.size _armv7_tick,.-_armv7_tick
|
||||
|
||||
// _armv8_aes_probe: execute a single AESE instruction on v0 and return.
// v0 is modified; no other state is touched.
// NOTE(review): presumably used to detect the AES extension -- confirm
// against the caller.
.globl _armv8_aes_probe
|
||||
.type _armv8_aes_probe,%function
|
||||
_armv8_aes_probe:
|
||||
aese v0.16b, v0.16b
|
||||
ret
|
||||
.size _armv8_aes_probe,.-_armv8_aes_probe
|
||||
|
||||
// _armv8_sha1_probe: execute a single SHA1H instruction on s0 and return.
// NOTE(review): presumably used to detect the SHA-1 extension -- confirm
// against the caller.
.globl _armv8_sha1_probe
|
||||
.type _armv8_sha1_probe,%function
|
||||
_armv8_sha1_probe:
|
||||
sha1h s0, s0
|
||||
ret
|
||||
.size _armv8_sha1_probe,.-_armv8_sha1_probe
|
||||
|
||||
// _armv8_sha256_probe: execute a single SHA256SU0 instruction on v0 and
// return.
// NOTE(review): presumably used to detect the SHA-256 extension -- confirm
// against the caller.
.globl _armv8_sha256_probe
|
||||
.type _armv8_sha256_probe,%function
|
||||
_armv8_sha256_probe:
|
||||
sha256su0 v0.4s, v0.4s
|
||||
ret
|
||||
.size _armv8_sha256_probe,.-_armv8_sha256_probe
|
||||
|
||||
// _armv8_pmull_probe: execute a single 64x64->128 PMULL on v0 and return.
// NOTE(review): presumably used to detect the polynomial-multiply
// extension -- confirm against the caller.
.globl _armv8_pmull_probe
|
||||
.type _armv8_pmull_probe,%function
|
||||
_armv8_pmull_probe:
|
||||
pmull v0.1q, v0.1d, v0.1d
|
||||
ret
|
||||
.size _armv8_pmull_probe,.-_armv8_pmull_probe
|
||||
|
||||
// _armv8_sha512_probe: execute one instruction emitted as a raw 32-bit
// word (annotated below as sha512su0) and return. The instruction is
// spelled with .long rather than a mnemonic.
// NOTE(review): presumably so that assemblers lacking the mnemonic can
// still build the file -- confirm.
.globl _armv8_sha512_probe
|
||||
.type _armv8_sha512_probe,%function
|
||||
_armv8_sha512_probe:
|
||||
.long 0xcec08000 // sha512su0 v0.2d,v0.2d
|
||||
ret
|
||||
.size _armv8_sha512_probe,.-_armv8_sha512_probe
|
||||
|
||||
// _armv8_cpuid_probe: return the contents of the midr_el1 system register
// in x0.
.globl _armv8_cpuid_probe
|
||||
.type _armv8_cpuid_probe,%function
|
||||
_armv8_cpuid_probe:
|
||||
mrs x0, midr_el1
|
||||
ret
|
||||
.size _armv8_cpuid_probe,.-_armv8_cpuid_probe
|
||||
|
||||
// OPENSSL_cleanse: store zeros over a buffer.
// In:    x0 = start of buffer, x1 = length in bytes
// Out:   none (x0 and x1 are consumed)
// Flow:  len == 0        -> return immediately
//        len <= 15       -> byte-by-byte loop (.Little)
//        len >  15       -> byte stores until x0 is 8-byte aligned (.Lot),
//                           then 8-byte stores while len >= 8 (.Laligned),
//                           then any remaining bytes via .Little
.globl OPENSSL_cleanse
|
||||
.type OPENSSL_cleanse,%function
|
||||
.align 5
|
||||
OPENSSL_cleanse:
|
||||
cbz x1,.Lret // len==0?
|
||||
cmp x1,#15
|
||||
b.hi .Lot // len>15
|
||||
nop
|
||||
// Byte loop: x1 is non-zero on entry, counts down to zero.
.Little:
|
||||
strb wzr,[x0],#1 // store byte-by-byte
|
||||
subs x1,x1,#1
|
||||
b.ne .Little
|
||||
.Lret: ret
|
||||
|
||||
.align 4
|
||||
// Alignment prologue: zero single bytes until the low three bits of the
// pointer are clear.
.Lot: tst x0,#7
|
||||
b.eq .Laligned // inp is aligned
|
||||
strb wzr,[x0],#1 // store byte-by-byte
|
||||
sub x1,x1,#1
|
||||
b .Lot
|
||||
|
||||
.align 4
|
||||
// Word loop: x0 is 8-byte aligned here; "tst x1,#-8" is non-zero while at
// least 8 bytes remain.
.Laligned:
|
||||
str xzr,[x0],#8 // store word-by-word
|
||||
sub x1,x1,#8
|
||||
tst x1,#-8
|
||||
b.ne .Laligned // len>=8
|
||||
cbnz x1,.Little // len!=0?
|
||||
ret
|
||||
.size OPENSSL_cleanse,.-OPENSSL_cleanse
|
||||
|
||||
// CRYPTO_memcmp: compare two buffers without an early exit on mismatch.
// In:    x0 = first buffer, x1 = second buffer, x2 = length in bytes
// Out:   x0/w0 = 0 if all bytes are equal (or length is 0), 1 otherwise
// Uses:  w3 (difference accumulator), w4, w5, x8-x11, flags
// A length of exactly 16 takes a dedicated path that compares the two
// buffers with a pair of 16-byte loads; every other non-zero length uses
// the byte loop, which always walks the full length.
.globl CRYPTO_memcmp
|
||||
.type CRYPTO_memcmp,%function
|
||||
.align 4
|
||||
CRYPTO_memcmp:
|
||||
eor w3,w3,w3
|
||||
cbz x2,.Lno_data // len==0?
|
||||
cmp x2,#16
|
||||
b.ne .Loop_cmp
|
||||
// len == 16: XOR both 64-bit halves, OR the results, and map
// zero/non-zero to 0/1 with csel.
ldp x8,x9,[x0]
|
||||
ldp x10,x11,[x1]
|
||||
eor x8,x8,x10
|
||||
eor x9,x9,x11
|
||||
orr x8,x8,x9
|
||||
mov x0,#1
|
||||
cmp x8,#0
|
||||
csel x0,xzr,x0,eq
|
||||
ret
|
||||
|
||||
.align 4
|
||||
// Generic path: OR every byte-wise XOR difference into w3.
.Loop_cmp:
|
||||
ldrb w4,[x0],#1
|
||||
ldrb w5,[x1],#1
|
||||
eor w4,w4,w5
|
||||
orr w3,w3,w4
|
||||
subs x2,x2,#1
|
||||
b.ne .Loop_cmp
|
||||
|
||||
// w3 is in 0..255 here; negating a non-zero value sets bit 31, so the
// shift below yields 1 for "different" and 0 for "equal".
.Lno_data:
|
||||
neg w0,w3
|
||||
lsr w0,w0,#31
|
||||
ret
|
||||
.size CRYPTO_memcmp,.-CRYPTO_memcmp
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,553 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from ghashv8-armx.pl. */
|
||||
#include "arm_arch.h"
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.arch armv8-a+crypto
|
||||
.text
|
||||
// gcm_init_v8: build the table of pre-computed hash-key powers.
// In:    x0 = output table (Htable), x1 = input H (16 bytes)
// Out:   six 16-byte entries written at x0:
//          [0]    twisted H
//          [1..2] packed Karatsuba pre-processed value, then H^2
//          [3..5] H^3, packed Karatsuba pre-processed value, H^4
// Uses:  v0-v7, v16-v22; x0 is advanced past the first three entries.
// v19 holds the reduction constant (annotated 0xc2.0 below) throughout.
.globl gcm_init_v8
|
||||
.type gcm_init_v8,%function
|
||||
.align 4
|
||||
gcm_init_v8:
|
||||
ld1 {v17.2d},[x1] //load input H
|
||||
movi v19.16b,#0xe1
|
||||
shl v19.2d,v19.2d,#57 //0xc2.0
|
||||
ext v3.16b,v17.16b,v17.16b,#8
|
||||
ushr v18.2d,v19.2d,#63
|
||||
dup v17.4s,v17.s[1]
|
||||
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
|
||||
ushr v18.2d,v3.2d,#63
|
||||
sshr v17.4s,v17.4s,#31 //broadcast carry bit
|
||||
and v18.16b,v18.16b,v16.16b
|
||||
shl v3.2d,v3.2d,#1
|
||||
ext v18.16b,v18.16b,v18.16b,#8
|
||||
and v16.16b,v16.16b,v17.16b
|
||||
orr v3.16b,v3.16b,v18.16b //H<<<=1
|
||||
eor v20.16b,v3.16b,v16.16b //twisted H
|
||||
st1 {v20.2d},[x0],#16 //store Htable[0]
|
||||
|
||||
//calculate H^2
|
||||
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
|
||||
pmull v0.1q,v20.1d,v20.1d
|
||||
eor v16.16b,v16.16b,v20.16b
|
||||
pmull2 v2.1q,v20.2d,v20.2d
|
||||
pmull v1.1q,v16.1d,v16.1d
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v22.16b,v0.16b,v18.16b
|
||||
|
||||
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
|
||||
eor v17.16b,v17.16b,v22.16b
|
||||
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
|
||||
st1 {v21.2d,v22.2d},[x0],#32 //store Htable[1..2]
|
||||
//calculate H^3 and H^4
|
||||
// The two products below (v20*v22 and v22*v22) are interleaved
// instruction by instruction; v0/v1/v2 carry the first, v5/v6/v7 the
// second.
pmull v0.1q,v20.1d, v22.1d
|
||||
pmull v5.1q,v22.1d,v22.1d
|
||||
pmull2 v2.1q,v20.2d, v22.2d
|
||||
pmull2 v7.1q,v22.2d,v22.2d
|
||||
pmull v1.1q,v16.1d,v17.1d
|
||||
pmull v6.1q,v17.1d,v17.1d
|
||||
|
||||
ext v16.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
ext v17.16b,v5.16b,v7.16b,#8
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v16.16b
|
||||
eor v4.16b,v5.16b,v7.16b
|
||||
eor v6.16b,v6.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase
|
||||
eor v6.16b,v6.16b,v4.16b
|
||||
pmull v4.1q,v5.1d,v19.1d
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v7.d[0],v6.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
ins v6.d[1],v5.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
eor v5.16b,v6.16b,v4.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
|
||||
ext v4.16b,v5.16b,v5.16b,#8
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
pmull v5.1q,v5.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v4.16b,v4.16b,v7.16b
|
||||
eor v20.16b, v0.16b,v18.16b //H^3
|
||||
eor v22.16b,v5.16b,v4.16b //H^4
|
||||
|
||||
ext v16.16b,v20.16b, v20.16b,#8 //Karatsuba pre-processing
|
||||
ext v17.16b,v22.16b,v22.16b,#8
|
||||
eor v16.16b,v16.16b,v20.16b
|
||||
eor v17.16b,v17.16b,v22.16b
|
||||
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
|
||||
st1 {v20.2d,v21.2d,v22.2d},[x0] //store Htable[3..5]
|
||||
ret
|
||||
.size gcm_init_v8,.-gcm_init_v8
|
||||
// gcm_gmult_v8: multiply Xi by the twisted hash key in place.
// In:    x0 = Xi (16 bytes, read then overwritten)
//        x1 = table whose first two 16-byte entries are loaded into
//             v20/v21 (twisted H and the following entry)
// Out:   result stored back to [x0]
// Uses:  v0-v3, v17-v21
// On builds without __ARMEB__ the value is byte-reversed within each
// 64-bit half on load and again before the store.
.globl gcm_gmult_v8
|
||||
.type gcm_gmult_v8,%function
|
||||
.align 4
|
||||
gcm_gmult_v8:
|
||||
ld1 {v17.2d},[x0] //load Xi
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
|
||||
shl v19.2d,v19.2d,#57
|
||||
#ifndef __ARMEB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ext v3.16b,v17.16b,v17.16b,#8
|
||||
|
||||
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
|
||||
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
|
||||
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
|
||||
#ifndef __ARMEB__
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
st1 {v0.2d},[x0] //write out Xi
|
||||
|
||||
ret
|
||||
.size gcm_gmult_v8,.-gcm_gmult_v8
|
||||
// gcm_ghash_v8: fold a run of 16-byte input blocks into Xi.
// In:    x0 = Xi (16 bytes, read then overwritten)
//        x1 = table of hash-key powers (three entries loaded here)
//        x2 = input pointer
//        x3 = input length in bytes
// Out:   updated Xi stored to [x0]
// Uses:  x12, v0-v7, v16-v22, flags
// Lengths of 64 bytes or more are handed to .Lgcm_ghash_v8_4x. Otherwise
// the input is consumed two blocks per iteration (.Loop_mod2x_v8) with a
// single-block tail (.Lodd_tail_v8).
// NOTE(review): the code assumes x3 is a non-zero multiple of 16 -- the
// first block is loaded unconditionally; confirm against callers.
.globl gcm_ghash_v8
|
||||
.type gcm_ghash_v8,%function
|
||||
.align 4
|
||||
gcm_ghash_v8:
|
||||
cmp x3,#64
|
||||
b.hs .Lgcm_ghash_v8_4x
|
||||
ld1 {v0.2d},[x0] //load [rotated] Xi
|
||||
//"[rotated]" means that
|
||||
//loaded value would have
|
||||
//to be rotated in order to
|
||||
//make it appear as in
|
||||
//algorithm specification
|
||||
subs x3,x3,#32 //see if x3 is 32 or larger
|
||||
mov x12,#16 //x12 is used as post-
|
||||
//increment for input pointer;
|
||||
//as loop is modulo-scheduled
|
||||
//x12 is zeroed just in time
|
||||
//to preclude overstepping
|
||||
//inp[len], which means that
|
||||
//last block[s] are actually
|
||||
//loaded twice, but last
|
||||
//copy is not processed
|
||||
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v22.2d},[x1]
|
||||
csel x12,xzr,x12,eq //is it time to zero x12?
|
||||
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
|
||||
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
|
||||
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
|
||||
#ifndef __ARMEB__
|
||||
rev64 v16.16b,v16.16b
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
|
||||
// The branch below still consumes the flags from "subs x3,x3,#32" above;
// none of the intervening instructions write the flags.
b.lo .Lodd_tail_v8 //x3 was less than 32
|
||||
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
|
||||
#ifndef __ARMEB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ext v7.16b,v17.16b,v17.16b,#8
|
||||
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
|
||||
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
|
||||
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
|
||||
pmull2 v6.1q,v20.2d,v7.2d
|
||||
b .Loop_mod2x_v8
|
||||
|
||||
.align 4
|
||||
// Two-block loop: each pass multiplies the accumulated block by H^2 and
// the following block by H, reduces, and pre-loads the next pair.
.Loop_mod2x_v8:
|
||||
ext v18.16b,v3.16b,v3.16b,#8
|
||||
subs x3,x3,#32 //is there more data?
|
||||
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
|
||||
csel x12,xzr,x12,lo //is it time to zero x12?
|
||||
|
||||
pmull v5.1q,v21.1d,v17.1d
|
||||
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
|
||||
eor v0.16b,v0.16b,v4.16b //accumulate
|
||||
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
|
||||
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
|
||||
|
||||
eor v2.16b,v2.16b,v6.16b
|
||||
csel x12,xzr,x12,eq //is it time to zero x12?
|
||||
eor v1.16b,v1.16b,v5.16b
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
|
||||
#ifndef __ARMEB__
|
||||
rev64 v16.16b,v16.16b
|
||||
#endif
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
#ifndef __ARMEB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
ext v7.16b,v17.16b,v17.16b,#8
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
|
||||
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v3.16b,v3.16b,v18.16b
|
||||
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
|
||||
eor v3.16b,v3.16b,v0.16b
|
||||
pmull2 v6.1q,v20.2d,v7.2d
|
||||
// Loop-back test uses the flags from the "subs x3,x3,#32" at the top of
// this iteration.
b.hs .Loop_mod2x_v8 //there was at least 32 more bytes
|
||||
|
||||
eor v2.16b,v2.16b,v18.16b
|
||||
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
|
||||
adds x3,x3,#32 //re-construct x3
|
||||
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
|
||||
b.eq .Ldone_v8 //is x3 zero?
|
||||
// Single remaining block: multiply (input ^ Xi) by H and reduce.
.Lodd_tail_v8:
|
||||
ext v18.16b,v0.16b,v0.16b,#8
|
||||
eor v3.16b,v3.16b,v0.16b //inp^=Xi
|
||||
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
|
||||
|
||||
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
|
||||
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
|
||||
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
|
||||
.Ldone_v8:
|
||||
#ifndef __ARMEB__
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
st1 {v0.2d},[x0] //write out Xi
|
||||
|
||||
ret
|
||||
.size gcm_ghash_v8,.-gcm_ghash_v8
|
||||
// gcm_ghash_v8_4x: four-blocks-per-iteration variant of the hash update.
// The symbol has no .globl directive; it is reached through the local
// label .Lgcm_ghash_v8_4x from gcm_ghash_v8 when the length is >= 64.
// In:    x0 = Xi (16 bytes, read then overwritten)
//        x1 = table of hash-key powers (six entries loaded here)
//        x2 = input pointer
//        x3 = input length in bytes (>= 64 on the path from gcm_ghash_v8)
// Out:   updated Xi stored to [x0]
// Uses:  v0-v7, v16-v31, flags; x1, x2 and x3 are modified.
// Register roles, as set up by the loads below:
//        v20,v21,v22 = first three table entries (H .. H^2)
//        v26,v27,v28 = next three table entries (H^3 .. H^4)
//        v19         = reduction constant
//        v29,v30,v31 = running partial products for the trailing three
//                      blocks of each group of four
// After the main loop, .Ltail4x dispatches on the 0/16/32/48 bytes that
// remain to .Ldone4x/.Lone/.Ltwo/.Lthree.
.type gcm_ghash_v8_4x,%function
|
||||
.align 4
|
||||
gcm_ghash_v8_4x:
|
||||
.Lgcm_ghash_v8_4x:
|
||||
ld1 {v0.2d},[x0] //load [rotated] Xi
|
||||
ld1 {v20.2d,v21.2d,v22.2d},[x1],#48 //load twisted H, ..., H^2
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v26.2d,v27.2d,v28.2d},[x1] //load twisted H^3, ..., H^4
|
||||
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
|
||||
|
||||
ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
|
||||
#ifndef __ARMEB__
|
||||
rev64 v0.16b,v0.16b
|
||||
rev64 v5.16b,v5.16b
|
||||
rev64 v6.16b,v6.16b
|
||||
rev64 v7.16b,v7.16b
|
||||
rev64 v4.16b,v4.16b
|
||||
#endif
|
||||
ext v25.16b,v7.16b,v7.16b,#8
|
||||
ext v24.16b,v6.16b,v6.16b,#8
|
||||
ext v23.16b,v5.16b,v5.16b,#8
|
||||
|
||||
pmull v29.1q,v20.1d,v25.1d //H·Ii+3
|
||||
eor v7.16b,v7.16b,v25.16b
|
||||
pmull2 v31.1q,v20.2d,v25.2d
|
||||
pmull v30.1q,v21.1d,v7.1d
|
||||
|
||||
pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2
|
||||
eor v6.16b,v6.16b,v24.16b
|
||||
pmull2 v24.1q,v22.2d,v24.2d
|
||||
pmull2 v6.1q,v21.2d,v6.2d
|
||||
|
||||
eor v29.16b,v29.16b,v16.16b
|
||||
eor v31.16b,v31.16b,v24.16b
|
||||
eor v30.16b,v30.16b,v6.16b
|
||||
|
||||
pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1
|
||||
eor v5.16b,v5.16b,v23.16b
|
||||
pmull2 v23.1q,v26.2d,v23.2d
|
||||
pmull v5.1q,v27.1d,v5.1d
|
||||
|
||||
eor v29.16b,v29.16b,v7.16b
|
||||
eor v31.16b,v31.16b,v23.16b
|
||||
eor v30.16b,v30.16b,v5.16b
|
||||
|
||||
// x3 becomes (length - 128): non-negative only if a further full group
// of four blocks follows the one already loaded.
subs x3,x3,#128
|
||||
b.lo .Ltail4x
|
||||
|
||||
b .Loop4x
|
||||
|
||||
.align 4
|
||||
// Main loop: finish the current group (first block ^ Xi times the v28
// entry, added to v29/v30/v31), reduce, and start the partial products
// for the next group, which is loaded at the top of the iteration.
.Loop4x:
|
||||
eor v16.16b,v4.16b,v0.16b
|
||||
ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
#ifndef __ARMEB__
|
||||
rev64 v5.16b,v5.16b
|
||||
rev64 v6.16b,v6.16b
|
||||
rev64 v7.16b,v7.16b
|
||||
rev64 v4.16b,v4.16b
|
||||
#endif
|
||||
|
||||
pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii)
|
||||
eor v16.16b,v16.16b,v3.16b
|
||||
pmull2 v2.1q,v28.2d,v3.2d
|
||||
ext v25.16b,v7.16b,v7.16b,#8
|
||||
pmull2 v1.1q,v27.2d,v16.2d
|
||||
|
||||
eor v0.16b,v0.16b,v29.16b
|
||||
eor v2.16b,v2.16b,v31.16b
|
||||
ext v24.16b,v6.16b,v6.16b,#8
|
||||
eor v1.16b,v1.16b,v30.16b
|
||||
ext v23.16b,v5.16b,v5.16b,#8
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
pmull v29.1q,v20.1d,v25.1d //H·Ii+3
|
||||
eor v7.16b,v7.16b,v25.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
pmull2 v31.1q,v20.2d,v25.2d
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v30.1q,v21.1d,v7.1d
|
||||
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2
|
||||
eor v6.16b,v6.16b,v24.16b
|
||||
pmull2 v24.1q,v22.2d,v24.2d
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
pmull2 v6.1q,v21.2d,v6.2d
|
||||
|
||||
eor v29.16b,v29.16b,v16.16b
|
||||
eor v31.16b,v31.16b,v24.16b
|
||||
eor v30.16b,v30.16b,v6.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1
|
||||
eor v5.16b,v5.16b,v23.16b
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
pmull2 v23.1q,v26.2d,v23.2d
|
||||
pmull v5.1q,v27.1d,v5.1d
|
||||
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
eor v29.16b,v29.16b,v7.16b
|
||||
eor v31.16b,v31.16b,v23.16b
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
eor v30.16b,v30.16b,v5.16b
|
||||
|
||||
subs x3,x3,#64
|
||||
b.hs .Loop4x
|
||||
|
||||
// Tail: complete the last full group of four, then handle 0-3 leftover
// blocks. The product is left unreduced in v0/v1/v2 for the code that
// follows.
.Ltail4x:
|
||||
eor v16.16b,v4.16b,v0.16b
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
|
||||
pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii)
|
||||
eor v16.16b,v16.16b,v3.16b
|
||||
pmull2 v2.1q,v28.2d,v3.2d
|
||||
pmull2 v1.1q,v27.2d,v16.2d
|
||||
|
||||
eor v0.16b,v0.16b,v29.16b
|
||||
eor v2.16b,v2.16b,v31.16b
|
||||
eor v1.16b,v1.16b,v30.16b
|
||||
|
||||
// x3 is restored to the number of bytes still unprocessed.
adds x3,x3,#64
|
||||
b.eq .Ldone4x
|
||||
|
||||
cmp x3,#32
|
||||
b.lo .Lone
|
||||
b.eq .Ltwo
|
||||
// Three blocks remain.
.Lthree:
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
ld1 {v4.2d,v5.2d,v6.2d},[x2]
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
#ifndef __ARMEB__
|
||||
rev64 v5.16b,v5.16b
|
||||
rev64 v6.16b,v6.16b
|
||||
rev64 v4.16b,v4.16b
|
||||
#endif
|
||||
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
ext v24.16b,v6.16b,v6.16b,#8
|
||||
ext v23.16b,v5.16b,v5.16b,#8
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
pmull v29.1q,v20.1d,v24.1d //H·Ii+2
|
||||
eor v6.16b,v6.16b,v24.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
pmull2 v31.1q,v20.2d,v24.2d
|
||||
pmull v30.1q,v21.1d,v6.1d
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
pmull v7.1q,v22.1d,v23.1d //H^2·Ii+1
|
||||
eor v5.16b,v5.16b,v23.16b
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
|
||||
pmull2 v23.1q,v22.2d,v23.2d
|
||||
eor v16.16b,v4.16b,v0.16b
|
||||
pmull2 v5.1q,v21.2d,v5.2d
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
|
||||
eor v29.16b,v29.16b,v7.16b
|
||||
eor v31.16b,v31.16b,v23.16b
|
||||
eor v30.16b,v30.16b,v5.16b
|
||||
|
||||
pmull v0.1q,v26.1d,v3.1d //H^3·(Xi+Ii)
|
||||
eor v16.16b,v16.16b,v3.16b
|
||||
pmull2 v2.1q,v26.2d,v3.2d
|
||||
pmull v1.1q,v27.1d,v16.1d
|
||||
|
||||
eor v0.16b,v0.16b,v29.16b
|
||||
eor v2.16b,v2.16b,v31.16b
|
||||
eor v1.16b,v1.16b,v30.16b
|
||||
b .Ldone4x
|
||||
|
||||
.align 4
|
||||
// Two blocks remain.
.Ltwo:
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
ld1 {v4.2d,v5.2d},[x2]
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
#ifndef __ARMEB__
|
||||
rev64 v5.16b,v5.16b
|
||||
rev64 v4.16b,v4.16b
|
||||
#endif
|
||||
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
ext v23.16b,v5.16b,v5.16b,#8
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
|
||||
pmull v29.1q,v20.1d,v23.1d //H·Ii+1
|
||||
eor v5.16b,v5.16b,v23.16b
|
||||
|
||||
eor v16.16b,v4.16b,v0.16b
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
|
||||
pmull2 v31.1q,v20.2d,v23.2d
|
||||
pmull v30.1q,v21.1d,v5.1d
|
||||
|
||||
pmull v0.1q,v22.1d,v3.1d //H^2·(Xi+Ii)
|
||||
eor v16.16b,v16.16b,v3.16b
|
||||
pmull2 v2.1q,v22.2d,v3.2d
|
||||
pmull2 v1.1q,v21.2d,v16.2d
|
||||
|
||||
eor v0.16b,v0.16b,v29.16b
|
||||
eor v2.16b,v2.16b,v31.16b
|
||||
eor v1.16b,v1.16b,v30.16b
|
||||
b .Ldone4x
|
||||
|
||||
.align 4
|
||||
// One block remains; falls through into .Ldone4x.
.Lone:
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
ld1 {v4.2d},[x2]
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
#ifndef __ARMEB__
|
||||
rev64 v4.16b,v4.16b
|
||||
#endif
|
||||
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
|
||||
eor v16.16b,v4.16b,v0.16b
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
|
||||
pmull v0.1q,v20.1d,v3.1d
|
||||
eor v16.16b,v16.16b,v3.16b
|
||||
pmull2 v2.1q,v20.2d,v3.2d
|
||||
pmull v1.1q,v21.1d,v16.1d
|
||||
|
||||
// Common exit: reduce the pending product in v0/v1/v2 and store Xi.
.Ldone4x:
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
|
||||
#ifndef __ARMEB__
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
st1 {v0.2d},[x0] //write out Xi
|
||||
|
||||
ret
|
||||
.size gcm_ghash_v8_4x,.-gcm_ghash_v8_4x
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,864 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from poly1305-armv8.pl. */
|
||||
#include "arm_arch.h"
|
||||
|
||||
.text
|
||||
|
||||
// forward "declarations" are required for Apple
|
||||
|
||||
.hidden OPENSSL_armcap_P
|
||||
.globl poly1305_init
|
||||
.hidden poly1305_init
|
||||
.globl poly1305_blocks
|
||||
.hidden poly1305_blocks
|
||||
.globl poly1305_emit
|
||||
.hidden poly1305_emit
|
||||
|
||||
// poly1305_init: reset the hash state and, if a key is given, install it.
// In:    x0 = context, x1 = key pointer (may be null), x2 = destination
//             for two function pointers
// Out:   x0 = 0 if x1 was null, 1 otherwise
// Uses:  x7-x9, x12, x13, x17, flags
// Context layout touched here: bytes 0..31 are zeroed (hash value and the
// is_base2_26 flag); the clamped key is stored at offset 32.
// The two pointers written to [x2] are the scalar .Lpoly1305_blocks /
// .Lpoly1305_emit when the ARMV7_NEON bit of OPENSSL_armcap_P is clear,
// and the *_neon variants when it is set.
.type poly1305_init,%function
|
||||
.align 5
|
||||
poly1305_init:
|
||||
cmp x1,xzr
|
||||
stp xzr,xzr,[x0] // zero hash value
|
||||
stp xzr,xzr,[x0,#16] // [along with is_base2_26]
|
||||
|
||||
// Flags from "cmp x1,xzr" are still live: the stores above do not
// change them.
csel x0,xzr,x0,eq
|
||||
b.eq .Lno_key
|
||||
|
||||
adrp x17,OPENSSL_armcap_P
|
||||
ldr w17,[x17,#:lo12:OPENSSL_armcap_P]
|
||||
|
||||
ldp x7,x8,[x1] // load key
|
||||
mov x9,#0xfffffffc0fffffff
|
||||
movk x9,#0x0fff,lsl#48
|
||||
#ifdef __ARMEB__
|
||||
rev x7,x7 // flip bytes
|
||||
rev x8,x8
|
||||
#endif
|
||||
and x7,x7,x9 // &=0ffffffc0fffffff
|
||||
and x9,x9,#-4
|
||||
and x8,x8,x9 // &=0ffffffc0ffffffc
|
||||
stp x7,x8,[x0,#32] // save key value
|
||||
|
||||
tst w17,#ARMV7_NEON
|
||||
|
||||
adr x12,.Lpoly1305_blocks
|
||||
adr x7,.Lpoly1305_blocks_neon
|
||||
adr x13,.Lpoly1305_emit
|
||||
adr x8,.Lpoly1305_emit_neon
|
||||
|
||||
// eq means the NEON capability bit tested above is clear: keep the
// scalar entry points in x12/x13.
csel x12,x12,x7,eq
|
||||
csel x13,x13,x8,eq
|
||||
|
||||
#ifdef __ILP32__
|
||||
stp w12,w13,[x2]
|
||||
#else
|
||||
stp x12,x13,[x2]
|
||||
#endif
|
||||
|
||||
mov x0,#1
|
||||
.Lno_key:
|
||||
ret
|
||||
.size poly1305_init,.-poly1305_init
|
||||
|
||||
// poly1305_blocks: absorb 16-byte blocks into the hash (scalar path).
// In:    x0 = context (hash at offsets 0/8/16, key at offsets 32/40)
//        x1 = input pointer
//        x2 = length in bytes; rounded down to a multiple of 16 here
//        x3 = value added, with carry, into the top hash word for every
//             block
// Out:   updated hash stored back to the context; nothing is written if
//        the rounded length is zero
// Uses:  x4-x14, flags; x1 and x2 are consumed
// Register roles inside the loop:
//        x4,x5,x6 = h0,h1,h2   x7,x8 = r0,r1   x9 = s1 = r1 + (r1 >> 2)
.type poly1305_blocks,%function
|
||||
.align 5
|
||||
poly1305_blocks:
|
||||
.Lpoly1305_blocks:
|
||||
ands x2,x2,#-16
|
||||
b.eq .Lno_data
|
||||
|
||||
ldp x4,x5,[x0] // load hash value
|
||||
ldp x7,x8,[x0,#32] // load key value
|
||||
ldr x6,[x0,#16]
|
||||
add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
|
||||
b .Loop
|
||||
|
||||
.align 5
|
||||
// One iteration per 16-byte block: h += block; h *= r; partial reduction.
.Loop:
|
||||
ldp x10,x11,[x1],#16 // load input
|
||||
sub x2,x2,#16
|
||||
#ifdef __ARMEB__
|
||||
rev x10,x10
|
||||
rev x11,x11
|
||||
#endif
|
||||
adds x4,x4,x10 // accumulate input
|
||||
adcs x5,x5,x11
|
||||
|
||||
mul x12,x4,x7 // h0*r0
|
||||
// The carry consumed here comes from the adcs above; mul does not
// write the flags.
adc x6,x6,x3
|
||||
umulh x13,x4,x7
|
||||
|
||||
mul x10,x5,x9 // h1*5*r1
|
||||
umulh x11,x5,x9
|
||||
|
||||
adds x12,x12,x10
|
||||
mul x10,x4,x8 // h0*r1
|
||||
adc x13,x13,x11
|
||||
umulh x14,x4,x8
|
||||
|
||||
adds x13,x13,x10
|
||||
mul x10,x5,x7 // h1*r0
|
||||
adc x14,x14,xzr
|
||||
umulh x11,x5,x7
|
||||
|
||||
adds x13,x13,x10
|
||||
mul x10,x6,x9 // h2*5*r1
|
||||
adc x14,x14,x11
|
||||
mul x11,x6,x7 // h2*r0
|
||||
|
||||
adds x13,x13,x10
|
||||
adc x14,x14,x11
|
||||
|
||||
and x10,x14,#-4 // final reduction
|
||||
and x6,x14,#3
|
||||
add x10,x10,x14,lsr#2
|
||||
adds x4,x12,x10
|
||||
adcs x5,x13,xzr
|
||||
adc x6,x6,xzr
|
||||
|
||||
cbnz x2,.Loop
|
||||
|
||||
stp x4,x5,[x0] // store hash value
|
||||
str x6,[x0,#16]
|
||||
|
||||
.Lno_data:
|
||||
ret
|
||||
.size poly1305_blocks,.-poly1305_blocks
|
||||
|
||||
// poly1305_emit: produce the 16-byte result from the hash (scalar path).
// In:    x0 = context (hash words at offsets 0/8/16)
//        x1 = output buffer (16 bytes)
//        x2 = nonce (16 bytes)
// Out:   16 bytes written to [x1]
// Uses:  x4-x6, x10-x14, flags
// Steps: compute h + 5 and, if that sum reaches bit 2 of the top word or
// above, use it in place of h (only the low two words are kept); add the
// nonce to the low 128 bits; store.
.type poly1305_emit,%function
|
||||
.align 5
|
||||
poly1305_emit:
|
||||
.Lpoly1305_emit:
|
||||
ldp x4,x5,[x0] // load hash base 2^64
|
||||
ldr x6,[x0,#16]
|
||||
ldp x10,x11,[x2] // load nonce
|
||||
|
||||
adds x12,x4,#5 // compare to modulus
|
||||
adcs x13,x5,xzr
|
||||
adc x14,x6,xzr
|
||||
|
||||
tst x14,#-4 // see if it's carried/borrowed
|
||||
|
||||
csel x4,x4,x12,eq
|
||||
csel x5,x5,x13,eq
|
||||
|
||||
#ifdef __ARMEB__
|
||||
ror x10,x10,#32 // flip nonce words
|
||||
ror x11,x11,#32
|
||||
#endif
|
||||
adds x4,x4,x10 // accumulate nonce
|
||||
adc x5,x5,x11
|
||||
#ifdef __ARMEB__
|
||||
rev x4,x4 // flip output bytes
|
||||
rev x5,x5
|
||||
#endif
|
||||
stp x4,x5,[x1] // write result
|
||||
|
||||
ret
|
||||
.size poly1305_emit,.-poly1305_emit
|
||||
// poly1305_mult: file-local helper (no .globl) performing one
// multiply-and-reduce step entirely in registers. The instruction
// sequence matches the multiply/reduce portion of the .Loop body in
// poly1305_blocks.
// In:    x4,x5,x6 = h0,h1,h2   x7,x8 = r0,r1   x9 = s1 (annotated 5*r1
//        scaled; callers compute it as r1 + (r1 >> 2))
// Out:   x4,x5,x6 = updated h0,h1,h2
// Uses:  x10-x14, flags. Does not touch memory or the stack.
.type poly1305_mult,%function
|
||||
.align 5
|
||||
poly1305_mult:
|
||||
mul x12,x4,x7 // h0*r0
|
||||
umulh x13,x4,x7
|
||||
|
||||
mul x10,x5,x9 // h1*5*r1
|
||||
umulh x11,x5,x9
|
||||
|
||||
adds x12,x12,x10
|
||||
mul x10,x4,x8 // h0*r1
|
||||
adc x13,x13,x11
|
||||
umulh x14,x4,x8
|
||||
|
||||
adds x13,x13,x10
|
||||
mul x10,x5,x7 // h1*r0
|
||||
adc x14,x14,xzr
|
||||
umulh x11,x5,x7
|
||||
|
||||
adds x13,x13,x10
|
||||
mul x10,x6,x9 // h2*5*r1
|
||||
adc x14,x14,x11
|
||||
mul x11,x6,x7 // h2*r0
|
||||
|
||||
adds x13,x13,x10
|
||||
adc x14,x14,x11
|
||||
|
||||
and x10,x14,#-4 // final reduction
|
||||
and x6,x14,#3
|
||||
add x10,x10,x14,lsr#2
|
||||
adds x4,x12,x10
|
||||
adcs x5,x13,xzr
|
||||
adc x6,x6,xzr
|
||||
|
||||
ret
|
||||
.size poly1305_mult,.-poly1305_mult
|
||||
|
||||
// poly1305_splat: file-local helper (no .globl) that splits the value in
// x4:x5:x6 into five 26-bit limbs and stores them, together with four
// limbs multiplied by 5, as 32-bit words at a 16-byte stride from x0.
// In:    x0 = destination base, x4,x5,x6 = value in base 2^64
// Out:   words at x0 + 16*k for k = 0..8, in the order
//        r0, r1, s1, r2, s2, r3, s3, r4, s4   (sN = 5 * rN)
// Uses:  x12-x16. x0 and x4-x6 are left unchanged.
.type poly1305_splat,%function
|
||||
.align 5
|
||||
poly1305_splat:
|
||||
and x12,x4,#0x03ffffff // base 2^64 -> base 2^26
|
||||
ubfx x13,x4,#26,#26
|
||||
extr x14,x5,x4,#52
|
||||
and x14,x14,#0x03ffffff
|
||||
ubfx x15,x5,#14,#26
|
||||
extr x16,x6,x5,#40
|
||||
|
||||
// Stores are interleaved with the x5 computations; each register is
// stored before it is reused for the next product.
str w12,[x0,#16*0] // r0
|
||||
add w12,w13,w13,lsl#2 // r1*5
|
||||
str w13,[x0,#16*1] // r1
|
||||
add w13,w14,w14,lsl#2 // r2*5
|
||||
str w12,[x0,#16*2] // s1
|
||||
str w14,[x0,#16*3] // r2
|
||||
add w14,w15,w15,lsl#2 // r3*5
|
||||
str w13,[x0,#16*4] // s2
|
||||
str w15,[x0,#16*5] // r3
|
||||
add w15,w16,w16,lsl#2 // r4*5
|
||||
str w14,[x0,#16*6] // s3
|
||||
str w16,[x0,#16*7] // r4
|
||||
str w15,[x0,#16*8] // s4
|
||||
|
||||
ret
|
||||
.size poly1305_splat,.-poly1305_splat
|
||||
|
||||
.type poly1305_blocks_neon,%function
|
||||
.align 5
|
||||
poly1305_blocks_neon:
|
||||
.Lpoly1305_blocks_neon:
|
||||
ldr x17,[x0,#24]
|
||||
cmp x2,#128
|
||||
b.hs .Lblocks_neon
|
||||
cbz x17,.Lpoly1305_blocks
|
||||
|
||||
.Lblocks_neon:
|
||||
.inst 0xd503233f // paciasp
|
||||
stp x29,x30,[sp,#-80]!
|
||||
add x29,sp,#0
|
||||
|
||||
ands x2,x2,#-16
|
||||
b.eq .Lno_data_neon
|
||||
|
||||
cbz x17,.Lbase2_64_neon
|
||||
|
||||
ldp w10,w11,[x0] // load hash value base 2^26
|
||||
ldp w12,w13,[x0,#8]
|
||||
ldr w14,[x0,#16]
|
||||
|
||||
tst x2,#31
|
||||
b.eq .Leven_neon
|
||||
|
||||
ldp x7,x8,[x0,#32] // load key value
|
||||
|
||||
add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64
|
||||
lsr x5,x12,#12
|
||||
adds x4,x4,x12,lsl#52
|
||||
add x5,x5,x13,lsl#14
|
||||
adc x5,x5,xzr
|
||||
lsr x6,x14,#24
|
||||
adds x5,x5,x14,lsl#40
|
||||
adc x14,x6,xzr // can be partially reduced...
|
||||
|
||||
ldp x12,x13,[x1],#16 // load input
|
||||
sub x2,x2,#16
|
||||
add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
|
||||
|
||||
and x10,x14,#-4 // ... so reduce
|
||||
and x6,x14,#3
|
||||
add x10,x10,x14,lsr#2
|
||||
adds x4,x4,x10
|
||||
adcs x5,x5,xzr
|
||||
adc x6,x6,xzr
|
||||
|
||||
#ifdef __ARMEB__
|
||||
rev x12,x12
|
||||
rev x13,x13
|
||||
#endif
|
||||
adds x4,x4,x12 // accumulate input
|
||||
adcs x5,x5,x13
|
||||
adc x6,x6,x3
|
||||
|
||||
bl poly1305_mult
|
||||
ldr x30,[sp,#8]
|
||||
|
||||
cbz x3,.Lstore_base2_64_neon
|
||||
|
||||
and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
|
||||
ubfx x11,x4,#26,#26
|
||||
extr x12,x5,x4,#52
|
||||
and x12,x12,#0x03ffffff
|
||||
ubfx x13,x5,#14,#26
|
||||
extr x14,x6,x5,#40
|
||||
|
||||
cbnz x2,.Leven_neon
|
||||
|
||||
stp w10,w11,[x0] // store hash value base 2^26
|
||||
stp w12,w13,[x0,#8]
|
||||
str w14,[x0,#16]
|
||||
b .Lno_data_neon
|
||||
|
||||
.align 4
|
||||
.Lstore_base2_64_neon:
|
||||
stp x4,x5,[x0] // store hash value base 2^64
|
||||
stp x6,xzr,[x0,#16] // note that is_base2_26 is zeroed
|
||||
b .Lno_data_neon
|
||||
|
||||
.align 4
|
||||
.Lbase2_64_neon:
|
||||
ldp x7,x8,[x0,#32] // load key value
|
||||
|
||||
ldp x4,x5,[x0] // load hash value base 2^64
|
||||
ldr x6,[x0,#16]
|
||||
|
||||
tst x2,#31
|
||||
b.eq .Linit_neon
|
||||
|
||||
ldp x12,x13,[x1],#16 // load input
|
||||
sub x2,x2,#16
|
||||
add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
|
||||
#ifdef __ARMEB__
|
||||
rev x12,x12
|
||||
rev x13,x13
|
||||
#endif
|
||||
adds x4,x4,x12 // accumulate input
|
||||
adcs x5,x5,x13
|
||||
adc x6,x6,x3
|
||||
|
||||
bl poly1305_mult
|
||||
|
||||
.Linit_neon:
|
||||
and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
|
||||
ubfx x11,x4,#26,#26
|
||||
extr x12,x5,x4,#52
|
||||
and x12,x12,#0x03ffffff
|
||||
ubfx x13,x5,#14,#26
|
||||
extr x14,x6,x5,#40
|
||||
|
||||
stp d8,d9,[sp,#16] // meet ABI requirements
|
||||
stp d10,d11,[sp,#32]
|
||||
stp d12,d13,[sp,#48]
|
||||
stp d14,d15,[sp,#64]
|
||||
|
||||
fmov d24,x10
|
||||
fmov d25,x11
|
||||
fmov d26,x12
|
||||
fmov d27,x13
|
||||
fmov d28,x14
|
||||
|
||||
////////////////////////////////// initialize r^n table
|
||||
mov x4,x7 // r^1
|
||||
add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
|
||||
mov x5,x8
|
||||
mov x6,xzr
|
||||
add x0,x0,#48+12
|
||||
bl poly1305_splat
|
||||
|
||||
bl poly1305_mult // r^2
|
||||
sub x0,x0,#4
|
||||
bl poly1305_splat
|
||||
|
||||
bl poly1305_mult // r^3
|
||||
sub x0,x0,#4
|
||||
bl poly1305_splat
|
||||
|
||||
bl poly1305_mult // r^4
|
||||
sub x0,x0,#4
|
||||
bl poly1305_splat
|
||||
ldr x30,[sp,#8]
|
||||
|
||||
add x16,x1,#32
|
||||
adr x17,.Lzeros
|
||||
subs x2,x2,#64
|
||||
csel x16,x17,x16,lo
|
||||
|
||||
mov x4,#1
|
||||
stur x4,[x0,#-24] // set is_base2_26
|
||||
sub x0,x0,#48 // restore original x0
|
||||
b .Ldo_neon
|
||||
|
||||
.align 4
|
||||
.Leven_neon:
|
||||
add x16,x1,#32
|
||||
adr x17,.Lzeros
|
||||
subs x2,x2,#64
|
||||
csel x16,x17,x16,lo
|
||||
|
||||
stp d8,d9,[sp,#16] // meet ABI requirements
|
||||
stp d10,d11,[sp,#32]
|
||||
stp d12,d13,[sp,#48]
|
||||
stp d14,d15,[sp,#64]
|
||||
|
||||
fmov d24,x10
|
||||
fmov d25,x11
|
||||
fmov d26,x12
|
||||
fmov d27,x13
|
||||
fmov d28,x14
|
||||
|
||||
.Ldo_neon:
|
||||
ldp x8,x12,[x16],#16 // inp[2:3] (or zero)
|
||||
ldp x9,x13,[x16],#48
|
||||
|
||||
lsl x3,x3,#24
|
||||
add x15,x0,#48
|
||||
|
||||
#ifdef __ARMEB__
|
||||
rev x8,x8
|
||||
rev x12,x12
|
||||
rev x9,x9
|
||||
rev x13,x13
|
||||
#endif
|
||||
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
|
||||
and x5,x9,#0x03ffffff
|
||||
ubfx x6,x8,#26,#26
|
||||
ubfx x7,x9,#26,#26
|
||||
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
|
||||
extr x8,x12,x8,#52
|
||||
extr x9,x13,x9,#52
|
||||
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
|
||||
fmov d14,x4
|
||||
and x8,x8,#0x03ffffff
|
||||
and x9,x9,#0x03ffffff
|
||||
ubfx x10,x12,#14,#26
|
||||
ubfx x11,x13,#14,#26
|
||||
add x12,x3,x12,lsr#40
|
||||
add x13,x3,x13,lsr#40
|
||||
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
|
||||
fmov d15,x6
|
||||
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
|
||||
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
|
||||
fmov d16,x8
|
||||
fmov d17,x10
|
||||
fmov d18,x12
|
||||
|
||||
ldp x8,x12,[x1],#16 // inp[0:1]
|
||||
ldp x9,x13,[x1],#48
|
||||
|
||||
ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x15],#64
|
||||
ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x15],#64
|
||||
ld1 {v8.4s},[x15]
|
||||
|
||||
#ifdef __ARMEB__
|
||||
rev x8,x8
|
||||
rev x12,x12
|
||||
rev x9,x9
|
||||
rev x13,x13
|
||||
#endif
|
||||
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
|
||||
and x5,x9,#0x03ffffff
|
||||
ubfx x6,x8,#26,#26
|
||||
ubfx x7,x9,#26,#26
|
||||
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
|
||||
extr x8,x12,x8,#52
|
||||
extr x9,x13,x9,#52
|
||||
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
|
||||
fmov d9,x4
|
||||
and x8,x8,#0x03ffffff
|
||||
and x9,x9,#0x03ffffff
|
||||
ubfx x10,x12,#14,#26
|
||||
ubfx x11,x13,#14,#26
|
||||
add x12,x3,x12,lsr#40
|
||||
add x13,x3,x13,lsr#40
|
||||
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
|
||||
fmov d10,x6
|
||||
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
|
||||
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
|
||||
movi v31.2d,#-1
|
||||
fmov d11,x8
|
||||
fmov d12,x10
|
||||
fmov d13,x12
|
||||
ushr v31.2d,v31.2d,#38
|
||||
|
||||
b.ls .Lskip_loop
|
||||
|
||||
.align 4
|
||||
.Loop_neon:
|
||||
////////////////////////////////////////////////////////////////
|
||||
// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
|
||||
// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
|
||||
//   \___________________/
|
||||
// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
|
||||
// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
|
||||
//   \___________________/ \____________________/
|
||||
//
|
||||
// Note that we start with inp[2:3]*r^2. This is because it
|
||||
// doesn't depend on reduction in previous iteration.
|
||||
////////////////////////////////////////////////////////////////
|
||||
// d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0
|
||||
// d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4
|
||||
// d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3
|
||||
// d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2
|
||||
// d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
|
||||
|
||||
subs x2,x2,#64
|
||||
umull v23.2d,v14.2s,v7.s[2]
|
||||
csel x16,x17,x16,lo
|
||||
umull v22.2d,v14.2s,v5.s[2]
|
||||
umull v21.2d,v14.2s,v3.s[2]
|
||||
ldp x8,x12,[x16],#16 // inp[2:3] (or zero)
|
||||
umull v20.2d,v14.2s,v1.s[2]
|
||||
ldp x9,x13,[x16],#48
|
||||
umull v19.2d,v14.2s,v0.s[2]
|
||||
#ifdef __ARMEB__
|
||||
rev x8,x8
|
||||
rev x12,x12
|
||||
rev x9,x9
|
||||
rev x13,x13
|
||||
#endif
|
||||
|
||||
umlal v23.2d,v15.2s,v5.s[2]
|
||||
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
|
||||
umlal v22.2d,v15.2s,v3.s[2]
|
||||
and x5,x9,#0x03ffffff
|
||||
umlal v21.2d,v15.2s,v1.s[2]
|
||||
ubfx x6,x8,#26,#26
|
||||
umlal v20.2d,v15.2s,v0.s[2]
|
||||
ubfx x7,x9,#26,#26
|
||||
umlal v19.2d,v15.2s,v8.s[2]
|
||||
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
|
||||
|
||||
umlal v23.2d,v16.2s,v3.s[2]
|
||||
extr x8,x12,x8,#52
|
||||
umlal v22.2d,v16.2s,v1.s[2]
|
||||
extr x9,x13,x9,#52
|
||||
umlal v21.2d,v16.2s,v0.s[2]
|
||||
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
|
||||
umlal v20.2d,v16.2s,v8.s[2]
|
||||
fmov d14,x4
|
||||
umlal v19.2d,v16.2s,v6.s[2]
|
||||
and x8,x8,#0x03ffffff
|
||||
|
||||
umlal v23.2d,v17.2s,v1.s[2]
|
||||
and x9,x9,#0x03ffffff
|
||||
umlal v22.2d,v17.2s,v0.s[2]
|
||||
ubfx x10,x12,#14,#26
|
||||
umlal v21.2d,v17.2s,v8.s[2]
|
||||
ubfx x11,x13,#14,#26
|
||||
umlal v20.2d,v17.2s,v6.s[2]
|
||||
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
|
||||
umlal v19.2d,v17.2s,v4.s[2]
|
||||
fmov d15,x6
|
||||
|
||||
add v11.2s,v11.2s,v26.2s
|
||||
add x12,x3,x12,lsr#40
|
||||
umlal v23.2d,v18.2s,v0.s[2]
|
||||
add x13,x3,x13,lsr#40
|
||||
umlal v22.2d,v18.2s,v8.s[2]
|
||||
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
|
||||
umlal v21.2d,v18.2s,v6.s[2]
|
||||
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
|
||||
umlal v20.2d,v18.2s,v4.s[2]
|
||||
fmov d16,x8
|
||||
umlal v19.2d,v18.2s,v2.s[2]
|
||||
fmov d17,x10
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// (hash+inp[0:1])*r^4 and accumulate
|
||||
|
||||
add v9.2s,v9.2s,v24.2s
|
||||
fmov d18,x12
|
||||
umlal v22.2d,v11.2s,v1.s[0]
|
||||
ldp x8,x12,[x1],#16 // inp[0:1]
|
||||
umlal v19.2d,v11.2s,v6.s[0]
|
||||
ldp x9,x13,[x1],#48
|
||||
umlal v23.2d,v11.2s,v3.s[0]
|
||||
umlal v20.2d,v11.2s,v8.s[0]
|
||||
umlal v21.2d,v11.2s,v0.s[0]
|
||||
#ifdef __ARMEB__
|
||||
rev x8,x8
|
||||
rev x12,x12
|
||||
rev x9,x9
|
||||
rev x13,x13
|
||||
#endif
|
||||
|
||||
add v10.2s,v10.2s,v25.2s
|
||||
umlal v22.2d,v9.2s,v5.s[0]
|
||||
umlal v23.2d,v9.2s,v7.s[0]
|
||||
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
|
||||
umlal v21.2d,v9.2s,v3.s[0]
|
||||
and x5,x9,#0x03ffffff
|
||||
umlal v19.2d,v9.2s,v0.s[0]
|
||||
ubfx x6,x8,#26,#26
|
||||
umlal v20.2d,v9.2s,v1.s[0]
|
||||
ubfx x7,x9,#26,#26
|
||||
|
||||
add v12.2s,v12.2s,v27.2s
|
||||
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
|
||||
umlal v22.2d,v10.2s,v3.s[0]
|
||||
extr x8,x12,x8,#52
|
||||
umlal v23.2d,v10.2s,v5.s[0]
|
||||
extr x9,x13,x9,#52
|
||||
umlal v19.2d,v10.2s,v8.s[0]
|
||||
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
|
||||
umlal v21.2d,v10.2s,v1.s[0]
|
||||
fmov d9,x4
|
||||
umlal v20.2d,v10.2s,v0.s[0]
|
||||
and x8,x8,#0x03ffffff
|
||||
|
||||
add v13.2s,v13.2s,v28.2s
|
||||
and x9,x9,#0x03ffffff
|
||||
umlal v22.2d,v12.2s,v0.s[0]
|
||||
ubfx x10,x12,#14,#26
|
||||
umlal v19.2d,v12.2s,v4.s[0]
|
||||
ubfx x11,x13,#14,#26
|
||||
umlal v23.2d,v12.2s,v1.s[0]
|
||||
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
|
||||
umlal v20.2d,v12.2s,v6.s[0]
|
||||
fmov d10,x6
|
||||
umlal v21.2d,v12.2s,v8.s[0]
|
||||
add x12,x3,x12,lsr#40
|
||||
|
||||
umlal v22.2d,v13.2s,v8.s[0]
|
||||
add x13,x3,x13,lsr#40
|
||||
umlal v19.2d,v13.2s,v2.s[0]
|
||||
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
|
||||
umlal v23.2d,v13.2s,v0.s[0]
|
||||
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
|
||||
umlal v20.2d,v13.2s,v4.s[0]
|
||||
fmov d11,x8
|
||||
umlal v21.2d,v13.2s,v6.s[0]
|
||||
fmov d12,x10
|
||||
fmov d13,x12
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
|
||||
// and P. Schwabe
|
||||
//
|
||||
// [see discussion in poly1305-armv4 module]
|
||||
|
||||
ushr v29.2d,v22.2d,#26
|
||||
xtn v27.2s,v22.2d
|
||||
ushr v30.2d,v19.2d,#26
|
||||
and v19.16b,v19.16b,v31.16b
|
||||
add v23.2d,v23.2d,v29.2d // h3 -> h4
|
||||
bic v27.2s,#0xfc,lsl#24 // &=0x03ffffff
|
||||
add v20.2d,v20.2d,v30.2d // h0 -> h1
|
||||
|
||||
ushr v29.2d,v23.2d,#26
|
||||
xtn v28.2s,v23.2d
|
||||
ushr v30.2d,v20.2d,#26
|
||||
xtn v25.2s,v20.2d
|
||||
bic v28.2s,#0xfc,lsl#24
|
||||
add v21.2d,v21.2d,v30.2d // h1 -> h2
|
||||
|
||||
add v19.2d,v19.2d,v29.2d
|
||||
shl v29.2d,v29.2d,#2
|
||||
shrn v30.2s,v21.2d,#26
|
||||
xtn v26.2s,v21.2d
|
||||
add v19.2d,v19.2d,v29.2d // h4 -> h0
|
||||
bic v25.2s,#0xfc,lsl#24
|
||||
add v27.2s,v27.2s,v30.2s // h2 -> h3
|
||||
bic v26.2s,#0xfc,lsl#24
|
||||
|
||||
shrn v29.2s,v19.2d,#26
|
||||
xtn v24.2s,v19.2d
|
||||
ushr v30.2s,v27.2s,#26
|
||||
bic v27.2s,#0xfc,lsl#24
|
||||
bic v24.2s,#0xfc,lsl#24
|
||||
add v25.2s,v25.2s,v29.2s // h0 -> h1
|
||||
add v28.2s,v28.2s,v30.2s // h3 -> h4
|
||||
|
||||
b.hi .Loop_neon
|
||||
|
||||
.Lskip_loop:
|
||||
dup v16.2d,v16.d[0]
|
||||
add v11.2s,v11.2s,v26.2s
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
|
||||
|
||||
adds x2,x2,#32
|
||||
b.ne .Long_tail
|
||||
|
||||
dup v16.2d,v11.d[0]
|
||||
add v14.2s,v9.2s,v24.2s
|
||||
add v17.2s,v12.2s,v27.2s
|
||||
add v15.2s,v10.2s,v25.2s
|
||||
add v18.2s,v13.2s,v28.2s
|
||||
|
||||
.Long_tail:
|
||||
dup v14.2d,v14.d[0]
|
||||
umull2 v19.2d,v16.4s,v6.4s
|
||||
umull2 v22.2d,v16.4s,v1.4s
|
||||
umull2 v23.2d,v16.4s,v3.4s
|
||||
umull2 v21.2d,v16.4s,v0.4s
|
||||
umull2 v20.2d,v16.4s,v8.4s
|
||||
|
||||
dup v15.2d,v15.d[0]
|
||||
umlal2 v19.2d,v14.4s,v0.4s
|
||||
umlal2 v21.2d,v14.4s,v3.4s
|
||||
umlal2 v22.2d,v14.4s,v5.4s
|
||||
umlal2 v23.2d,v14.4s,v7.4s
|
||||
umlal2 v20.2d,v14.4s,v1.4s
|
||||
|
||||
dup v17.2d,v17.d[0]
|
||||
umlal2 v19.2d,v15.4s,v8.4s
|
||||
umlal2 v22.2d,v15.4s,v3.4s
|
||||
umlal2 v21.2d,v15.4s,v1.4s
|
||||
umlal2 v23.2d,v15.4s,v5.4s
|
||||
umlal2 v20.2d,v15.4s,v0.4s
|
||||
|
||||
dup v18.2d,v18.d[0]
|
||||
umlal2 v22.2d,v17.4s,v0.4s
|
||||
umlal2 v23.2d,v17.4s,v1.4s
|
||||
umlal2 v19.2d,v17.4s,v4.4s
|
||||
umlal2 v20.2d,v17.4s,v6.4s
|
||||
umlal2 v21.2d,v17.4s,v8.4s
|
||||
|
||||
umlal2 v22.2d,v18.4s,v8.4s
|
||||
umlal2 v19.2d,v18.4s,v2.4s
|
||||
umlal2 v23.2d,v18.4s,v0.4s
|
||||
umlal2 v20.2d,v18.4s,v4.4s
|
||||
umlal2 v21.2d,v18.4s,v6.4s
|
||||
|
||||
b.eq .Lshort_tail
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// (hash+inp[0:1])*r^4:r^3 and accumulate
|
||||
|
||||
add v9.2s,v9.2s,v24.2s
|
||||
umlal v22.2d,v11.2s,v1.2s
|
||||
umlal v19.2d,v11.2s,v6.2s
|
||||
umlal v23.2d,v11.2s,v3.2s
|
||||
umlal v20.2d,v11.2s,v8.2s
|
||||
umlal v21.2d,v11.2s,v0.2s
|
||||
|
||||
add v10.2s,v10.2s,v25.2s
|
||||
umlal v22.2d,v9.2s,v5.2s
|
||||
umlal v19.2d,v9.2s,v0.2s
|
||||
umlal v23.2d,v9.2s,v7.2s
|
||||
umlal v20.2d,v9.2s,v1.2s
|
||||
umlal v21.2d,v9.2s,v3.2s
|
||||
|
||||
add v12.2s,v12.2s,v27.2s
|
||||
umlal v22.2d,v10.2s,v3.2s
|
||||
umlal v19.2d,v10.2s,v8.2s
|
||||
umlal v23.2d,v10.2s,v5.2s
|
||||
umlal v20.2d,v10.2s,v0.2s
|
||||
umlal v21.2d,v10.2s,v1.2s
|
||||
|
||||
add v13.2s,v13.2s,v28.2s
|
||||
umlal v22.2d,v12.2s,v0.2s
|
||||
umlal v19.2d,v12.2s,v4.2s
|
||||
umlal v23.2d,v12.2s,v1.2s
|
||||
umlal v20.2d,v12.2s,v6.2s
|
||||
umlal v21.2d,v12.2s,v8.2s
|
||||
|
||||
umlal v22.2d,v13.2s,v8.2s
|
||||
umlal v19.2d,v13.2s,v2.2s
|
||||
umlal v23.2d,v13.2s,v0.2s
|
||||
umlal v20.2d,v13.2s,v4.2s
|
||||
umlal v21.2d,v13.2s,v6.2s
|
||||
|
||||
.Lshort_tail:
|
||||
////////////////////////////////////////////////////////////////
|
||||
// horizontal add
|
||||
|
||||
addp v22.2d,v22.2d,v22.2d
|
||||
ldp d8,d9,[sp,#16] // meet ABI requirements
|
||||
addp v19.2d,v19.2d,v19.2d
|
||||
ldp d10,d11,[sp,#32]
|
||||
addp v23.2d,v23.2d,v23.2d
|
||||
ldp d12,d13,[sp,#48]
|
||||
addp v20.2d,v20.2d,v20.2d
|
||||
ldp d14,d15,[sp,#64]
|
||||
addp v21.2d,v21.2d,v21.2d
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// lazy reduction, but without narrowing
|
||||
|
||||
ushr v29.2d,v22.2d,#26
|
||||
and v22.16b,v22.16b,v31.16b
|
||||
ushr v30.2d,v19.2d,#26
|
||||
and v19.16b,v19.16b,v31.16b
|
||||
|
||||
add v23.2d,v23.2d,v29.2d // h3 -> h4
|
||||
add v20.2d,v20.2d,v30.2d // h0 -> h1
|
||||
|
||||
ushr v29.2d,v23.2d,#26
|
||||
and v23.16b,v23.16b,v31.16b
|
||||
ushr v30.2d,v20.2d,#26
|
||||
and v20.16b,v20.16b,v31.16b
|
||||
add v21.2d,v21.2d,v30.2d // h1 -> h2
|
||||
|
||||
add v19.2d,v19.2d,v29.2d
|
||||
shl v29.2d,v29.2d,#2
|
||||
ushr v30.2d,v21.2d,#26
|
||||
and v21.16b,v21.16b,v31.16b
|
||||
add v19.2d,v19.2d,v29.2d // h4 -> h0
|
||||
add v22.2d,v22.2d,v30.2d // h2 -> h3
|
||||
|
||||
ushr v29.2d,v19.2d,#26
|
||||
and v19.16b,v19.16b,v31.16b
|
||||
ushr v30.2d,v22.2d,#26
|
||||
and v22.16b,v22.16b,v31.16b
|
||||
add v20.2d,v20.2d,v29.2d // h0 -> h1
|
||||
add v23.2d,v23.2d,v30.2d // h3 -> h4
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// write the result, can be partially reduced
|
||||
|
||||
st4 {v19.s,v20.s,v21.s,v22.s}[0],[x0],#16
|
||||
st1 {v23.s}[0],[x0]
|
||||
|
||||
.Lno_data_neon:
|
||||
ldr x29,[sp],#80
|
||||
.inst 0xd50323bf // autiasp
|
||||
ret
|
||||
.size poly1305_blocks_neon,.-poly1305_blocks_neon
|
||||
|
||||
// poly1305_emit_neon: produce the final 16-byte tag from a hash value kept
// as five 32-bit limbs in base 2^26.
//
// Register use, as read from the code below:
//   x0 - state pointer: hash limbs at [x0]..[x0,#16] and a 64-bit flag word
//        at [x0,#24] (the is_base2_26 marker stored by poly1305_blocks_neon)
//   x1 - destination; 16 bytes are written
//   x2 - nonce; 16 bytes are read
// When the flag word is zero, control is handed to poly1305_emit (defined
// outside this block) and nothing here runs.
// Clobbers x4-x6, x10-x14, x17 and the condition flags.
.type poly1305_emit_neon,%function
|
||||
.align 5
|
||||
poly1305_emit_neon:
|
||||
.Lpoly1305_emit_neon:
|
||||
// Flag word == 0: branch (not call) to poly1305_emit.
ldr x17,[x0,#24]
|
||||
cbz x17,poly1305_emit
|
||||
|
||||
ldp w10,w11,[x0] // load hash value base 2^26
|
||||
ldp w12,w13,[x0,#8]
|
||||
ldr w14,[x0,#16]
|
||||
|
||||
// Repack limbs h0..h4 (w10..w14, limb i weighted 2^(26*i)) into
// x4 = bits 0-63, x5 = bits 64-127, x6 = bits 128 and up.
add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64
|
||||
lsr x5,x12,#12
|
||||
adds x4,x4,x12,lsl#52
|
||||
// Plain add: leaves the carry produced by the adds above untouched so the
// following adc can consume it.
add x5,x5,x13,lsl#14
|
||||
adc x5,x5,xzr
|
||||
lsr x6,x14,#24
|
||||
adds x5,x5,x14,lsl#40
|
||||
adc x6,x6,xzr // can be partially reduced...
|
||||
|
||||
ldp x10,x11,[x2] // load nonce
|
||||
|
||||
// x12 = (x6 & ~3) + (x6 >> 2) = 5 * (x6 >> 2): everything at or above
// 2^130 is folded back in times 5, because 2^130 is congruent to 5
// modulo 2^130 - 5. x6 keeps only its low two bits.
and x12,x6,#-4 // ... so reduce
|
||||
add x12,x12,x6,lsr#2
|
||||
and x6,x6,#3
|
||||
adds x4,x4,x12
|
||||
adcs x5,x5,xzr
|
||||
adc x6,x6,xzr
|
||||
|
||||
// x14:x13:x12 = h + 5. If that sum reaches bit 130 (any bit of x14 above
// the low two), h >= 2^130 - 5 and the low 128 bits of h + 5 are selected;
// otherwise h itself is kept.
adds x12,x4,#5 // compare to modulus
|
||||
adcs x13,x5,xzr
|
||||
adc x14,x6,xzr
|
||||
|
||||
tst x14,#-4 // see if it's carried/borrowed
|
||||
|
||||
csel x4,x4,x12,eq
|
||||
csel x5,x5,x13,eq
|
||||
|
||||
#ifdef __ARMEB__
|
||||
ror x10,x10,#32 // flip nonce words
|
||||
ror x11,x11,#32
|
||||
#endif
|
||||
// 128-bit addition of the nonce; the carry out of x5 is discarded.
adds x4,x4,x10 // accumulate nonce
|
||||
adc x5,x5,x11
|
||||
#ifdef __ARMEB__
|
||||
rev x4,x4 // flip output bytes
|
||||
rev x5,x5
|
||||
#endif
|
||||
stp x4,x5,[x1] // write result
|
||||
|
||||
ret
|
||||
.size poly1305_emit_neon,.-poly1305_emit_neon
|
||||
|
||||
// Read-only data for the NEON Poly1305 code.
.align 5
|
||||
// 32 bytes of zeros. poly1305_blocks_neon loads this address into x17 and
// substitutes it for the inp[2:3] pointer (csel ...,lo after
// subs x2,x2,#64) when fewer than 64 bytes remain.
.Lzeros:
|
||||
.long 0,0,0,0,0,0,0,0
|
||||
// NUL-terminated ASCII identification string:
// "Poly1305 for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.byte 80,111,108,121,49,51,48,53,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,811 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from aesni-gcm-x86_64.pl. */
|
||||
.text
|
||||
|
||||
/*
 * _aesni_ctr32_ghash_6x: local helper (no .globl), called from
 * aesni_gcm_decrypt and aesni_gcm_encrypt. Each pass of .Loop6x runs six
 * counter blocks (%xmm9-%xmm14) through the AES rounds while interleaving
 * vpclmulqdq multiplications over 16-byte values staged on the stack, then
 * XORs the keystream with 96 bytes read from (%rdi).
 *
 * Register roles, as used below:
 *   %rdi       input pointer, advanced by 96 per pass
 *   %rsi       output pointer, advanced by 96 per pass; blocks are stored
 *              at -96(%rsi)..-16(%rsi) only when another pass follows, so
 *              the last six result blocks are returned in %xmm9-%xmm14
 *   %rdx       remaining block count; 6 is subtracted on entry and per pass
 *   %rcx       key schedule pointer biased by +128 (keys at N-128(%rcx))
 *   %ebp       round count, compared with 11 to pick the number of rounds
 *   %r8        16-byte buffer where the next counter is parked/reloaded
 *   %r9        multiplier table, read at N-32(%r9)
 *   %r10       running byte count, += 0x60 per pass
 *   %r11       constant table: byte-swap mask at (%r11), further constants
 *              at 16, 32, 48 and 64(%r11)
 *   %ebx       32-bit counter word, used only to detect byte wrap-around
 *   %r14/%r15  current pointer into the data being hashed / its bound
 *   %r12,%r13  scratch
 *   %xmm1      counter block, %xmm8 running hash value
 * Stack: N+8(%rsp) here addresses the callers' N(%rsp) scratch slots; the
 * extra 8 skips the return address pushed by call.
 */
.type _aesni_ctr32_ghash_6x,@function
|
||||
.align 32
|
||||
_aesni_ctr32_ghash_6x:
|
||||
.cfi_startproc
|
||||
/* %xmm2 = constant at 32(%r11); it is added bytewise (vpaddb) to the
   counter to derive the five following counter values %xmm10-%xmm14.
   NOTE(review): presumably +1 in the last counter byte - the constant
   table is outside this view. */
vmovdqu 32(%r11),%xmm2
|
||||
subq $6,%rdx
|
||||
vpxor %xmm4,%xmm4,%xmm4
|
||||
vmovdqu 0-128(%rcx),%xmm15
|
||||
vpaddb %xmm2,%xmm1,%xmm10
|
||||
vpaddb %xmm2,%xmm10,%xmm11
|
||||
vpaddb %xmm2,%xmm11,%xmm12
|
||||
vpaddb %xmm2,%xmm12,%xmm13
|
||||
vpaddb %xmm2,%xmm13,%xmm14
|
||||
vpxor %xmm15,%xmm1,%xmm9
|
||||
/* Zero the stack slot that each pass XORs into %xmm8. */
vmovdqu %xmm4,16+8(%rsp)
|
||||
jmp .Loop6x
|
||||
|
||||
.align 32
|
||||
.Loop6x:
|
||||
/* 100663296 = 0x06000000: adds 6 to bits 24-31 of %ebx. A carry out
   diverts to .Lhandle_ctr32, which rebuilds the counters with 32-bit
   adds instead of byte adds. */
addl $100663296,%ebx
|
||||
jc .Lhandle_ctr32
|
||||
vmovdqu 0-32(%r9),%xmm3
|
||||
vpaddb %xmm2,%xmm14,%xmm1
|
||||
vpxor %xmm15,%xmm10,%xmm10
|
||||
vpxor %xmm15,%xmm11,%xmm11
|
||||
|
||||
.Lresume_ctr32:
|
||||
/* Park the following pass's first counter in (%r8); %xmm1 is reused as
   scratch below and reloaded from (%r8) at .Lenc_tail. */
vmovdqu %xmm1,(%r8)
|
||||
vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5
|
||||
vpxor %xmm15,%xmm12,%xmm12
|
||||
vmovups 16-128(%rcx),%xmm2
|
||||
vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6
|
||||
/* %r12 = (%r15 >= %r14, unsigned) ? 0x60 : 0, computed by the
   cmpq/setnc/negq/andq sequence and added to %r14 by the leaq below: the
   hash input pointer advances 96 bytes only while it has not passed
   %r15. The vector instructions in between do not write EFLAGS. */
xorq %r12,%r12
|
||||
cmpq %r14,%r15
|
||||
|
||||
vaesenc %xmm2,%xmm9,%xmm9
|
||||
vmovdqu 48+8(%rsp),%xmm0
|
||||
vpxor %xmm15,%xmm13,%xmm13
|
||||
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1
|
||||
vaesenc %xmm2,%xmm10,%xmm10
|
||||
vpxor %xmm15,%xmm14,%xmm14
|
||||
setnc %r12b
|
||||
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
|
||||
vaesenc %xmm2,%xmm11,%xmm11
|
||||
vmovdqu 16-32(%r9),%xmm3
|
||||
negq %r12
|
||||
vaesenc %xmm2,%xmm12,%xmm12
|
||||
vpxor %xmm5,%xmm6,%xmm6
|
||||
vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5
|
||||
vpxor %xmm4,%xmm8,%xmm8
|
||||
vaesenc %xmm2,%xmm13,%xmm13
|
||||
vpxor %xmm5,%xmm1,%xmm4
|
||||
andq $0x60,%r12
|
||||
vmovups 32-128(%rcx),%xmm15
|
||||
vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1
|
||||
vaesenc %xmm2,%xmm14,%xmm14
|
||||
|
||||
vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2
|
||||
leaq (%r14,%r12,1),%r14
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vpxor 16+8(%rsp),%xmm8,%xmm8
|
||||
vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3
|
||||
vmovdqu 64+8(%rsp),%xmm0
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
/* movbeq pairs: big-endian 64-bit loads from the hash input, stored with
   the two halves exchanged, so each stack slot receives a fully
   byte-reversed copy of one 16-byte input block for a later pass. */
movbeq 88(%r14),%r13
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
movbeq 80(%r14),%r12
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
movq %r13,32+8(%rsp)
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
movq %r12,40+8(%rsp)
|
||||
vmovdqu 48-32(%r9),%xmm5
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
|
||||
vmovups 48-128(%rcx),%xmm15
|
||||
vpxor %xmm1,%xmm6,%xmm6
|
||||
vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vpxor %xmm2,%xmm6,%xmm6
|
||||
vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
vpxor %xmm3,%xmm7,%xmm7
|
||||
vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5
|
||||
vmovdqu 80+8(%rsp),%xmm0
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
vpxor %xmm1,%xmm4,%xmm4
|
||||
vmovdqu 64-32(%r9),%xmm1
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
|
||||
vmovups 64-128(%rcx),%xmm15
|
||||
vpxor %xmm2,%xmm6,%xmm6
|
||||
vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vpxor %xmm3,%xmm6,%xmm6
|
||||
vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
movbeq 72(%r14),%r13
|
||||
vpxor %xmm5,%xmm7,%xmm7
|
||||
vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
movbeq 64(%r14),%r12
|
||||
vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1
|
||||
vmovdqu 96+8(%rsp),%xmm0
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
movq %r13,48+8(%rsp)
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
movq %r12,56+8(%rsp)
|
||||
vpxor %xmm2,%xmm4,%xmm4
|
||||
vmovdqu 96-32(%r9),%xmm2
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
|
||||
vmovups 80-128(%rcx),%xmm15
|
||||
vpxor %xmm3,%xmm6,%xmm6
|
||||
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vpxor %xmm5,%xmm6,%xmm6
|
||||
vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
movbeq 56(%r14),%r13
|
||||
vpxor %xmm1,%xmm7,%xmm7
|
||||
vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1
|
||||
vpxor 112+8(%rsp),%xmm8,%xmm8
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
movbeq 48(%r14),%r12
|
||||
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
movq %r13,64+8(%rsp)
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
movq %r12,72+8(%rsp)
|
||||
vpxor %xmm3,%xmm4,%xmm4
|
||||
vmovdqu 112-32(%r9),%xmm3
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
|
||||
vmovups 96-128(%rcx),%xmm15
|
||||
vpxor %xmm5,%xmm6,%xmm6
|
||||
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vpxor %xmm1,%xmm6,%xmm6
|
||||
vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
movbeq 40(%r14),%r13
|
||||
vpxor %xmm2,%xmm7,%xmm7
|
||||
vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
movbeq 32(%r14),%r12
|
||||
vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
movq %r13,80+8(%rsp)
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
movq %r12,88+8(%rsp)
|
||||
vpxor %xmm5,%xmm6,%xmm6
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
vpxor %xmm1,%xmm6,%xmm6
|
||||
|
||||
vmovups 112-128(%rcx),%xmm15
|
||||
vpslldq $8,%xmm6,%xmm5
|
||||
vpxor %xmm2,%xmm4,%xmm4
|
||||
/* 16(%r11): constant operand of the two vpclmulqdq $0x10 folding steps
   (one below, one at .Lenc_tail).
   NOTE(review): presumably the GHASH reduction polynomial - the constant
   table is outside this view; confirm. */
vmovdqu 16(%r11),%xmm3
|
||||
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vpxor %xmm8,%xmm7,%xmm7
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
vpxor %xmm5,%xmm4,%xmm4
|
||||
movbeq 24(%r14),%r13
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
movbeq 16(%r14),%r12
|
||||
vpalignr $8,%xmm4,%xmm4,%xmm0
|
||||
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
|
||||
movq %r13,96+8(%rsp)
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
movq %r12,104+8(%rsp)
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
vmovups 128-128(%rcx),%xmm1
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
|
||||
vaesenc %xmm1,%xmm9,%xmm9
|
||||
vmovups 144-128(%rcx),%xmm15
|
||||
vaesenc %xmm1,%xmm10,%xmm10
|
||||
vpsrldq $8,%xmm6,%xmm6
|
||||
vaesenc %xmm1,%xmm11,%xmm11
|
||||
vpxor %xmm6,%xmm7,%xmm7
|
||||
vaesenc %xmm1,%xmm12,%xmm12
|
||||
vpxor %xmm0,%xmm4,%xmm4
|
||||
movbeq 8(%r14),%r13
|
||||
vaesenc %xmm1,%xmm13,%xmm13
|
||||
movbeq 0(%r14),%r12
|
||||
vaesenc %xmm1,%xmm14,%xmm14
|
||||
vmovups 160-128(%rcx),%xmm1
|
||||
/* Round-count dispatch: below 11 -> go to the tail now; equal to 11 ->
   two more rounds first; above 11 -> four more rounds first. The flags
   from this cmpl are still live at the je below because the vector
   instructions in between do not write EFLAGS. */
cmpl $11,%ebp
|
||||
jb .Lenc_tail
|
||||
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
|
||||
vaesenc %xmm1,%xmm9,%xmm9
|
||||
vaesenc %xmm1,%xmm10,%xmm10
|
||||
vaesenc %xmm1,%xmm11,%xmm11
|
||||
vaesenc %xmm1,%xmm12,%xmm12
|
||||
vaesenc %xmm1,%xmm13,%xmm13
|
||||
vmovups 176-128(%rcx),%xmm15
|
||||
vaesenc %xmm1,%xmm14,%xmm14
|
||||
vmovups 192-128(%rcx),%xmm1
|
||||
je .Lenc_tail
|
||||
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
|
||||
vaesenc %xmm1,%xmm9,%xmm9
|
||||
vaesenc %xmm1,%xmm10,%xmm10
|
||||
vaesenc %xmm1,%xmm11,%xmm11
|
||||
vaesenc %xmm1,%xmm12,%xmm12
|
||||
vaesenc %xmm1,%xmm13,%xmm13
|
||||
vmovups 208-128(%rcx),%xmm15
|
||||
vaesenc %xmm1,%xmm14,%xmm14
|
||||
vmovups 224-128(%rcx),%xmm1
|
||||
jmp .Lenc_tail
|
||||
|
||||
.align 32
|
||||
/* Counter-wrap path: byte-swap the counter with the mask at (%r11), form
   the six following counters with 32-bit adds (vpaddd) of the constants
   at 48(%r11) and 64(%r11), then swap each one back. %xmm10 and %xmm11
   also get the first round key XORed in here; .Lresume_ctr32 does that
   for %xmm12-%xmm14. */
.Lhandle_ctr32:
|
||||
vmovdqu (%r11),%xmm0
|
||||
vpshufb %xmm0,%xmm1,%xmm6
|
||||
vmovdqu 48(%r11),%xmm5
|
||||
vpaddd 64(%r11),%xmm6,%xmm10
|
||||
vpaddd %xmm5,%xmm6,%xmm11
|
||||
vmovdqu 0-32(%r9),%xmm3
|
||||
vpaddd %xmm5,%xmm10,%xmm12
|
||||
vpshufb %xmm0,%xmm10,%xmm10
|
||||
vpaddd %xmm5,%xmm11,%xmm13
|
||||
vpshufb %xmm0,%xmm11,%xmm11
|
||||
vpxor %xmm15,%xmm10,%xmm10
|
||||
vpaddd %xmm5,%xmm12,%xmm14
|
||||
vpshufb %xmm0,%xmm12,%xmm12
|
||||
vpxor %xmm15,%xmm11,%xmm11
|
||||
vpaddd %xmm5,%xmm13,%xmm1
|
||||
vpshufb %xmm0,%xmm13,%xmm13
|
||||
vpshufb %xmm0,%xmm14,%xmm14
|
||||
vpshufb %xmm0,%xmm1,%xmm1
|
||||
jmp .Lresume_ctr32
|
||||
|
||||
.align 32
|
||||
/* Final rounds. %xmm1 holds the last round key; XORing it with each
   input block first lets vaesenclast produce input XOR keystream
   directly. The counters for the next pass are prepared alongside in
   %xmm0, %xmm5, %xmm6, %xmm7 and %xmm3. */
.Lenc_tail:
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vmovdqu %xmm7,16+8(%rsp)
|
||||
vpalignr $8,%xmm4,%xmm4,%xmm8
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
|
||||
vpxor 0(%rdi),%xmm1,%xmm2
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
vpxor 16(%rdi),%xmm1,%xmm0
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
vpxor 32(%rdi),%xmm1,%xmm5
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
vpxor 48(%rdi),%xmm1,%xmm6
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
vpxor 64(%rdi),%xmm1,%xmm7
|
||||
vpxor 80(%rdi),%xmm1,%xmm3
|
||||
vmovdqu (%r8),%xmm1
|
||||
|
||||
vaesenclast %xmm2,%xmm9,%xmm9
|
||||
vmovdqu 32(%r11),%xmm2
|
||||
vaesenclast %xmm0,%xmm10,%xmm10
|
||||
vpaddb %xmm2,%xmm1,%xmm0
|
||||
movq %r13,112+8(%rsp)
|
||||
leaq 96(%rdi),%rdi
|
||||
vaesenclast %xmm5,%xmm11,%xmm11
|
||||
vpaddb %xmm2,%xmm0,%xmm5
|
||||
movq %r12,120+8(%rsp)
|
||||
leaq 96(%rsi),%rsi
|
||||
vmovdqu 0-128(%rcx),%xmm15
|
||||
vaesenclast %xmm6,%xmm12,%xmm12
|
||||
vpaddb %xmm2,%xmm5,%xmm6
|
||||
vaesenclast %xmm7,%xmm13,%xmm13
|
||||
vpaddb %xmm2,%xmm6,%xmm7
|
||||
vaesenclast %xmm3,%xmm14,%xmm14
|
||||
vpaddb %xmm2,%xmm7,%xmm3
|
||||
|
||||
/* Account for 96 more bytes; leave the loop once the block count
   borrows. */
addq $0x60,%r10
|
||||
subq $0x6,%rdx
|
||||
jc .L6x_done
|
||||
|
||||
/* Another pass follows: store this pass's six blocks and move the
   prepared counters into %xmm9-%xmm14. */
vmovups %xmm9,-96(%rsi)
|
||||
vpxor %xmm15,%xmm1,%xmm9
|
||||
vmovups %xmm10,-80(%rsi)
|
||||
vmovdqa %xmm0,%xmm10
|
||||
vmovups %xmm11,-64(%rsi)
|
||||
vmovdqa %xmm5,%xmm11
|
||||
vmovups %xmm12,-48(%rsi)
|
||||
vmovdqa %xmm6,%xmm12
|
||||
vmovups %xmm13,-32(%rsi)
|
||||
vmovdqa %xmm7,%xmm13
|
||||
vmovups %xmm14,-16(%rsi)
|
||||
vmovdqa %xmm3,%xmm14
|
||||
vmovdqu 32+8(%rsp),%xmm7
|
||||
jmp .Loop6x
|
||||
|
||||
/* Exit: XOR the value stacked at 16+8(%rsp) and %xmm4 into %xmm8. The
   six result blocks are left in %xmm9-%xmm14 for the caller to store. */
.L6x_done:
|
||||
vpxor 16+8(%rsp),%xmm8,%xmm8
|
||||
vpxor %xmm4,%xmm8,%xmm8
|
||||
|
||||
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
|
||||
/*
 * aesni_gcm_decrypt: exported entry point.
 *
 * Register arguments, as consumed below:
 *   %rdi  input pointer        %rsi  output pointer
 *   %rdx  length in bytes      %rcx  AES key schedule (round count is read
 *                                    from offset 240)
 *   %r8   16-byte counter block
 *   %r9   hash state: 16 bytes at (%r9) are read on entry and written back
 *         on exit; data beyond them is read by the helper
 * Returns in %rax the byte count accumulated in %r10 by
 * _aesni_ctr32_ghash_6x, or 0 - without touching memory or the stack -
 * when the length is below 0x60 (96) bytes.
 * Saves and restores %rbx, %rbp and %r12-%r15.
 * NOTE(review): presumably matches the C prototype
 *   size_t aesni_gcm_decrypt(const void *in, void *out, size_t len,
 *                            const void *key, unsigned char ivec[16],
 *                            u64 *Xi);
 * the declaration is not visible here - confirm.
 */
.globl aesni_gcm_decrypt
|
||||
.type aesni_gcm_decrypt,@function
|
||||
.align 32
|
||||
aesni_gcm_decrypt:
|
||||
.cfi_startproc
|
||||
xorq %r10,%r10
|
||||
cmpq $0x60,%rdx
|
||||
jb .Lgcm_dec_abort
|
||||
|
||||
/* %rax keeps the entry %rsp; the callee-saved registers pushed here are
   reloaded relative to it in the epilogue. */
leaq (%rsp),%rax
|
||||
.cfi_def_cfa_register %rax
|
||||
pushq %rbx
|
||||
.cfi_offset %rbx,-16
|
||||
pushq %rbp
|
||||
.cfi_offset %rbp,-24
|
||||
pushq %r12
|
||||
.cfi_offset %r12,-32
|
||||
pushq %r13
|
||||
.cfi_offset %r13,-40
|
||||
pushq %r14
|
||||
.cfi_offset %r14,-48
|
||||
pushq %r15
|
||||
.cfi_offset %r15,-56
|
||||
vzeroupper
|
||||
|
||||
/* Set up the helper's register contract: %xmm1 = counter block, %ebx =
   its last 32-bit word, %r11 = constant table, %xmm8 = byte-swapped hash
   value, %rcx biased by +128, %r9 advanced by 64, %ebp = round count.
   The stack gets 128 bytes of scratch, aligned down to 128. */
vmovdqu (%r8),%xmm1
|
||||
addq $-128,%rsp
|
||||
movl 12(%r8),%ebx
|
||||
leaq .Lbswap_mask(%rip),%r11
|
||||
leaq -128(%rcx),%r14
|
||||
movq $0xf80,%r15
|
||||
vmovdqu (%r9),%xmm8
|
||||
andq $-128,%rsp
|
||||
vmovdqu (%r11),%xmm0
|
||||
leaq 128(%rcx),%rcx
|
||||
leaq 32+32(%r9),%r9
|
||||
movl 240-128(%rcx),%ebp
|
||||
vpshufb %xmm0,%xmm8,%xmm8
|
||||
|
||||
/* Compare bits 7-11 (mask 0xf80) of %rsp with those of key-128. If the
   stack's value is at or above the key's by less than 768, lower %rsp by
   the difference so that both have the same bits 7-11.
   NOTE(review): presumably avoids 4 KiB aliasing between the stack
   scratch area and the key schedule - confirm against the generator. */
andq %r15,%r14
|
||||
andq %rsp,%r15
|
||||
subq %r14,%r15
|
||||
jc .Ldec_no_key_aliasing
|
||||
cmpq $768,%r15
|
||||
jnc .Ldec_no_key_aliasing
|
||||
subq %r15,%rsp
|
||||
.Ldec_no_key_aliasing:
|
||||
|
||||
/* Load the first six input blocks, byte-swap them with the mask in
   %xmm0 and stage five at 48..112(%rsp); the one from 80(%rdi) stays in
   %xmm7. %r14 = input start and %r15 = input + len - 192 bound the data
   the helper hashes. %rdx becomes a count of 16-byte blocks. */
vmovdqu 80(%rdi),%xmm7
|
||||
leaq (%rdi),%r14
|
||||
vmovdqu 64(%rdi),%xmm4
|
||||
leaq -192(%rdi,%rdx,1),%r15
|
||||
vmovdqu 48(%rdi),%xmm5
|
||||
shrq $4,%rdx
|
||||
xorq %r10,%r10
|
||||
vmovdqu 32(%rdi),%xmm6
|
||||
vpshufb %xmm0,%xmm7,%xmm7
|
||||
vmovdqu 16(%rdi),%xmm2
|
||||
vpshufb %xmm0,%xmm4,%xmm4
|
||||
vmovdqu (%rdi),%xmm3
|
||||
vpshufb %xmm0,%xmm5,%xmm5
|
||||
vmovdqu %xmm4,48(%rsp)
|
||||
vpshufb %xmm0,%xmm6,%xmm6
|
||||
vmovdqu %xmm5,64(%rsp)
|
||||
vpshufb %xmm0,%xmm2,%xmm2
|
||||
vmovdqu %xmm6,80(%rsp)
|
||||
vpshufb %xmm0,%xmm3,%xmm3
|
||||
vmovdqu %xmm2,96(%rsp)
|
||||
vmovdqu %xmm3,112(%rsp)
|
||||
|
||||
call _aesni_ctr32_ghash_6x
|
||||
|
||||
/* The helper returns its last six output blocks in %xmm9-%xmm14 with
   %rsi already advanced past them. */
vmovups %xmm9,-96(%rsi)
|
||||
vmovups %xmm10,-80(%rsi)
|
||||
vmovups %xmm11,-64(%rsi)
|
||||
vmovups %xmm12,-48(%rsi)
|
||||
vmovups %xmm13,-32(%rsi)
|
||||
vmovups %xmm14,-16(%rsi)
|
||||
|
||||
/* Swap the hash value back and store it where it was loaded from
   (%r9 was advanced by 64 above, hence -64(%r9)). */
vpshufb (%r11),%xmm8,%xmm8
|
||||
vmovdqu %xmm8,-64(%r9)
|
||||
|
||||
vzeroupper
|
||||
/* Restore callee-saved registers from below the entry %rsp held in
   %rax, then drop the frame in one step. */
movq -48(%rax),%r15
|
||||
.cfi_restore %r15
|
||||
movq -40(%rax),%r14
|
||||
.cfi_restore %r14
|
||||
movq -32(%rax),%r13
|
||||
.cfi_restore %r13
|
||||
movq -24(%rax),%r12
|
||||
.cfi_restore %r12
|
||||
movq -16(%rax),%rbp
|
||||
.cfi_restore %rbp
|
||||
movq -8(%rax),%rbx
|
||||
.cfi_restore %rbx
|
||||
leaq (%rax),%rsp
|
||||
.cfi_def_cfa_register %rsp
|
||||
.Lgcm_dec_abort:
|
||||
/* Return value: bytes processed (%r10 is still 0 on the early-out). */
movq %r10,%rax
|
||||
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
|
||||
/*
 * _aesni_ctr32_6x: local helper (no .globl), called from
 * aesni_gcm_encrypt. Runs six counter blocks through AES, XORs the result
 * with 96 bytes at (%rdi) and stores it at (%rsi); both pointers are then
 * advanced by 96. The six output blocks also remain in %xmm9-%xmm14 and
 * %xmm1 holds the counter that follows them.
 *
 * Expects: %rcx = key schedule + 128, %ebp = round count, %r11 = constant
 * table, %xmm0 = byte-swap mask (used on the wrap path), %xmm1 = counter
 * block, %ebx = 32-bit counter word used for wrap detection.
 * Clobbers %r12, %r13, %xmm2-%xmm6, %xmm8, %xmm15 and the flags.
 */
.type _aesni_ctr32_6x,@function
|
||||
.align 32
|
||||
_aesni_ctr32_6x:
|
||||
.cfi_startproc
|
||||
/* %xmm4 = first 16 bytes of the key schedule (XORed into each counter),
   %xmm15 = next round key, %r12 -> the key after that,
   %r13 = %rbp - 1 = number of .Loop_ctr32 passes. */
vmovdqu 0-128(%rcx),%xmm4
|
||||
vmovdqu 32(%r11),%xmm2
|
||||
leaq -1(%rbp),%r13
|
||||
vmovups 16-128(%rcx),%xmm15
|
||||
leaq 32-128(%rcx),%r12
|
||||
vpxor %xmm4,%xmm1,%xmm9
|
||||
/* 100663296 = 0x06000000: adds 6 to bits 24-31 of %ebx; a carry out
   selects the 32-bit-add path at .Lhandle_ctr32_2 instead of the
   bytewise vpaddb chain below. */
addl $100663296,%ebx
|
||||
jc .Lhandle_ctr32_2
|
||||
vpaddb %xmm2,%xmm1,%xmm10
|
||||
vpaddb %xmm2,%xmm10,%xmm11
|
||||
vpxor %xmm4,%xmm10,%xmm10
|
||||
vpaddb %xmm2,%xmm11,%xmm12
|
||||
vpxor %xmm4,%xmm11,%xmm11
|
||||
vpaddb %xmm2,%xmm12,%xmm13
|
||||
vpxor %xmm4,%xmm12,%xmm12
|
||||
vpaddb %xmm2,%xmm13,%xmm14
|
||||
vpxor %xmm4,%xmm13,%xmm13
|
||||
vpaddb %xmm2,%xmm14,%xmm1
|
||||
vpxor %xmm4,%xmm14,%xmm14
|
||||
jmp .Loop_ctr32
|
||||
|
||||
.align 16
|
||||
/* One vaesenc round over all six blocks per pass, then fetch the next
   round key. */
.Loop_ctr32:
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
vmovups (%r12),%xmm15
|
||||
leaq 16(%r12),%r12
|
||||
decl %r13d
|
||||
jnz .Loop_ctr32
|
||||
|
||||
/* %xmm3 = last round key. It is XORed with each input block first so
   that vaesenclast yields input XOR keystream directly; the remaining
   vaesenc round with %xmm15 is interleaved. */
vmovdqu (%r12),%xmm3
|
||||
vaesenc %xmm15,%xmm9,%xmm9
|
||||
vpxor 0(%rdi),%xmm3,%xmm4
|
||||
vaesenc %xmm15,%xmm10,%xmm10
|
||||
vpxor 16(%rdi),%xmm3,%xmm5
|
||||
vaesenc %xmm15,%xmm11,%xmm11
|
||||
vpxor 32(%rdi),%xmm3,%xmm6
|
||||
vaesenc %xmm15,%xmm12,%xmm12
|
||||
vpxor 48(%rdi),%xmm3,%xmm8
|
||||
vaesenc %xmm15,%xmm13,%xmm13
|
||||
vpxor 64(%rdi),%xmm3,%xmm2
|
||||
vaesenc %xmm15,%xmm14,%xmm14
|
||||
vpxor 80(%rdi),%xmm3,%xmm3
|
||||
leaq 96(%rdi),%rdi
|
||||
|
||||
vaesenclast %xmm4,%xmm9,%xmm9
|
||||
vaesenclast %xmm5,%xmm10,%xmm10
|
||||
vaesenclast %xmm6,%xmm11,%xmm11
|
||||
vaesenclast %xmm8,%xmm12,%xmm12
|
||||
vaesenclast %xmm2,%xmm13,%xmm13
|
||||
vaesenclast %xmm3,%xmm14,%xmm14
|
||||
vmovups %xmm9,0(%rsi)
|
||||
vmovups %xmm10,16(%rsi)
|
||||
vmovups %xmm11,32(%rsi)
|
||||
vmovups %xmm12,48(%rsi)
|
||||
vmovups %xmm13,64(%rsi)
|
||||
vmovups %xmm14,80(%rsi)
|
||||
leaq 96(%rsi),%rsi
|
||||
|
||||
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
|
||||
.align 32
|
||||
/* Counter-wrap path: byte-swap the counter with the mask in %xmm0, form
   the six following counters with 32-bit adds (vpaddd) of the constants
   at 48(%r11) and 64(%r11), swap each back and XOR in %xmm4, then rejoin
   the round loop. */
.Lhandle_ctr32_2:
|
||||
vpshufb %xmm0,%xmm1,%xmm6
|
||||
vmovdqu 48(%r11),%xmm5
|
||||
vpaddd 64(%r11),%xmm6,%xmm10
|
||||
vpaddd %xmm5,%xmm6,%xmm11
|
||||
vpaddd %xmm5,%xmm10,%xmm12
|
||||
vpshufb %xmm0,%xmm10,%xmm10
|
||||
vpaddd %xmm5,%xmm11,%xmm13
|
||||
vpshufb %xmm0,%xmm11,%xmm11
|
||||
vpxor %xmm4,%xmm10,%xmm10
|
||||
vpaddd %xmm5,%xmm12,%xmm14
|
||||
vpshufb %xmm0,%xmm12,%xmm12
|
||||
vpxor %xmm4,%xmm11,%xmm11
|
||||
vpaddd %xmm5,%xmm13,%xmm1
|
||||
vpshufb %xmm0,%xmm13,%xmm13
|
||||
vpxor %xmm4,%xmm12,%xmm12
|
||||
vpshufb %xmm0,%xmm14,%xmm14
|
||||
vpxor %xmm4,%xmm13,%xmm13
|
||||
vpshufb %xmm0,%xmm1,%xmm1
|
||||
vpxor %xmm4,%xmm14,%xmm14
|
||||
jmp .Loop_ctr32
|
||||
.cfi_endproc
|
||||
.size _aesni_ctr32_6x,.-_aesni_ctr32_6x
|
||||
|
||||
.globl aesni_gcm_encrypt
|
||||
.type aesni_gcm_encrypt,@function
|
||||
.align 32
|
||||
aesni_gcm_encrypt:
|
||||
.cfi_startproc
|
||||
xorq %r10,%r10
|
||||
cmpq $288,%rdx
|
||||
jb .Lgcm_enc_abort
|
||||
|
||||
leaq (%rsp),%rax
|
||||
.cfi_def_cfa_register %rax
|
||||
pushq %rbx
|
||||
.cfi_offset %rbx,-16
|
||||
pushq %rbp
|
||||
.cfi_offset %rbp,-24
|
||||
pushq %r12
|
||||
.cfi_offset %r12,-32
|
||||
pushq %r13
|
||||
.cfi_offset %r13,-40
|
||||
pushq %r14
|
||||
.cfi_offset %r14,-48
|
||||
pushq %r15
|
||||
.cfi_offset %r15,-56
|
||||
vzeroupper
|
||||
|
||||
vmovdqu (%r8),%xmm1
|
||||
addq $-128,%rsp
|
||||
movl 12(%r8),%ebx
|
||||
leaq .Lbswap_mask(%rip),%r11
|
||||
leaq -128(%rcx),%r14
|
||||
movq $0xf80,%r15
|
||||
leaq 128(%rcx),%rcx
|
||||
vmovdqu (%r11),%xmm0
|
||||
andq $-128,%rsp
|
||||
movl 240-128(%rcx),%ebp
|
||||
|
||||
andq %r15,%r14
|
||||
andq %rsp,%r15
|
||||
subq %r14,%r15
|
||||
jc .Lenc_no_key_aliasing
|
||||
cmpq $768,%r15
|
||||
jnc .Lenc_no_key_aliasing
|
||||
subq %r15,%rsp
|
||||
.Lenc_no_key_aliasing:
|
||||
|
||||
leaq (%rsi),%r14
|
||||
leaq -192(%rsi,%rdx,1),%r15
|
||||
shrq $4,%rdx
|
||||
|
||||
call _aesni_ctr32_6x
|
||||
vpshufb %xmm0,%xmm9,%xmm8
|
||||
vpshufb %xmm0,%xmm10,%xmm2
|
||||
vmovdqu %xmm8,112(%rsp)
|
||||
vpshufb %xmm0,%xmm11,%xmm4
|
||||
vmovdqu %xmm2,96(%rsp)
|
||||
vpshufb %xmm0,%xmm12,%xmm5
|
||||
vmovdqu %xmm4,80(%rsp)
|
||||
vpshufb %xmm0,%xmm13,%xmm6
|
||||
vmovdqu %xmm5,64(%rsp)
|
||||
vpshufb %xmm0,%xmm14,%xmm7
|
||||
vmovdqu %xmm6,48(%rsp)
|
||||
|
||||
call _aesni_ctr32_6x
|
||||
|
||||
vmovdqu (%r9),%xmm8
|
||||
leaq 32+32(%r9),%r9
|
||||
subq $12,%rdx
|
||||
movq $192,%r10
|
||||
vpshufb %xmm0,%xmm8,%xmm8
|
||||
|
||||
call _aesni_ctr32_ghash_6x
|
||||
vmovdqu 32(%rsp),%xmm7
|
||||
vmovdqu (%r11),%xmm0
|
||||
vmovdqu 0-32(%r9),%xmm3
|
||||
vpunpckhqdq %xmm7,%xmm7,%xmm1
|
||||
vmovdqu 32-32(%r9),%xmm15
|
||||
vmovups %xmm9,-96(%rsi)
|
||||
vpshufb %xmm0,%xmm9,%xmm9
|
||||
vpxor %xmm7,%xmm1,%xmm1
|
||||
vmovups %xmm10,-80(%rsi)
|
||||
vpshufb %xmm0,%xmm10,%xmm10
|
||||
vmovups %xmm11,-64(%rsi)
|
||||
vpshufb %xmm0,%xmm11,%xmm11
|
||||
vmovups %xmm12,-48(%rsi)
|
||||
vpshufb %xmm0,%xmm12,%xmm12
|
||||
vmovups %xmm13,-32(%rsi)
|
||||
vpshufb %xmm0,%xmm13,%xmm13
|
||||
vmovups %xmm14,-16(%rsi)
|
||||
vpshufb %xmm0,%xmm14,%xmm14
|
||||
vmovdqu %xmm9,16(%rsp)
|
||||
vmovdqu 48(%rsp),%xmm6
|
||||
vmovdqu 16-32(%r9),%xmm0
|
||||
vpunpckhqdq %xmm6,%xmm6,%xmm2
|
||||
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5
|
||||
vpxor %xmm6,%xmm2,%xmm2
|
||||
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
|
||||
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
|
||||
|
||||
vmovdqu 64(%rsp),%xmm9
|
||||
vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4
|
||||
vmovdqu 48-32(%r9),%xmm3
|
||||
vpxor %xmm5,%xmm4,%xmm4
|
||||
vpunpckhqdq %xmm9,%xmm9,%xmm5
|
||||
vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6
|
||||
vpxor %xmm9,%xmm5,%xmm5
|
||||
vpxor %xmm7,%xmm6,%xmm6
|
||||
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
|
||||
vmovdqu 80-32(%r9),%xmm15
|
||||
vpxor %xmm1,%xmm2,%xmm2
|
||||
|
||||
vmovdqu 80(%rsp),%xmm1
|
||||
vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7
|
||||
vmovdqu 64-32(%r9),%xmm0
|
||||
vpxor %xmm4,%xmm7,%xmm7
|
||||
vpunpckhqdq %xmm1,%xmm1,%xmm4
|
||||
vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9
|
||||
vpxor %xmm1,%xmm4,%xmm4
|
||||
vpxor %xmm6,%xmm9,%xmm9
|
||||
vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5
|
||||
vpxor %xmm2,%xmm5,%xmm5
|
||||
|
||||
vmovdqu 96(%rsp),%xmm2
|
||||
vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6
|
||||
vmovdqu 96-32(%r9),%xmm3
|
||||
vpxor %xmm7,%xmm6,%xmm6
|
||||
vpunpckhqdq %xmm2,%xmm2,%xmm7
|
||||
vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1
|
||||
vpxor %xmm2,%xmm7,%xmm7
|
||||
vpxor %xmm9,%xmm1,%xmm1
|
||||
vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4
|
||||
vmovdqu 128-32(%r9),%xmm15
|
||||
vpxor %xmm5,%xmm4,%xmm4
|
||||
|
||||
vpxor 112(%rsp),%xmm8,%xmm8
|
||||
vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5
|
||||
vmovdqu 112-32(%r9),%xmm0
|
||||
vpunpckhqdq %xmm8,%xmm8,%xmm9
|
||||
vpxor %xmm6,%xmm5,%xmm5
|
||||
vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2
|
||||
vpxor %xmm8,%xmm9,%xmm9
|
||||
vpxor %xmm1,%xmm2,%xmm2
|
||||
vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7
|
||||
vpxor %xmm4,%xmm7,%xmm4
|
||||
|
||||
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6
|
||||
vmovdqu 0-32(%r9),%xmm3
|
||||
vpunpckhqdq %xmm14,%xmm14,%xmm1
|
||||
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8
|
||||
vpxor %xmm14,%xmm1,%xmm1
|
||||
vpxor %xmm5,%xmm6,%xmm5
|
||||
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9
|
||||
vmovdqu 32-32(%r9),%xmm15
|
||||
vpxor %xmm2,%xmm8,%xmm7
|
||||
vpxor %xmm4,%xmm9,%xmm6
|
||||
|
||||
vmovdqu 16-32(%r9),%xmm0
|
||||
vpxor %xmm5,%xmm7,%xmm9
|
||||
vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4
|
||||
vpxor %xmm9,%xmm6,%xmm6
|
||||
vpunpckhqdq %xmm13,%xmm13,%xmm2
|
||||
vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14
|
||||
vpxor %xmm13,%xmm2,%xmm2
|
||||
vpslldq $8,%xmm6,%xmm9
|
||||
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
|
||||
vpxor %xmm9,%xmm5,%xmm8
|
||||
vpsrldq $8,%xmm6,%xmm6
|
||||
vpxor %xmm6,%xmm7,%xmm7
|
||||
|
||||
vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5
|
||||
vmovdqu 48-32(%r9),%xmm3
|
||||
vpxor %xmm4,%xmm5,%xmm5
|
||||
vpunpckhqdq %xmm12,%xmm12,%xmm9
|
||||
vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13
|
||||
vpxor %xmm12,%xmm9,%xmm9
|
||||
vpxor %xmm14,%xmm13,%xmm13
|
||||
vpalignr $8,%xmm8,%xmm8,%xmm14
|
||||
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
|
||||
vmovdqu 80-32(%r9),%xmm15
|
||||
vpxor %xmm1,%xmm2,%xmm2
|
||||
|
||||
vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4
|
||||
vmovdqu 64-32(%r9),%xmm0
|
||||
vpxor %xmm5,%xmm4,%xmm4
|
||||
vpunpckhqdq %xmm11,%xmm11,%xmm1
|
||||
vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12
|
||||
vpxor %xmm11,%xmm1,%xmm1
|
||||
vpxor %xmm13,%xmm12,%xmm12
|
||||
vxorps 16(%rsp),%xmm7,%xmm7
|
||||
vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9
|
||||
vpxor %xmm2,%xmm9,%xmm9
|
||||
|
||||
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
|
||||
vxorps %xmm14,%xmm8,%xmm8
|
||||
|
||||
vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5
|
||||
vmovdqu 96-32(%r9),%xmm3
|
||||
vpxor %xmm4,%xmm5,%xmm5
|
||||
vpunpckhqdq %xmm10,%xmm10,%xmm2
|
||||
vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11
|
||||
vpxor %xmm10,%xmm2,%xmm2
|
||||
vpalignr $8,%xmm8,%xmm8,%xmm14
|
||||
vpxor %xmm12,%xmm11,%xmm11
|
||||
vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1
|
||||
vmovdqu 128-32(%r9),%xmm15
|
||||
vpxor %xmm9,%xmm1,%xmm1
|
||||
|
||||
vxorps %xmm7,%xmm14,%xmm14
|
||||
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
|
||||
vxorps %xmm14,%xmm8,%xmm8
|
||||
|
||||
vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4
|
||||
vmovdqu 112-32(%r9),%xmm0
|
||||
vpxor %xmm5,%xmm4,%xmm4
|
||||
vpunpckhqdq %xmm8,%xmm8,%xmm9
|
||||
vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10
|
||||
vpxor %xmm8,%xmm9,%xmm9
|
||||
vpxor %xmm11,%xmm10,%xmm10
|
||||
vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2
|
||||
vpxor %xmm1,%xmm2,%xmm2
|
||||
|
||||
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5
|
||||
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7
|
||||
vpxor %xmm4,%xmm5,%xmm5
|
||||
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6
|
||||
vpxor %xmm10,%xmm7,%xmm7
|
||||
vpxor %xmm2,%xmm6,%xmm6
|
||||
|
||||
vpxor %xmm5,%xmm7,%xmm4
|
||||
vpxor %xmm4,%xmm6,%xmm6
|
||||
vpslldq $8,%xmm6,%xmm1
|
||||
vmovdqu 16(%r11),%xmm3
|
||||
vpsrldq $8,%xmm6,%xmm6
|
||||
vpxor %xmm1,%xmm5,%xmm8
|
||||
vpxor %xmm6,%xmm7,%xmm7
|
||||
|
||||
vpalignr $8,%xmm8,%xmm8,%xmm2
|
||||
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
|
||||
vpxor %xmm2,%xmm8,%xmm8
|
||||
|
||||
vpalignr $8,%xmm8,%xmm8,%xmm2
|
||||
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
|
||||
vpxor %xmm7,%xmm2,%xmm2
|
||||
vpxor %xmm2,%xmm8,%xmm8
|
||||
vpshufb (%r11),%xmm8,%xmm8
|
||||
vmovdqu %xmm8,-64(%r9)
|
||||
|
||||
vzeroupper
|
||||
movq -48(%rax),%r15
|
||||
.cfi_restore %r15
|
||||
movq -40(%rax),%r14
|
||||
.cfi_restore %r14
|
||||
movq -32(%rax),%r13
|
||||
.cfi_restore %r13
|
||||
movq -24(%rax),%r12
|
||||
.cfi_restore %r12
|
||||
movq -16(%rax),%rbp
|
||||
.cfi_restore %rbp
|
||||
movq -8(%rax),%rbx
|
||||
.cfi_restore %rbx
|
||||
leaq (%rax),%rsp
|
||||
.cfi_def_cfa_register %rsp
|
||||
.Lgcm_enc_abort:
|
||||
movq %r10,%rax
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
|
||||
# Constant pool for the AES-NI GCM code above.  Every entry is 16 bytes and
# the entries are packed back to back, so code holding the address of
# .Lbswap_mask in a register reaches .Lpoly as 16(%reg).
.align	64
.Lbswap_mask:
# Byte-shuffle control (used with vpshufb) that reverses the order of the
# 16 bytes of an XMM register.
.byte	15,14,13,12,11,10,9,8
.byte	7,6,5,4,3,2,1,0
.Lpoly:
# Second operand of the vpclmulqdq reduction steps, read as 16(%r11).
.byte	0,0,0,0,0,0,0,0
.byte	0,0,0,0,0,0,0,0xc2
.Lone_msb:
# Value 1 in the last byte.  NOTE(review): presumably a counter increment
# for the CTR code outside this chunk -- confirm against aesni-gcm-x86_64.pl.
.byte	0,0,0,0,0,0,0,0
.byte	0,0,0,0,0,0,0,1
.Ltwo_lsb:
# Value 2 in the first byte (same caveat as .Lone_msb).
.byte	2,0,0,0,0,0,0,0
.byte	0,0,0,0,0,0,0,0
.Lone_lsb:
# Value 1 in the first byte (same caveat as .Lone_msb).
.byte	1,0,0,0,0,0,0,0
.byte	0,0,0,0,0,0,0,0
# NUL-terminated ASCII identification string:
# "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64
|
||||
# ELF note emitted into .note.gnu.property.  Layout: name size, descriptor
# size, note type, name, then one property record (type, data size, data),
# with 8-byte alignment between the parts.
.section ".note.gnu.property", "a"
.p2align	3
.long	1f - 0f			# size of the name between labels 0 and 1
.long	4f - 1f			# size of the descriptor between labels 1 and 4
.long	5			# note type; NT_GNU_PROPERTY_TYPE_0 per the Linux ABI extensions -- confirm
0:
# "GNU" plus its terminating NUL, encoded with .byte, since .asciz isn't
# supported on Solaris.
.byte	0x47,0x4e,0x55,0
1:
.p2align	3
.long	0xc0000002		# property type; GNU_PROPERTY_X86_FEATURE_1_AND per the x86-64 psABI -- confirm
.long	3f - 2f			# size of the property data
2:
.long	3			# property data; presumably the IBT and SHSTK feature bits -- confirm
3:
.p2align	3
4:
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,546 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from keccak1600-x86_64.pl. */
|
||||
.text
|
||||
|
||||
# __KeccakF1600: run every round of the Keccak-f[1600] permutation.
#
# File-local helper (no .globl); it follows the private register contract
# set up by KeccakF1600 and SHA3_absorb rather than the C calling convention:
#   %rdi  state pointer biased by +100 bytes (lanes live at -100..92(%rdi))
#   %rsi  scratch buffer on the caller's stack, biased by +100 bytes
#   %r15  address of the iotas round-constant table
# Each round reads lanes through %rdi, writes the new lanes through %rsi and
# then swaps the two pointers, so state and scratch alternate roles.  The
# round count is even, hence both pointers are back in place on return.
# %r15 is rewound before returning.
# Clobbers %rax, %rbx, %rcx, %rdx, %rbp, %r8-%r14 and flags; nothing is saved
# here, the callers push the callee-saved registers themselves.
# NOTE(review): the callers complement six lanes before and after the call;
# this looks like the "lane complementing" variant of chi -- confirm against
# keccak1600-x86_64.pl.
.type __KeccakF1600,@function
|
||||
.align 32
|
||||
__KeccakF1600:
|
||||
.cfi_startproc
|
||||
# Preload the last row (byte offsets 160..192 of the state).  These five
# lanes stay in registers from one round to the next.
movq 60(%rdi),%rax
|
||||
movq 68(%rdi),%rbx
|
||||
movq 76(%rdi),%rcx
|
||||
movq 84(%rdi),%rdx
|
||||
movq 92(%rdi),%rbp
|
||||
# Jump over the alignment padding in front of the loop head.
jmp .Loop
|
||||
|
||||
.align 32
|
||||
.Loop:
|
||||
# Theta, step 1: XOR the five lanes of every column into one parity word
# (%rax, %rbx, %rcx, %rdx, %rbp).  %r8-%r11 keep lanes that are reused for
# the first output row; %r12 keeps the unmodified lane from offset 192.
movq -100(%rdi),%r8
|
||||
movq -52(%rdi),%r9
|
||||
movq -4(%rdi),%r10
|
||||
movq 44(%rdi),%r11
|
||||
|
||||
xorq -84(%rdi),%rcx
|
||||
xorq -76(%rdi),%rdx
|
||||
xorq %r8,%rax
|
||||
xorq -92(%rdi),%rbx
|
||||
xorq -44(%rdi),%rcx
|
||||
xorq -60(%rdi),%rax
|
||||
movq %rbp,%r12
|
||||
xorq -68(%rdi),%rbp
|
||||
|
||||
xorq %r10,%rcx
|
||||
xorq -20(%rdi),%rax
|
||||
xorq -36(%rdi),%rdx
|
||||
xorq %r9,%rbx
|
||||
xorq -28(%rdi),%rbp
|
||||
|
||||
xorq 36(%rdi),%rcx
|
||||
xorq 20(%rdi),%rax
|
||||
xorq 4(%rdi),%rdx
|
||||
xorq -12(%rdi),%rbx
|
||||
xorq 12(%rdi),%rbp
|
||||
|
||||
# Theta, step 2: combine each parity word, rotated left by one bit, with
# another column's parity.  %r13 saves the %rcx parity, which is needed last.
movq %rcx,%r13
|
||||
rolq $1,%rcx
|
||||
xorq %rax,%rcx
|
||||
xorq %r11,%rdx
|
||||
|
||||
rolq $1,%rax
|
||||
xorq %rdx,%rax
|
||||
xorq 28(%rdi),%rbx
|
||||
|
||||
rolq $1,%rdx
|
||||
xorq %rbx,%rdx
|
||||
xorq 52(%rdi),%rbp
|
||||
|
||||
rolq $1,%rbx
|
||||
xorq %rbp,%rbx
|
||||
|
||||
rolq $1,%rbp
|
||||
xorq %r13,%rbp
|
||||
# Output row stored at -100..-68(%rsi): apply the theta words, rotate each
# lane (rho), then mix the lanes with and/or/not/xor (chi).  Loads for the
# next row are interleaved with this row's stores.
xorq %rcx,%r9
|
||||
xorq %rdx,%r10
|
||||
rolq $44,%r9
|
||||
xorq %rbp,%r11
|
||||
xorq %rax,%r12
|
||||
rolq $43,%r10
|
||||
xorq %rbx,%r8
|
||||
movq %r9,%r13
|
||||
rolq $21,%r11
|
||||
orq %r10,%r9
|
||||
xorq %r8,%r9
|
||||
rolq $14,%r12
|
||||
|
||||
# Iota: fold this round's constant into the first lane and advance %r15 to
# the next constant.
xorq (%r15),%r9
|
||||
leaq 8(%r15),%r15
|
||||
|
||||
movq %r12,%r14
|
||||
andq %r11,%r12
|
||||
movq %r9,-100(%rsi)
|
||||
xorq %r10,%r12
|
||||
notq %r10
|
||||
movq %r12,-84(%rsi)
|
||||
|
||||
orq %r11,%r10
|
||||
movq 76(%rdi),%r12
|
||||
xorq %r13,%r10
|
||||
movq %r10,-92(%rsi)
|
||||
|
||||
andq %r8,%r13
|
||||
movq -28(%rdi),%r9
|
||||
xorq %r14,%r13
|
||||
movq -20(%rdi),%r10
|
||||
movq %r13,-68(%rsi)
|
||||
|
||||
orq %r8,%r14
|
||||
movq -76(%rdi),%r8
|
||||
xorq %r11,%r14
|
||||
movq 28(%rdi),%r11
|
||||
movq %r14,-76(%rsi)
|
||||
|
||||
|
||||
# Output row stored at -60..-28(%rsi), same theta/rotate/mix pattern.
xorq %rbp,%r8
|
||||
xorq %rdx,%r12
|
||||
rolq $28,%r8
|
||||
xorq %rcx,%r11
|
||||
xorq %rax,%r9
|
||||
rolq $61,%r12
|
||||
rolq $45,%r11
|
||||
xorq %rbx,%r10
|
||||
rolq $20,%r9
|
||||
movq %r8,%r13
|
||||
orq %r12,%r8
|
||||
rolq $3,%r10
|
||||
|
||||
xorq %r11,%r8
|
||||
movq %r8,-36(%rsi)
|
||||
|
||||
movq %r9,%r14
|
||||
andq %r13,%r9
|
||||
movq -92(%rdi),%r8
|
||||
xorq %r12,%r9
|
||||
notq %r12
|
||||
movq %r9,-28(%rsi)
|
||||
|
||||
orq %r11,%r12
|
||||
movq -44(%rdi),%r9
|
||||
xorq %r10,%r12
|
||||
movq %r12,-44(%rsi)
|
||||
|
||||
andq %r10,%r11
|
||||
movq 60(%rdi),%r12
|
||||
xorq %r14,%r11
|
||||
movq %r11,-52(%rsi)
|
||||
|
||||
orq %r10,%r14
|
||||
movq 4(%rdi),%r10
|
||||
xorq %r13,%r14
|
||||
movq 52(%rdi),%r11
|
||||
movq %r14,-60(%rsi)
|
||||
|
||||
|
||||
# Output row stored at -20..12(%rsi).
xorq %rbp,%r10
|
||||
xorq %rax,%r11
|
||||
rolq $25,%r10
|
||||
xorq %rdx,%r9
|
||||
rolq $8,%r11
|
||||
xorq %rbx,%r12
|
||||
rolq $6,%r9
|
||||
xorq %rcx,%r8
|
||||
rolq $18,%r12
|
||||
movq %r10,%r13
|
||||
andq %r11,%r10
|
||||
rolq $1,%r8
|
||||
|
||||
notq %r11
|
||||
xorq %r9,%r10
|
||||
movq %r10,-12(%rsi)
|
||||
|
||||
movq %r12,%r14
|
||||
andq %r11,%r12
|
||||
movq -12(%rdi),%r10
|
||||
xorq %r13,%r12
|
||||
movq %r12,-4(%rsi)
|
||||
|
||||
orq %r9,%r13
|
||||
movq 84(%rdi),%r12
|
||||
xorq %r8,%r13
|
||||
movq %r13,-20(%rsi)
|
||||
|
||||
andq %r8,%r9
|
||||
xorq %r14,%r9
|
||||
movq %r9,12(%rsi)
|
||||
|
||||
orq %r8,%r14
|
||||
movq -60(%rdi),%r9
|
||||
xorq %r11,%r14
|
||||
movq 36(%rdi),%r11
|
||||
movq %r14,4(%rsi)
|
||||
|
||||
|
||||
# Output row stored at 20..52(%rsi); this load fetches its last input lane.
movq -68(%rdi),%r8
|
||||
|
||||
xorq %rcx,%r10
|
||||
xorq %rdx,%r11
|
||||
rolq $10,%r10
|
||||
xorq %rbx,%r9
|
||||
rolq $15,%r11
|
||||
xorq %rbp,%r12
|
||||
rolq $36,%r9
|
||||
xorq %rax,%r8
|
||||
rolq $56,%r12
|
||||
movq %r10,%r13
|
||||
orq %r11,%r10
|
||||
rolq $27,%r8
|
||||
|
||||
notq %r11
|
||||
xorq %r9,%r10
|
||||
movq %r10,28(%rsi)
|
||||
|
||||
movq %r12,%r14
|
||||
orq %r11,%r12
|
||||
xorq %r13,%r12
|
||||
movq %r12,36(%rsi)
|
||||
|
||||
andq %r9,%r13
|
||||
xorq %r8,%r13
|
||||
movq %r13,20(%rsi)
|
||||
|
||||
orq %r8,%r9
|
||||
xorq %r14,%r9
|
||||
movq %r9,52(%rsi)
|
||||
|
||||
andq %r14,%r8
|
||||
xorq %r11,%r8
|
||||
movq %r8,44(%rsi)
|
||||
|
||||
|
||||
# Last output row (offsets 60..92).  The theta words themselves are consumed
# as accumulators here.  The xchgq swaps source and destination for the next
# round, so the stores that follow it go through %rdi.
xorq -84(%rdi),%rdx
|
||||
xorq -36(%rdi),%rbp
|
||||
rolq $62,%rdx
|
||||
xorq 68(%rdi),%rcx
|
||||
rolq $55,%rbp
|
||||
xorq 12(%rdi),%rax
|
||||
rolq $2,%rcx
|
||||
xorq 20(%rdi),%rbx
|
||||
xchgq %rsi,%rdi
|
||||
rolq $39,%rax
|
||||
rolq $41,%rbx
|
||||
movq %rdx,%r13
|
||||
andq %rbp,%rdx
|
||||
notq %rbp
|
||||
xorq %rcx,%rdx
|
||||
movq %rdx,92(%rdi)
|
||||
|
||||
movq %rax,%r14
|
||||
andq %rbp,%rax
|
||||
xorq %r13,%rax
|
||||
movq %rax,60(%rdi)
|
||||
|
||||
orq %rcx,%r13
|
||||
xorq %rbx,%r13
|
||||
movq %r13,84(%rdi)
|
||||
|
||||
andq %rbx,%rcx
|
||||
xorq %r14,%rcx
|
||||
movq %rcx,76(%rdi)
|
||||
|
||||
orq %r14,%rbx
|
||||
xorq %rbp,%rbx
|
||||
movq %rbx,68(%rdi)
|
||||
|
||||
# Move the new lanes for offsets 92 and 84 into %rbp and %rdx so that the
# register assignment matches the preload at function entry.
movq %rdx,%rbp
|
||||
movq %r13,%rdx
|
||||
|
||||
# The iotas table ends on a 256-byte boundary, so the low byte of %r15
# becomes zero exactly when the last round constant has been consumed.
testq $255,%r15
|
||||
jnz .Loop
|
||||
|
||||
# Rewind %r15 to the start of the table (24 constants of 8 bytes each).
leaq -192(%r15),%r15
|
||||
# Encoded "rep ret".
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size __KeccakF1600,.-__KeccakF1600
|
||||
|
||||
# KeccakF1600: apply the Keccak-f[1600] permutation in place to the 200-byte
# state addressed by %rdi.
#
# File-local wrapper (no .globl) around __KeccakF1600.  It saves the six
# callee-saved registers the worker clobbers, reserves a 200-byte scratch
# buffer on the stack and installs the worker's private register contract:
# %rdi and %rsi biased by +100 bytes, %r15 pointing at iotas.
# Six lanes of the state are complemented before the call and complemented
# back afterwards.  %rdi holds the original state pointer again on return.
# Stack use: 6 pushes (48 bytes) + 200 bytes scratch; together with the
# return address that is 256 bytes, so %rsp keeps its 16-byte alignment at
# the inner call when this function was entered with ABI alignment.
.type	KeccakF1600,@function
.align	32
KeccakF1600:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	# Scratch buffer for the worker, then the three pointers it expects.
	subq	$200,%rsp
.cfi_adjust_cfa_offset	200
	leaq	100(%rsp),%rsi
	leaq	iotas(%rip),%r15
	leaq	100(%rdi),%rdi

	# Complement the six lanes the worker expects in inverted form.
	notq	-92(%rdi)
	notq	-84(%rdi)
	notq	-36(%rdi)
	notq	-4(%rdi)
	notq	36(%rdi)
	notq	60(%rdi)

	call	__KeccakF1600

	# Undo the complementing and remove the +100 bias from the state pointer.
	notq	-92(%rdi)
	notq	-84(%rdi)
	notq	-36(%rdi)
	notq	-4(%rdi)
	notq	36(%rdi)
	notq	60(%rdi)
	leaq	-100(%rdi),%rdi

	addq	$200,%rsp
.cfi_adjust_cfa_offset	-200

	popq	%r15
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r15
	popq	%r14
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r14
	popq	%r13
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r13
	popq	%r12
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r12
	popq	%rbp
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbp
	popq	%rbx
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbx
	# Encoded "rep ret".
.byte	0xf3,0xc3
.cfi_endproc
.size	KeccakF1600,.-KeccakF1600
|
||||
# SHA3_absorb: XOR whole input blocks into the Keccak state and permute the
# state after each block.
#
# Arguments as read by the code (System V AMD64 registers):
#   %rdi  state
#   %rsi  input
#   %rdx  input length in bytes
#   %rcx  block size in bytes (consumed 8 bytes at a time)
# Returns in %rax the number of input bytes left over, i.e. the length that
# remained once it dropped below the block size.
# NOTE(review): presumably declared in C as
#   size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp,
#                      size_t len, size_t r);
# confirm against the C caller.
#
# Stack frame: 6 pushes + 232 bytes.  The first 200 bytes are the scratch
# buffer for __KeccakF1600; the slots at 200, 208 and 216 hold the input
# pointer, the remaining length and the block size across the call.  They
# are addressed relative to %rsi, which stays at %rsp+100.
.globl	SHA3_absorb
.type	SHA3_absorb,@function
.align	32
SHA3_absorb:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$232,%rsp
.cfi_adjust_cfa_offset	232

	# Free %rsi for the worker's scratch pointer; the input moves to %r9.
	movq	%rsi,%r9
	leaq	100(%rsp),%rsi
	leaq	iotas(%rip),%r15
	leaq	100(%rdi),%rdi
	movq	%rcx,216-100(%rsi)

	# Complement the six lanes the worker expects in inverted form.
	notq	-92(%rdi)
	notq	-84(%rdi)
	notq	-36(%rdi)
	notq	-4(%rdi)
	notq	36(%rdi)
	notq	60(%rdi)

.Loop_absorb:
	# Stop as soon as less than one block of input is left (unsigned compare).
	cmpq	%rcx,%rdx
	jb	.Ldone_absorb

	shrq	$3,%rcx
	leaq	-100(%rdi),%r8

.Lblock_absorb:
	# state lane ^= next 8 input bytes
	movq	(%r9),%rax
	xorq	(%r8),%rax
	movq	%rax,(%r8)
	leaq	8(%r9),%r9
	leaq	8(%r8),%r8
	subq	$8,%rdx
	subq	$1,%rcx
	jnz	.Lblock_absorb

	# %r9, %rdx and %rcx do not survive the worker; spill and reload them.
	movq	%r9,200-100(%rsi)
	movq	%rdx,208-100(%rsi)
	call	__KeccakF1600
	movq	216-100(%rsi),%rcx
	movq	208-100(%rsi),%rdx
	movq	200-100(%rsi),%r9
	jmp	.Loop_absorb

.align	32
.Ldone_absorb:
	# Undo the lane complementing before handing the state back.
	notq	-92(%rdi)
	notq	-84(%rdi)
	notq	-36(%rdi)
	notq	-4(%rdi)
	notq	36(%rdi)
	notq	60(%rdi)

	# Return value: bytes that were not absorbed.
	movq	%rdx,%rax

	addq	$232,%rsp
.cfi_adjust_cfa_offset	-232

	popq	%r15
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r15
	popq	%r14
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r14
	popq	%r13
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r13
	popq	%r12
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r12
	popq	%rbp
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbp
	popq	%rbx
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbx
	# Encoded "rep ret".
.byte	0xf3,0xc3
.cfi_endproc
.size	SHA3_absorb,.-SHA3_absorb
|
||||
# SHA3_squeeze: copy output bytes out of the Keccak state, running the
# permutation every time a whole block of the state has been handed out.
#
# Arguments as read by the code (System V AMD64 registers):
#   %rdi  state (kept in %rdi throughout; it is the argument to KeccakF1600)
#   %rsi  output buffer
#   %rdx  number of bytes to produce
#   %rcx  block size in bytes; shifted right by 3 to count 8-byte lanes
# No return value is set up.
# NOTE(review): presumably declared in C as
#   void SHA3_squeeze(uint64_t A[5][5], unsigned char *out,
#                     size_t len, size_t r);
# confirm against the C caller.
#
# Register roles inside the loop:
#   %r8   read cursor into the state
#   %r12  write cursor into the output
#   %r13  bytes still to produce
#   %r14  lanes per block (reload value for %rcx)
#   %rcx  lanes left in the current block
# %r12-%r14 are callee-saved and therefore survive the call to KeccakF1600;
# %r8 and %rcx are reloaded after it.
# Stack: three pushes plus the return address make 32 bytes, so %rsp is
# 16-byte aligned at the call when the function was entered with ABI
# alignment.
#
# Fix relative to the generated original: the unwind annotation after
# "popq %r12" named %r13 a second time, so the CFI never recorded %r12 as
# restored.  It now reads ".cfi_restore %r12".  The instruction stream is
# unchanged.
.globl	SHA3_squeeze
.type	SHA3_squeeze,@function
.align	32
SHA3_squeeze:
.cfi_startproc
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-16
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-24
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-32

	shrq	$3,%rcx
	movq	%rdi,%r8
	movq	%rsi,%r12
	movq	%rdx,%r13
	movq	%rcx,%r14
	# Jump over the alignment padding in front of the loop head.
	jmp	.Loop_squeeze

.align	32
.Loop_squeeze:
	# Fewer than 8 bytes wanted: finish with a byte copy.
	cmpq	$8,%r13
	jb	.Ltail_squeeze

	# Hand out one full lane.
	movq	(%r8),%rax
	leaq	8(%r8),%r8
	movq	%rax,(%r12)
	leaq	8(%r12),%r12
	subq	$8,%r13
	jz	.Ldone_squeeze

	# More lanes left in this block?
	subq	$1,%rcx
	jnz	.Loop_squeeze

	# Block exhausted: permute the state and restart at its first lane.
	call	KeccakF1600
	movq	%rdi,%r8
	movq	%r14,%rcx
	jmp	.Loop_squeeze

.Ltail_squeeze:
	# Copy the remaining %r13 (< 8) bytes with "rep movsb" (encoded below).
	movq	%r8,%rsi
	movq	%r12,%rdi
	movq	%r13,%rcx
.byte	0xf3,0xa4

.Ldone_squeeze:
	popq	%r14
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r14
	popq	%r13
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r13
	popq	%r12
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r12
	# Encoded "rep ret".
.byte	0xf3,0xc3
.cfi_endproc
.size	SHA3_squeeze,.-SHA3_squeeze
|
||||
# Round constants for __KeccakF1600, one 64-bit word per round.
#
# Layout matters: the table is preceded by 64 bytes of zero padding inside a
# 256-byte aligned region, so its 24 * 8 = 192 bytes end exactly on a
# 256-byte boundary.  __KeccakF1600 relies on this: it stops when the low
# byte of its table pointer becomes zero ("testq $255,%r15") and then rewinds
# the pointer by 192 bytes.
.align	256
.quad	0,0,0,0
.quad	0,0,0,0
.type	iotas,@object
iotas:
.quad	0x0000000000000001, 0x0000000000008082	# rounds 0, 1
.quad	0x800000000000808a, 0x8000000080008000	# rounds 2, 3
.quad	0x000000000000808b, 0x0000000080000001	# rounds 4, 5
.quad	0x8000000080008081, 0x8000000000008009	# rounds 6, 7
.quad	0x000000000000008a, 0x0000000000000088	# rounds 8, 9
.quad	0x0000000080008009, 0x000000008000000a	# rounds 10, 11
.quad	0x000000008000808b, 0x800000000000008b	# rounds 12, 13
.quad	0x8000000000008089, 0x8000000000008003	# rounds 14, 15
.quad	0x8000000000008002, 0x8000000000000080	# rounds 16, 17
.quad	0x000000000000800a, 0x800000008000000a	# rounds 18, 19
.quad	0x8000000080008081, 0x8000000000008080	# rounds 20, 21
.quad	0x0000000080000001, 0x8000000080008008	# rounds 22, 23
.size	iotas,.-iotas
# NUL-terminated ASCII identification string:
# "Keccak-1600 absorb and squeeze for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
# ELF note emitted into .note.gnu.property.  Layout: name size, descriptor
# size, note type, name, then one property record (type, data size, data),
# with 8-byte alignment between the parts.
.section ".note.gnu.property", "a"
.p2align	3
.long	1f - 0f			# size of the name between labels 0 and 1
.long	4f - 1f			# size of the descriptor between labels 1 and 4
.long	5			# note type; NT_GNU_PROPERTY_TYPE_0 per the Linux ABI extensions -- confirm
0:
# "GNU" plus its terminating NUL, encoded with .byte, since .asciz isn't
# supported on Solaris.
.byte	0x47,0x4e,0x55,0
1:
.p2align	3
.long	0xc0000002		# property type; GNU_PROPERTY_X86_FEATURE_1_AND per the x86-64 psABI -- confirm
.long	3f - 2f			# size of the property data
2:
.long	3			# property data; presumably the IBT and SHSTK feature bits -- confirm
3:
.p2align	3
4:
|
||||
@@ -1,705 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from md5-x86_64.pl. */
|
||||
.text
|
||||
.align 16
|
||||
|
||||
.globl ossl_md5_block_asm_data_order
|
||||
.type ossl_md5_block_asm_data_order,@function
|
||||
ossl_md5_block_asm_data_order:
|
||||
.cfi_startproc
|
||||
pushq %rbp
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %rbp,-16
|
||||
pushq %rbx
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %rbx,-24
|
||||
pushq %r12
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r12,-32
|
||||
pushq %r14
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r14,-40
|
||||
pushq %r15
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r15,-48
|
||||
.Lprologue:
|
||||
|
||||
|
||||
|
||||
|
||||
movq %rdi,%rbp
|
||||
shlq $6,%rdx
|
||||
leaq (%rsi,%rdx,1),%rdi
|
||||
movl 0(%rbp),%eax
|
||||
movl 4(%rbp),%ebx
|
||||
movl 8(%rbp),%ecx
|
||||
movl 12(%rbp),%edx
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
cmpq %rdi,%rsi
|
||||
je .Lend
|
||||
|
||||
|
||||
.Lloop:
|
||||
movl %eax,%r8d
|
||||
movl %ebx,%r9d
|
||||
movl %ecx,%r14d
|
||||
movl %edx,%r15d
|
||||
movl 0(%rsi),%r10d
|
||||
movl %edx,%r11d
|
||||
xorl %ecx,%r11d
|
||||
leal -680876936(%rax,%r10,1),%eax
|
||||
andl %ebx,%r11d
|
||||
movl 4(%rsi),%r10d
|
||||
xorl %edx,%r11d
|
||||
addl %r11d,%eax
|
||||
roll $7,%eax
|
||||
movl %ecx,%r11d
|
||||
addl %ebx,%eax
|
||||
xorl %ebx,%r11d
|
||||
leal -389564586(%rdx,%r10,1),%edx
|
||||
andl %eax,%r11d
|
||||
movl 8(%rsi),%r10d
|
||||
xorl %ecx,%r11d
|
||||
addl %r11d,%edx
|
||||
roll $12,%edx
|
||||
movl %ebx,%r11d
|
||||
addl %eax,%edx
|
||||
xorl %eax,%r11d
|
||||
leal 606105819(%rcx,%r10,1),%ecx
|
||||
andl %edx,%r11d
|
||||
movl 12(%rsi),%r10d
|
||||
xorl %ebx,%r11d
|
||||
addl %r11d,%ecx
|
||||
roll $17,%ecx
|
||||
movl %eax,%r11d
|
||||
addl %edx,%ecx
|
||||
xorl %edx,%r11d
|
||||
leal -1044525330(%rbx,%r10,1),%ebx
|
||||
andl %ecx,%r11d
|
||||
movl 16(%rsi),%r10d
|
||||
xorl %eax,%r11d
|
||||
addl %r11d,%ebx
|
||||
roll $22,%ebx
|
||||
movl %edx,%r11d
|
||||
addl %ecx,%ebx
|
||||
xorl %ecx,%r11d
|
||||
leal -176418897(%rax,%r10,1),%eax
|
||||
andl %ebx,%r11d
|
||||
movl 20(%rsi),%r10d
|
||||
xorl %edx,%r11d
|
||||
addl %r11d,%eax
|
||||
roll $7,%eax
|
||||
movl %ecx,%r11d
|
||||
addl %ebx,%eax
|
||||
xorl %ebx,%r11d
|
||||
leal 1200080426(%rdx,%r10,1),%edx
|
||||
andl %eax,%r11d
|
||||
movl 24(%rsi),%r10d
|
||||
xorl %ecx,%r11d
|
||||
addl %r11d,%edx
|
||||
roll $12,%edx
|
||||
movl %ebx,%r11d
|
||||
addl %eax,%edx
|
||||
xorl %eax,%r11d
|
||||
leal -1473231341(%rcx,%r10,1),%ecx
|
||||
andl %edx,%r11d
|
||||
movl 28(%rsi),%r10d
|
||||
xorl %ebx,%r11d
|
||||
addl %r11d,%ecx
|
||||
roll $17,%ecx
|
||||
movl %eax,%r11d
|
||||
addl %edx,%ecx
|
||||
xorl %edx,%r11d
|
||||
leal -45705983(%rbx,%r10,1),%ebx
|
||||
andl %ecx,%r11d
|
||||
movl 32(%rsi),%r10d
|
||||
xorl %eax,%r11d
|
||||
addl %r11d,%ebx
|
||||
roll $22,%ebx
|
||||
movl %edx,%r11d
|
||||
addl %ecx,%ebx
|
||||
xorl %ecx,%r11d
|
||||
leal 1770035416(%rax,%r10,1),%eax
|
||||
andl %ebx,%r11d
|
||||
movl 36(%rsi),%r10d
|
||||
xorl %edx,%r11d
|
||||
addl %r11d,%eax
|
||||
roll $7,%eax
|
||||
movl %ecx,%r11d
|
||||
addl %ebx,%eax
|
||||
xorl %ebx,%r11d
|
||||
leal -1958414417(%rdx,%r10,1),%edx
|
||||
andl %eax,%r11d
|
||||
movl 40(%rsi),%r10d
|
||||
xorl %ecx,%r11d
|
||||
addl %r11d,%edx
|
||||
roll $12,%edx
|
||||
movl %ebx,%r11d
|
||||
addl %eax,%edx
|
||||
xorl %eax,%r11d
|
||||
leal -42063(%rcx,%r10,1),%ecx
|
||||
andl %edx,%r11d
|
||||
movl 44(%rsi),%r10d
|
||||
xorl %ebx,%r11d
|
||||
addl %r11d,%ecx
|
||||
roll $17,%ecx
|
||||
movl %eax,%r11d
|
||||
addl %edx,%ecx
|
||||
xorl %edx,%r11d
|
||||
leal -1990404162(%rbx,%r10,1),%ebx
|
||||
andl %ecx,%r11d
|
||||
movl 48(%rsi),%r10d
|
||||
xorl %eax,%r11d
|
||||
addl %r11d,%ebx
|
||||
roll $22,%ebx
|
||||
movl %edx,%r11d
|
||||
addl %ecx,%ebx
|
||||
xorl %ecx,%r11d
|
||||
leal 1804603682(%rax,%r10,1),%eax
|
||||
andl %ebx,%r11d
|
||||
movl 52(%rsi),%r10d
|
||||
xorl %edx,%r11d
|
||||
addl %r11d,%eax
|
||||
roll $7,%eax
|
||||
movl %ecx,%r11d
|
||||
addl %ebx,%eax
|
||||
xorl %ebx,%r11d
|
||||
leal -40341101(%rdx,%r10,1),%edx
|
||||
andl %eax,%r11d
|
||||
movl 56(%rsi),%r10d
|
||||
xorl %ecx,%r11d
|
||||
addl %r11d,%edx
|
||||
roll $12,%edx
|
||||
movl %ebx,%r11d
|
||||
addl %eax,%edx
|
||||
xorl %eax,%r11d
|
||||
leal -1502002290(%rcx,%r10,1),%ecx
|
||||
andl %edx,%r11d
|
||||
movl 60(%rsi),%r10d
|
||||
xorl %ebx,%r11d
|
||||
addl %r11d,%ecx
|
||||
roll $17,%ecx
|
||||
movl %eax,%r11d
|
||||
addl %edx,%ecx
|
||||
xorl %edx,%r11d
|
||||
leal 1236535329(%rbx,%r10,1),%ebx
|
||||
andl %ecx,%r11d
|
||||
movl 4(%rsi),%r10d
|
||||
xorl %eax,%r11d
|
||||
addl %r11d,%ebx
|
||||
roll $22,%ebx
|
||||
movl %edx,%r11d
|
||||
addl %ecx,%ebx
|
||||
movl %edx,%r11d
|
||||
movl %edx,%r12d
|
||||
notl %r11d
|
||||
andl %ebx,%r12d
|
||||
leal -165796510(%rax,%r10,1),%eax
|
||||
andl %ecx,%r11d
|
||||
movl 24(%rsi),%r10d
|
||||
orl %r11d,%r12d
|
||||
movl %ecx,%r11d
|
||||
addl %r12d,%eax
|
||||
movl %ecx,%r12d
|
||||
roll $5,%eax
|
||||
addl %ebx,%eax
|
||||
notl %r11d
|
||||
andl %eax,%r12d
|
||||
leal -1069501632(%rdx,%r10,1),%edx
|
||||
andl %ebx,%r11d
|
||||
movl 44(%rsi),%r10d
|
||||
orl %r11d,%r12d
|
||||
movl %ebx,%r11d
|
||||
addl %r12d,%edx
|
||||
movl %ebx,%r12d
|
||||
roll $9,%edx
|
||||
addl %eax,%edx
|
||||
notl %r11d
|
||||
andl %edx,%r12d
|
||||
leal 643717713(%rcx,%r10,1),%ecx
|
||||
andl %eax,%r11d
|
||||
movl 0(%rsi),%r10d
|
||||
orl %r11d,%r12d
|
||||
movl %eax,%r11d
|
||||
addl %r12d,%ecx
|
||||
movl %eax,%r12d
|
||||
roll $14,%ecx
|
||||
addl %edx,%ecx
|
||||
notl %r11d
|
||||
andl %ecx,%r12d
|
||||
leal -373897302(%rbx,%r10,1),%ebx
|
||||
andl %edx,%r11d
|
||||
movl 20(%rsi),%r10d
|
||||
orl %r11d,%r12d
|
||||
movl %edx,%r11d
|
||||
addl %r12d,%ebx
|
||||
movl %edx,%r12d
|
||||
roll $20,%ebx
|
||||
addl %ecx,%ebx
|
||||
notl %r11d
|
||||
andl %ebx,%r12d
|
||||
leal -701558691(%rax,%r10,1),%eax
|
||||
andl %ecx,%r11d
|
||||
movl 40(%rsi),%r10d
|
||||
orl %r11d,%r12d
|
||||
movl %ecx,%r11d
|
||||
addl %r12d,%eax
|
||||
movl %ecx,%r12d
|
||||
roll $5,%eax
|
||||
addl %ebx,%eax
|
||||
notl %r11d
|
||||
andl %eax,%r12d
|
||||
leal 38016083(%rdx,%r10,1),%edx
|
||||
andl %ebx,%r11d
|
||||
movl 60(%rsi),%r10d
|
||||
orl %r11d,%r12d
|
||||
movl %ebx,%r11d
|
||||
addl %r12d,%edx
|
||||
movl %ebx,%r12d
|
||||
roll $9,%edx
|
||||
addl %eax,%edx
|
||||
notl %r11d
|
||||
andl %edx,%r12d
|
||||
leal -660478335(%rcx,%r10,1),%ecx
|
||||
andl %eax,%r11d
|
||||
movl 16(%rsi),%r10d
|
||||
orl %r11d,%r12d
|
||||
movl %eax,%r11d
|
||||
addl %r12d,%ecx
|
||||
movl %eax,%r12d
|
||||
roll $14,%ecx
|
||||
addl %edx,%ecx
|
||||
notl %r11d
|
||||
andl %ecx,%r12d
|
||||
leal -405537848(%rbx,%r10,1),%ebx
|
||||
andl %edx,%r11d
|
||||
movl 36(%rsi),%r10d
|
||||
orl %r11d,%r12d
|
||||
movl %edx,%r11d
|
||||
addl %r12d,%ebx
|
||||
movl %edx,%r12d
|
||||
roll $20,%ebx
|
||||
addl %ecx,%ebx
|
||||
notl %r11d
|
||||
andl %ebx,%r12d
|
||||
leal 568446438(%rax,%r10,1),%eax
|
||||
andl %ecx,%r11d
|
||||
movl 56(%rsi),%r10d
|
||||
orl %r11d,%r12d
|
||||
movl %ecx,%r11d
|
||||
addl %r12d,%eax
|
||||
movl %ecx,%r12d
|
||||
roll $5,%eax
|
||||
addl %ebx,%eax
|
||||
notl %r11d
|
||||
andl %eax,%r12d
|
||||
leal -1019803690(%rdx,%r10,1),%edx
|
||||
andl %ebx,%r11d
|
||||
movl 12(%rsi),%r10d
|
||||
orl %r11d,%r12d
|
||||
movl %ebx,%r11d
|
||||
addl %r12d,%edx
|
||||
movl %ebx,%r12d
|
||||
roll $9,%edx
|
||||
addl %eax,%edx
|
||||
notl %r11d
|
||||
andl %edx,%r12d
|
||||
leal -187363961(%rcx,%r10,1),%ecx
|
||||
andl %eax,%r11d
|
||||
movl 32(%rsi),%r10d
|
||||
orl %r11d,%r12d
|
||||
movl %eax,%r11d
|
||||
addl %r12d,%ecx
|
||||
movl %eax,%r12d
|
||||
roll $14,%ecx
|
||||
addl %edx,%ecx
|
||||
notl %r11d
|
||||
andl %ecx,%r12d
|
||||
leal 1163531501(%rbx,%r10,1),%ebx
|
||||
andl %edx,%r11d
|
||||
movl 52(%rsi),%r10d
|
||||
orl %r11d,%r12d
|
||||
movl %edx,%r11d
|
||||
addl %r12d,%ebx
|
||||
movl %edx,%r12d
|
||||
roll $20,%ebx
|
||||
addl %ecx,%ebx
|
||||
notl %r11d
|
||||
andl %ebx,%r12d
|
||||
leal -1444681467(%rax,%r10,1),%eax
|
||||
andl %ecx,%r11d
|
||||
movl 8(%rsi),%r10d
|
||||
orl %r11d,%r12d
|
||||
movl %ecx,%r11d
|
||||
addl %r12d,%eax
|
||||
movl %ecx,%r12d
|
||||
roll $5,%eax
|
||||
addl %ebx,%eax
|
||||
notl %r11d
|
||||
andl %eax,%r12d
|
||||
leal -51403784(%rdx,%r10,1),%edx
|
||||
andl %ebx,%r11d
|
||||
movl 28(%rsi),%r10d
|
||||
orl %r11d,%r12d
|
||||
movl %ebx,%r11d
|
||||
addl %r12d,%edx
|
||||
movl %ebx,%r12d
|
||||
roll $9,%edx
|
||||
addl %eax,%edx
|
||||
notl %r11d
|
||||
andl %edx,%r12d
|
||||
leal 1735328473(%rcx,%r10,1),%ecx
|
||||
andl %eax,%r11d
|
||||
movl 48(%rsi),%r10d
|
||||
orl %r11d,%r12d
|
||||
movl %eax,%r11d
|
||||
addl %r12d,%ecx
|
||||
movl %eax,%r12d
|
||||
roll $14,%ecx
|
||||
addl %edx,%ecx
|
||||
notl %r11d
|
||||
andl %ecx,%r12d
|
||||
leal -1926607734(%rbx,%r10,1),%ebx
|
||||
andl %edx,%r11d
|
||||
movl 20(%rsi),%r10d
|
||||
orl %r11d,%r12d
|
||||
movl %edx,%r11d
|
||||
addl %r12d,%ebx
|
||||
movl %edx,%r12d
|
||||
roll $20,%ebx
|
||||
addl %ecx,%ebx
|
||||
movl %ecx,%r11d
|
||||
leal -378558(%rax,%r10,1),%eax
|
||||
xorl %edx,%r11d
|
||||
movl 32(%rsi),%r10d
|
||||
xorl %ebx,%r11d
|
||||
addl %r11d,%eax
|
||||
movl %ebx,%r11d
|
||||
roll $4,%eax
|
||||
addl %ebx,%eax
|
||||
leal -2022574463(%rdx,%r10,1),%edx
|
||||
xorl %ecx,%r11d
|
||||
movl 44(%rsi),%r10d
|
||||
xorl %eax,%r11d
|
||||
addl %r11d,%edx
|
||||
roll $11,%edx
|
||||
movl %eax,%r11d
|
||||
addl %eax,%edx
|
||||
leal 1839030562(%rcx,%r10,1),%ecx
|
||||
xorl %ebx,%r11d
|
||||
movl 56(%rsi),%r10d
|
||||
xorl %edx,%r11d
|
||||
addl %r11d,%ecx
|
||||
movl %edx,%r11d
|
||||
roll $16,%ecx
|
||||
addl %edx,%ecx
|
||||
leal -35309556(%rbx,%r10,1),%ebx
|
||||
xorl %eax,%r11d
|
||||
movl 4(%rsi),%r10d
|
||||
xorl %ecx,%r11d
|
||||
addl %r11d,%ebx
|
||||
roll $23,%ebx
|
||||
movl %ecx,%r11d
|
||||
addl %ecx,%ebx
|
||||
leal -1530992060(%rax,%r10,1),%eax
|
||||
xorl %edx,%r11d
|
||||
movl 16(%rsi),%r10d
|
||||
xorl %ebx,%r11d
|
||||
addl %r11d,%eax
|
||||
movl %ebx,%r11d
|
||||
roll $4,%eax
|
||||
addl %ebx,%eax
|
||||
leal 1272893353(%rdx,%r10,1),%edx
|
||||
xorl %ecx,%r11d
|
||||
movl 28(%rsi),%r10d
|
||||
xorl %eax,%r11d
|
||||
addl %r11d,%edx
|
||||
roll $11,%edx
|
||||
movl %eax,%r11d
|
||||
addl %eax,%edx
|
||||
leal -155497632(%rcx,%r10,1),%ecx
|
||||
xorl %ebx,%r11d
|
||||
movl 40(%rsi),%r10d
|
||||
xorl %edx,%r11d
|
||||
addl %r11d,%ecx
|
||||
movl %edx,%r11d
|
||||
roll $16,%ecx
|
||||
addl %edx,%ecx
|
||||
leal -1094730640(%rbx,%r10,1),%ebx
|
||||
xorl %eax,%r11d
|
||||
movl 52(%rsi),%r10d
|
||||
xorl %ecx,%r11d
|
||||
addl %r11d,%ebx
|
||||
roll $23,%ebx
|
||||
movl %ecx,%r11d
|
||||
addl %ecx,%ebx
|
||||
leal 681279174(%rax,%r10,1),%eax
|
||||
xorl %edx,%r11d
|
||||
movl 0(%rsi),%r10d
|
||||
xorl %ebx,%r11d
|
||||
addl %r11d,%eax
|
||||
movl %ebx,%r11d
|
||||
roll $4,%eax
|
||||
addl %ebx,%eax
|
||||
leal -358537222(%rdx,%r10,1),%edx
|
||||
xorl %ecx,%r11d
|
||||
movl 12(%rsi),%r10d
|
||||
xorl %eax,%r11d
|
||||
addl %r11d,%edx
|
||||
roll $11,%edx
|
||||
movl %eax,%r11d
|
||||
addl %eax,%edx
|
||||
leal -722521979(%rcx,%r10,1),%ecx
|
||||
xorl %ebx,%r11d
|
||||
movl 24(%rsi),%r10d
|
||||
xorl %edx,%r11d
|
||||
addl %r11d,%ecx
|
||||
movl %edx,%r11d
|
||||
roll $16,%ecx
|
||||
addl %edx,%ecx
|
||||
leal 76029189(%rbx,%r10,1),%ebx
|
||||
xorl %eax,%r11d
|
||||
movl 36(%rsi),%r10d
|
||||
xorl %ecx,%r11d
|
||||
addl %r11d,%ebx
|
||||
roll $23,%ebx
|
||||
movl %ecx,%r11d
|
||||
addl %ecx,%ebx
|
||||
leal -640364487(%rax,%r10,1),%eax
|
||||
xorl %edx,%r11d
|
||||
movl 48(%rsi),%r10d
|
||||
xorl %ebx,%r11d
|
||||
addl %r11d,%eax
|
||||
movl %ebx,%r11d
|
||||
roll $4,%eax
|
||||
addl %ebx,%eax
|
||||
leal -421815835(%rdx,%r10,1),%edx
|
||||
xorl %ecx,%r11d
|
||||
movl 60(%rsi),%r10d
|
||||
xorl %eax,%r11d
|
||||
addl %r11d,%edx
|
||||
roll $11,%edx
|
||||
movl %eax,%r11d
|
||||
addl %eax,%edx
|
||||
leal 530742520(%rcx,%r10,1),%ecx
|
||||
xorl %ebx,%r11d
|
||||
movl 8(%rsi),%r10d
|
||||
xorl %edx,%r11d
|
||||
addl %r11d,%ecx
|
||||
movl %edx,%r11d
|
||||
roll $16,%ecx
|
||||
addl %edx,%ecx
|
||||
leal -995338651(%rbx,%r10,1),%ebx
|
||||
xorl %eax,%r11d
|
||||
movl 0(%rsi),%r10d
|
||||
xorl %ecx,%r11d
|
||||
addl %r11d,%ebx
|
||||
roll $23,%ebx
|
||||
movl %ecx,%r11d
|
||||
addl %ecx,%ebx
|
||||
movl $0xffffffff,%r11d
|
||||
xorl %edx,%r11d
|
||||
leal -198630844(%rax,%r10,1),%eax
|
||||
orl %ebx,%r11d
|
||||
movl 28(%rsi),%r10d
|
||||
xorl %ecx,%r11d
|
||||
addl %r11d,%eax
|
||||
movl $0xffffffff,%r11d
|
||||
roll $6,%eax
|
||||
xorl %ecx,%r11d
|
||||
addl %ebx,%eax
|
||||
leal 1126891415(%rdx,%r10,1),%edx
|
||||
orl %eax,%r11d
|
||||
movl 56(%rsi),%r10d
|
||||
xorl %ebx,%r11d
|
||||
addl %r11d,%edx
|
||||
movl $0xffffffff,%r11d
|
||||
roll $10,%edx
|
||||
xorl %ebx,%r11d
|
||||
addl %eax,%edx
|
||||
leal -1416354905(%rcx,%r10,1),%ecx
|
||||
orl %edx,%r11d
|
||||
movl 20(%rsi),%r10d
|
||||
xorl %eax,%r11d
|
||||
addl %r11d,%ecx
|
||||
movl $0xffffffff,%r11d
|
||||
roll $15,%ecx
|
||||
xorl %eax,%r11d
|
||||
addl %edx,%ecx
|
||||
leal -57434055(%rbx,%r10,1),%ebx
|
||||
orl %ecx,%r11d
|
||||
movl 48(%rsi),%r10d
|
||||
xorl %edx,%r11d
|
||||
addl %r11d,%ebx
|
||||
movl $0xffffffff,%r11d
|
||||
roll $21,%ebx
|
||||
xorl %edx,%r11d
|
||||
addl %ecx,%ebx
|
||||
leal 1700485571(%rax,%r10,1),%eax
|
||||
orl %ebx,%r11d
|
||||
movl 12(%rsi),%r10d
|
||||
xorl %ecx,%r11d
|
||||
addl %r11d,%eax
|
||||
movl $0xffffffff,%r11d
|
||||
roll $6,%eax
|
||||
xorl %ecx,%r11d
|
||||
addl %ebx,%eax
|
||||
leal -1894986606(%rdx,%r10,1),%edx
|
||||
orl %eax,%r11d
|
||||
movl 40(%rsi),%r10d
|
||||
xorl %ebx,%r11d
|
||||
addl %r11d,%edx
|
||||
movl $0xffffffff,%r11d
|
||||
roll $10,%edx
|
||||
xorl %ebx,%r11d
|
||||
addl %eax,%edx
|
||||
leal -1051523(%rcx,%r10,1),%ecx
|
||||
orl %edx,%r11d
|
||||
movl 4(%rsi),%r10d
|
||||
xorl %eax,%r11d
|
||||
addl %r11d,%ecx
|
||||
movl $0xffffffff,%r11d
|
||||
roll $15,%ecx
|
||||
xorl %eax,%r11d
|
||||
addl %edx,%ecx
|
||||
leal -2054922799(%rbx,%r10,1),%ebx
|
||||
orl %ecx,%r11d
|
||||
movl 32(%rsi),%r10d
|
||||
xorl %edx,%r11d
|
||||
addl %r11d,%ebx
|
||||
movl $0xffffffff,%r11d
|
||||
roll $21,%ebx
|
||||
xorl %edx,%r11d
|
||||
addl %ecx,%ebx
|
||||
leal 1873313359(%rax,%r10,1),%eax
|
||||
orl %ebx,%r11d
|
||||
movl 60(%rsi),%r10d
|
||||
xorl %ecx,%r11d
|
||||
addl %r11d,%eax
|
||||
movl $0xffffffff,%r11d
|
||||
roll $6,%eax
|
||||
xorl %ecx,%r11d
|
||||
addl %ebx,%eax
|
||||
leal -30611744(%rdx,%r10,1),%edx
|
||||
orl %eax,%r11d
|
||||
movl 24(%rsi),%r10d
|
||||
xorl %ebx,%r11d
|
||||
addl %r11d,%edx
|
||||
movl $0xffffffff,%r11d
|
||||
roll $10,%edx
|
||||
xorl %ebx,%r11d
|
||||
addl %eax,%edx
|
||||
leal -1560198380(%rcx,%r10,1),%ecx
|
||||
orl %edx,%r11d
|
||||
movl 52(%rsi),%r10d
|
||||
xorl %eax,%r11d
|
||||
addl %r11d,%ecx
|
||||
movl $0xffffffff,%r11d
|
||||
roll $15,%ecx
|
||||
xorl %eax,%r11d
|
||||
addl %edx,%ecx
|
||||
leal 1309151649(%rbx,%r10,1),%ebx
|
||||
orl %ecx,%r11d
|
||||
movl 16(%rsi),%r10d
|
||||
xorl %edx,%r11d
|
||||
addl %r11d,%ebx
|
||||
movl $0xffffffff,%r11d
|
||||
roll $21,%ebx
|
||||
xorl %edx,%r11d
|
||||
addl %ecx,%ebx
|
||||
leal -145523070(%rax,%r10,1),%eax
|
||||
orl %ebx,%r11d
|
||||
movl 44(%rsi),%r10d
|
||||
xorl %ecx,%r11d
|
||||
addl %r11d,%eax
|
||||
movl $0xffffffff,%r11d
|
||||
roll $6,%eax
|
||||
xorl %ecx,%r11d
|
||||
addl %ebx,%eax
|
||||
leal -1120210379(%rdx,%r10,1),%edx
|
||||
orl %eax,%r11d
|
||||
movl 8(%rsi),%r10d
|
||||
xorl %ebx,%r11d
|
||||
addl %r11d,%edx
|
||||
movl $0xffffffff,%r11d
|
||||
roll $10,%edx
|
||||
xorl %ebx,%r11d
|
||||
addl %eax,%edx
|
||||
leal 718787259(%rcx,%r10,1),%ecx
|
||||
orl %edx,%r11d
|
||||
movl 36(%rsi),%r10d
|
||||
xorl %eax,%r11d
|
||||
addl %r11d,%ecx
|
||||
movl $0xffffffff,%r11d
|
||||
roll $15,%ecx
|
||||
xorl %eax,%r11d
|
||||
addl %edx,%ecx
|
||||
leal -343485551(%rbx,%r10,1),%ebx
|
||||
orl %ecx,%r11d
|
||||
movl 0(%rsi),%r10d
|
||||
xorl %edx,%r11d
|
||||
addl %r11d,%ebx
|
||||
movl $0xffffffff,%r11d
|
||||
roll $21,%ebx
|
||||
xorl %edx,%r11d
|
||||
addl %ecx,%ebx
|
||||
|
||||
addl %r8d,%eax
|
||||
addl %r9d,%ebx
|
||||
addl %r14d,%ecx
|
||||
addl %r15d,%edx
|
||||
|
||||
|
||||
addq $64,%rsi
|
||||
cmpq %rdi,%rsi
|
||||
jb .Lloop
|
||||
|
||||
|
||||
.Lend:
|
||||
movl %eax,0(%rbp)
|
||||
movl %ebx,4(%rbp)
|
||||
movl %ecx,8(%rbp)
|
||||
movl %edx,12(%rbp)
|
||||
|
||||
movq (%rsp),%r15
|
||||
.cfi_restore %r15
|
||||
movq 8(%rsp),%r14
|
||||
.cfi_restore %r14
|
||||
movq 16(%rsp),%r12
|
||||
.cfi_restore %r12
|
||||
movq 24(%rsp),%rbx
|
||||
.cfi_restore %rbx
|
||||
movq 32(%rsp),%rbp
|
||||
.cfi_restore %rbp
|
||||
addq $40,%rsp
|
||||
.cfi_adjust_cfa_offset -40
|
||||
.Lepilogue:
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size ossl_md5_block_asm_data_order,.-ossl_md5_block_asm_data_order
|
||||
# ELF note emitted into the allocatable section ".note.gnu.property".
# Layout written below: name size, descriptor size, note type, the name
# bytes "GNU\0", then one property record (type, data size, data), with
# the descriptor padded to an 8-byte boundary.
# NOTE(review): note type 5, property type 0xc0000002 and value 3 are
# presumably NT_GNU_PROPERTY_TYPE_0, GNU_PROPERTY_X86_FEATURE_1_AND and
# IBT|SHSTK; confirm against the x86-64 psABI.
.section ".note.gnu.property", "a"
|
||||
.p2align 3
|
||||
# Name size: distance between labels 0 and 1 (the "GNU\0" bytes).
.long 1f - 0f
|
||||
# Descriptor size: distance between labels 1 and 4.
.long 4f - 1f
|
||||
# Note type.
.long 5
|
||||
0:
|
||||
# "GNU" encoded with .byte, since .asciz isn't supported
|
||||
# on Solaris.
|
||||
.byte 0x47
|
||||
.byte 0x4e
|
||||
.byte 0x55
|
||||
.byte 0
|
||||
1:
|
||||
.p2align 3
|
||||
# Property record: type, then size of the data between labels 2 and 3.
.long 0xc0000002
|
||||
.long 3f - 2f
|
||||
2:
|
||||
# Property data word.
.long 3
|
||||
3:
|
||||
.p2align 3
|
||||
4:
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,657 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from rc4-x86_64.pl. */
|
||||
.text
|
||||
|
||||
|
||||
# RC4: stream-processing entry point (generated from rc4-x86_64.pl, see the
# file header).  Register roles as established by the code below:
#   %rdi  first argument; advanced by 8 so the two 32-bit index words sit
#         at -8(%rdi) and -4(%rdi) and the lookup table starts at 0(%rdi)
#   %rsi  byte count; the function returns at once when it is zero,
#         otherwise the count lives in %r11
#   %rdx  source pointer, kept in %r12
#   %rcx  destination pointer, kept in %r13
#   %r10b first table index, %cl second table index
# %rbx, %r12 and %r13 are pushed on entry and reloaded at .Lexit.
# Four code paths follow: .Loop8 (8 bytes per pass, 32-bit table entries),
# .Loop16 (16 bytes per pass using SSE2 registers), .Lloop1 (one byte per
# pass) and .LRC4_CHAR (table of single bytes).
# NOTE(review): presumably matches the C prototype
#   void RC4(RC4_KEY *key, size_t len, const unsigned char *in,
#            unsigned char *out)
# confirm against the OpenSSL rc4.h header.
.globl RC4
|
||||
.type RC4,@function
|
||||
.align 16
|
||||
RC4:
|
||||
.cfi_startproc
|
||||
# Byte-encoded endbr64.
.byte 243,15,30,250
|
||||
# Nothing to do for a zero byte count.
orq %rsi,%rsi
|
||||
jne .Lentry
|
||||
# Byte-encoded "rep ret".
.byte 0xf3,0xc3
|
||||
.Lentry:
|
||||
pushq %rbx
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %rbx,-16
|
||||
pushq %r12
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r12,-24
|
||||
pushq %r13
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r13,-32
|
||||
.Lprologue:
|
||||
# Move the arguments out of the registers that are reused as scratch.
movq %rsi,%r11
|
||||
movq %rdx,%r12
|
||||
movq %rcx,%r13
|
||||
xorq %r10,%r10
|
||||
xorq %rcx,%rcx
|
||||
|
||||
# Point %rdi at the table and load the low byte of each saved index.
leaq 8(%rdi),%rdi
|
||||
movb -8(%rdi),%r10b
|
||||
movb -4(%rdi),%cl
|
||||
# A -1 word at 256(%rdi) marks a byte-sized table (RC4_set_key stores it
# after building one); that layout is handled at .LRC4_CHAR.
cmpl $-1,256(%rdi)
|
||||
je .LRC4_CHAR
|
||||
movl OPENSSL_ia32cap_P(%rip),%r8d
|
||||
# %rbx = -(first index + 1); its low bits give the warm-up byte count.
xorq %rbx,%rbx
|
||||
incb %r10b
|
||||
subq %r10,%rbx
|
||||
# %r13 becomes (destination - source) so stores can use (%r12,%r13,1).
subq %r12,%r13
|
||||
movl (%rdi,%r10,4),%eax
|
||||
# Fewer than 16 bytes: use the one-byte loop.
testq $-16,%r11
|
||||
jz .Lloop1
|
||||
# Bit 30 of the first OPENSSL_ia32cap_P word selects the 16-byte path.
btl $30,%r8d
|
||||
jc .Lintel
|
||||
andq $7,%rbx
|
||||
# leaq does not touch the flags, so jz still tests the andq result.
leaq 1(%r10),%rsi
|
||||
jz .Loop8
|
||||
subq %rbx,%r11
|
||||
# Process %rbx single bytes so that %r10b is a multiple of 8 when .Loop8
# starts; .Loop8 addresses entries at fixed offsets from (%rdi,%r10,4).
.Loop8_warmup:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl %edx,(%rdi,%r10,4)
|
||||
addb %dl,%al
|
||||
incb %r10b
|
||||
movl (%rdi,%rax,4),%edx
|
||||
movl (%rdi,%r10,4),%eax
|
||||
xorb (%r12),%dl
|
||||
movb %dl,(%r12,%r13,1)
|
||||
leaq 1(%r12),%r12
|
||||
decq %rbx
|
||||
jnz .Loop8_warmup
|
||||
|
||||
leaq 1(%r10),%rsi
|
||||
jmp .Loop8
|
||||
.align 16
|
||||
# Eight bytes per pass.  Each step swaps two table entries, looks up one
# output byte into %r8b and rotates %r8 by 8; after eight steps %r8 is
# XORed with eight source bytes and stored.
.Loop8:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl 0(%rdi,%rsi,4),%ebx
|
||||
rorq $8,%r8
|
||||
movl %edx,0(%rdi,%r10,4)
|
||||
addb %al,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %bl,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
movl 4(%rdi,%rsi,4),%eax
|
||||
rorq $8,%r8
|
||||
movl %edx,4(%rdi,%r10,4)
|
||||
addb %bl,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl 8(%rdi,%rsi,4),%ebx
|
||||
rorq $8,%r8
|
||||
movl %edx,8(%rdi,%r10,4)
|
||||
addb %al,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %bl,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
movl 12(%rdi,%rsi,4),%eax
|
||||
rorq $8,%r8
|
||||
movl %edx,12(%rdi,%r10,4)
|
||||
addb %bl,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl 16(%rdi,%rsi,4),%ebx
|
||||
rorq $8,%r8
|
||||
movl %edx,16(%rdi,%r10,4)
|
||||
addb %al,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %bl,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
movl 20(%rdi,%rsi,4),%eax
|
||||
rorq $8,%r8
|
||||
movl %edx,20(%rdi,%r10,4)
|
||||
addb %bl,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl 24(%rdi,%rsi,4),%ebx
|
||||
rorq $8,%r8
|
||||
movl %edx,24(%rdi,%r10,4)
|
||||
addb %al,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb $8,%sil
|
||||
addb %bl,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
movl -4(%rdi,%rsi,4),%eax
|
||||
rorq $8,%r8
|
||||
movl %edx,28(%rdi,%r10,4)
|
||||
addb %bl,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb $8,%r10b
|
||||
rorq $8,%r8
|
||||
subq $8,%r11
|
||||
|
||||
xorq (%r12),%r8
|
||||
movq %r8,(%r12,%r13,1)
|
||||
leaq 8(%r12),%r12
|
||||
|
||||
# Loop while at least 8 bytes remain; leftovers go to the one-byte loop.
testq $-8,%r11
|
||||
jnz .Loop8
|
||||
cmpq $0,%r11
|
||||
jne .Lloop1
|
||||
jmp .Lexit
|
||||
|
||||
.align 16
|
||||
# 16-byte path.  Fewer than 32 bytes still go to the one-byte loop.
.Lintel:
|
||||
testq $-32,%r11
|
||||
jz .Lloop1
|
||||
andq $15,%rbx
|
||||
jz .Loop16_is_hot
|
||||
subq %rbx,%r11
|
||||
# Process %rbx single bytes so that %r10b is a multiple of 16 when the
# unrolled loop starts.
.Loop16_warmup:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl %edx,(%rdi,%r10,4)
|
||||
addb %dl,%al
|
||||
incb %r10b
|
||||
movl (%rdi,%rax,4),%edx
|
||||
movl (%rdi,%r10,4),%eax
|
||||
xorb (%r12),%dl
|
||||
movb %dl,(%r12,%r13,1)
|
||||
leaq 1(%r12),%r12
|
||||
decq %rbx
|
||||
jnz .Loop16_warmup
|
||||
|
||||
# Clear the upper bits of %rcx, keeping only the index byte in %cl.
movq %rcx,%rbx
|
||||
xorq %rcx,%rcx
|
||||
movb %bl,%cl
|
||||
|
||||
# First step of the first 16-byte group, then join the loop body at
# .Loop16_enter (there is no previous group to store yet).
.Loop16_is_hot:
|
||||
leaq (%rdi,%r10,4),%rsi
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
pxor %xmm0,%xmm0
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 4(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,0(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $0,(%rdi,%rax,4),%xmm0
|
||||
jmp .Loop16_enter
|
||||
.align 16
|
||||
# Sixteen bytes per pass.  Output bytes for even positions are gathered
# into %xmm0 and for odd positions into %xmm1 with pinsrw; %xmm1 is shifted
# left by 8 bits and both are XORed into the 16 source bytes held in %xmm2.
# The top of the loop finishes and stores the previous group.
.Loop16:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
pxor %xmm0,%xmm2
|
||||
psllq $8,%xmm1
|
||||
pxor %xmm0,%xmm0
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 4(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,0(%rsi)
|
||||
pxor %xmm1,%xmm2
|
||||
addb %bl,%cl
|
||||
pinsrw $0,(%rdi,%rax,4),%xmm0
|
||||
movdqu %xmm2,(%r12,%r13,1)
|
||||
leaq 16(%r12),%r12
|
||||
.Loop16_enter:
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
pxor %xmm1,%xmm1
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 8(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,4(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $0,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 12(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,8(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $1,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 16(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,12(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $1,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 20(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,16(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $2,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 24(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,20(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $2,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 28(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,24(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $3,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 32(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,28(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $3,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 36(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,32(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $4,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 40(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,36(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $4,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 44(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,40(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $5,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 48(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,44(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $5,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 52(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,48(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $6,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 56(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,52(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $6,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 60(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,56(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $7,(%rdi,%rax,4),%xmm0
|
||||
addb $16,%r10b
|
||||
# Load the 16 source bytes for this group.
movdqu (%r12),%xmm2
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,60(%rsi)
|
||||
leaq (%rdi,%r10,4),%rsi
|
||||
pinsrw $7,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rsi),%eax
|
||||
# Clear the upper bits of %rcx, keeping only the index byte in %cl.
movq %rcx,%rbx
|
||||
xorq %rcx,%rcx
|
||||
subq $16,%r11
|
||||
movb %bl,%cl
|
||||
testq $-16,%r11
|
||||
jnz .Loop16
|
||||
|
||||
# Finish and store the last 16-byte group.
psllq $8,%xmm1
|
||||
pxor %xmm0,%xmm2
|
||||
pxor %xmm1,%xmm2
|
||||
movdqu %xmm2,(%r12,%r13,1)
|
||||
leaq 16(%r12),%r12
|
||||
|
||||
cmpq $0,%r11
|
||||
jne .Lloop1
|
||||
jmp .Lexit
|
||||
|
||||
.align 16
|
||||
# One byte per pass with 32-bit table entries; %r11 counts down to zero.
.Lloop1:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl %edx,(%rdi,%r10,4)
|
||||
addb %dl,%al
|
||||
incb %r10b
|
||||
movl (%rdi,%rax,4),%edx
|
||||
movl (%rdi,%r10,4),%eax
|
||||
xorb (%r12),%dl
|
||||
movb %dl,(%r12,%r13,1)
|
||||
leaq 1(%r12),%r12
|
||||
decq %r11
|
||||
jnz .Lloop1
|
||||
jmp .Lexit
|
||||
|
||||
.align 16
|
||||
# Byte-table path: entries are single bytes, and %r13 is still the plain
# destination pointer here (the subtraction above was skipped).
.LRC4_CHAR:
|
||||
addb $1,%r10b
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
testq $-8,%r11
|
||||
jz .Lcloop1
|
||||
jmp .Lcloop8
|
||||
.align 16
|
||||
# Eight bytes per pass.  Two 32-bit source words are loaded into %r8d and
# %r9d; each step XORs one looked-up byte into the low byte and rotates by
# 8.  The next table entry is preloaded before the swap stores; when the
# second index equals the next first index that preload is stale, so the
# value just stored is used instead (the .Lcmov labels).
.Lcloop8:
|
||||
movl (%r12),%r8d
|
||||
movl 4(%r12),%r9d
|
||||
addb %al,%cl
|
||||
leaq 1(%r10),%rsi
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %sil,%esi
|
||||
movzbl (%rdi,%rsi,1),%ebx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
cmpq %rsi,%rcx
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
jne .Lcmov0
|
||||
movq %rax,%rbx
|
||||
.Lcmov0:
|
||||
addb %al,%dl
|
||||
xorb (%rdi,%rdx,1),%r8b
|
||||
rorl $8,%r8d
|
||||
addb %bl,%cl
|
||||
leaq 1(%rsi),%r10
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
movb %bl,(%rdi,%rcx,1)
|
||||
cmpq %r10,%rcx
|
||||
movb %dl,(%rdi,%rsi,1)
|
||||
jne .Lcmov1
|
||||
movq %rbx,%rax
|
||||
.Lcmov1:
|
||||
addb %bl,%dl
|
||||
xorb (%rdi,%rdx,1),%r8b
|
||||
rorl $8,%r8d
|
||||
addb %al,%cl
|
||||
leaq 1(%r10),%rsi
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %sil,%esi
|
||||
movzbl (%rdi,%rsi,1),%ebx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
cmpq %rsi,%rcx
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
jne .Lcmov2
|
||||
movq %rax,%rbx
|
||||
.Lcmov2:
|
||||
addb %al,%dl
|
||||
xorb (%rdi,%rdx,1),%r8b
|
||||
rorl $8,%r8d
|
||||
addb %bl,%cl
|
||||
leaq 1(%rsi),%r10
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
movb %bl,(%rdi,%rcx,1)
|
||||
cmpq %r10,%rcx
|
||||
movb %dl,(%rdi,%rsi,1)
|
||||
jne .Lcmov3
|
||||
movq %rbx,%rax
|
||||
.Lcmov3:
|
||||
addb %bl,%dl
|
||||
xorb (%rdi,%rdx,1),%r8b
|
||||
rorl $8,%r8d
|
||||
addb %al,%cl
|
||||
leaq 1(%r10),%rsi
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %sil,%esi
|
||||
movzbl (%rdi,%rsi,1),%ebx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
cmpq %rsi,%rcx
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
jne .Lcmov4
|
||||
movq %rax,%rbx
|
||||
.Lcmov4:
|
||||
addb %al,%dl
|
||||
xorb (%rdi,%rdx,1),%r9b
|
||||
rorl $8,%r9d
|
||||
addb %bl,%cl
|
||||
leaq 1(%rsi),%r10
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
movb %bl,(%rdi,%rcx,1)
|
||||
cmpq %r10,%rcx
|
||||
movb %dl,(%rdi,%rsi,1)
|
||||
jne .Lcmov5
|
||||
movq %rbx,%rax
|
||||
.Lcmov5:
|
||||
addb %bl,%dl
|
||||
xorb (%rdi,%rdx,1),%r9b
|
||||
rorl $8,%r9d
|
||||
addb %al,%cl
|
||||
leaq 1(%r10),%rsi
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %sil,%esi
|
||||
movzbl (%rdi,%rsi,1),%ebx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
cmpq %rsi,%rcx
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
jne .Lcmov6
|
||||
movq %rax,%rbx
|
||||
.Lcmov6:
|
||||
addb %al,%dl
|
||||
xorb (%rdi,%rdx,1),%r9b
|
||||
rorl $8,%r9d
|
||||
addb %bl,%cl
|
||||
leaq 1(%rsi),%r10
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
movb %bl,(%rdi,%rcx,1)
|
||||
cmpq %r10,%rcx
|
||||
movb %dl,(%rdi,%rsi,1)
|
||||
jne .Lcmov7
|
||||
movq %rbx,%rax
|
||||
.Lcmov7:
|
||||
addb %bl,%dl
|
||||
xorb (%rdi,%rdx,1),%r9b
|
||||
rorl $8,%r9d
|
||||
# Count down by 8, store both result words and advance both pointers.
leaq -8(%r11),%r11
|
||||
movl %r8d,(%r13)
|
||||
leaq 8(%r12),%r12
|
||||
movl %r9d,4(%r13)
|
||||
leaq 8(%r13),%r13
|
||||
|
||||
testq $-8,%r11
|
||||
jnz .Lcloop8
|
||||
cmpq $0,%r11
|
||||
jne .Lcloop1
|
||||
jmp .Lexit
|
||||
.align 16
|
||||
# One byte per pass with byte-sized table entries.
.Lcloop1:
|
||||
addb %al,%cl
|
||||
movzbl %cl,%ecx
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
addb %al,%dl
|
||||
addb $1,%r10b
|
||||
movzbl %dl,%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%rdx,1),%edx
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
xorb (%r12),%dl
|
||||
leaq 1(%r12),%r12
|
||||
movb %dl,(%r13)
|
||||
leaq 1(%r13),%r13
|
||||
subq $1,%r11
|
||||
jnz .Lcloop1
|
||||
jmp .Lexit
|
||||
|
||||
.align 16
|
||||
# Common exit: undo the look-ahead increment of the first index, write both
# indices back to -8(%rdi) and -4(%rdi), then reload the saved registers.
.Lexit:
|
||||
subb $1,%r10b
|
||||
movl %r10d,-8(%rdi)
|
||||
movl %ecx,-4(%rdi)
|
||||
|
||||
movq (%rsp),%r13
|
||||
.cfi_restore %r13
|
||||
movq 8(%rsp),%r12
|
||||
.cfi_restore %r12
|
||||
movq 16(%rsp),%rbx
|
||||
.cfi_restore %rbx
|
||||
addq $24,%rsp
|
||||
.cfi_adjust_cfa_offset -24
|
||||
.Lepilogue:
|
||||
# Byte-encoded "rep ret".
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size RC4,.-RC4
|
||||
# RC4_set_key: build the lookup table used by RC4 from a key.
# Register roles as established by the code below:
#   %rdi  first argument; advanced by 8 so the two 32-bit index words sit
#         at -8(%rdi) and -4(%rdi) and the table starts at 0(%rdi)
#   %rsi  key length; negated so that (%rdx,%rsi,1) walks the key while
#         %rsi counts up towards zero
#   %rdx  key pointer; moved to one past the last key byte
#   %rcx  copy of the negated length, used to restart the walk
# Bit 20 of the first OPENSSL_ia32cap_P word selects a table of single
# bytes (.Lc1stloop); otherwise the table holds 32-bit entries.
# NOTE(review): presumably matches the C prototype
#   void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data)
# confirm against the OpenSSL rc4.h header.
.globl RC4_set_key
|
||||
.type RC4_set_key,@function
|
||||
.align 16
|
||||
RC4_set_key:
|
||||
.cfi_startproc
|
||||
# Byte-encoded endbr64.
.byte 243,15,30,250
|
||||
leaq 8(%rdi),%rdi
|
||||
leaq (%rdx,%rsi,1),%rdx
|
||||
negq %rsi
|
||||
movq %rsi,%rcx
|
||||
xorl %eax,%eax
|
||||
xorq %r9,%r9
|
||||
xorq %r10,%r10
|
||||
xorq %r11,%r11
|
||||
|
||||
movl OPENSSL_ia32cap_P(%rip),%r8d
|
||||
btl $20,%r8d
|
||||
jc .Lc1stloop
|
||||
jmp .Lw1stloop
|
||||
|
||||
.align 16
|
||||
# 32-bit entries: fill entry i with i; the loop ends when %al wraps to 0.
.Lw1stloop:
|
||||
movl %eax,(%rdi,%rax,4)
|
||||
addb $1,%al
|
||||
jnc .Lw1stloop
|
||||
|
||||
xorq %r9,%r9
|
||||
xorq %r8,%r8
|
||||
.align 16
|
||||
# Mixing pass: %r8b accumulates key byte plus entry %r9, then entries %r8
# and %r9 are swapped.  The cmovz restarts the key walk when %rsi hits 0
# (the intervening movl does not change the flags set by addq).
.Lw2ndloop:
|
||||
movl (%rdi,%r9,4),%r10d
|
||||
addb (%rdx,%rsi,1),%r8b
|
||||
addb %r10b,%r8b
|
||||
addq $1,%rsi
|
||||
movl (%rdi,%r8,4),%r11d
|
||||
cmovzq %rcx,%rsi
|
||||
movl %r10d,(%rdi,%r8,4)
|
||||
movl %r11d,(%rdi,%r9,4)
|
||||
addb $1,%r9b
|
||||
jnc .Lw2ndloop
|
||||
jmp .Lexit_key
|
||||
|
||||
.align 16
|
||||
# Byte entries: same two passes on a table of single bytes.
.Lc1stloop:
|
||||
movb %al,(%rdi,%rax,1)
|
||||
addb $1,%al
|
||||
jnc .Lc1stloop
|
||||
|
||||
xorq %r9,%r9
|
||||
xorq %r8,%r8
|
||||
.align 16
|
||||
.Lc2ndloop:
|
||||
movb (%rdi,%r9,1),%r10b
|
||||
addb (%rdx,%rsi,1),%r8b
|
||||
addb %r10b,%r8b
|
||||
addq $1,%rsi
|
||||
movb (%rdi,%r8,1),%r11b
|
||||
jnz .Lcnowrap
|
||||
movq %rcx,%rsi
|
||||
.Lcnowrap:
|
||||
movb %r10b,(%rdi,%r8,1)
|
||||
movb %r11b,(%rdi,%r9,1)
|
||||
addb $1,%r9b
|
||||
jnc .Lc2ndloop
|
||||
# Mark the byte-table layout; RC4 compares this word against -1.
movl $-1,256(%rdi)
|
||||
|
||||
.align 16
|
||||
# Reset both saved indices to zero and return.
.Lexit_key:
|
||||
xorl %eax,%eax
|
||||
movl %eax,-8(%rdi)
|
||||
movl %eax,-4(%rdi)
|
||||
# Byte-encoded "rep ret".
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size RC4_set_key,.-RC4_set_key
|
||||
|
||||
# RC4_options: return in %rax the address of a NUL-terminated string that
# names the code path chosen from OPENSSL_ia32cap_P:
#   bit 20 set              -> .Lopts+12  "rc4(8x,char)"
#   bit 20 clear, 30 set    -> .Lopts+25  "rc4(16x,int)"
#   both clear              -> .Lopts     "rc4(8x,int)"
# The strings are stored as decimal ASCII bytes at .Lopts below.
.globl RC4_options
|
||||
.type RC4_options,@function
|
||||
.align 16
|
||||
RC4_options:
|
||||
.cfi_startproc
|
||||
# Byte-encoded endbr64.
.byte 243,15,30,250
|
||||
leaq .Lopts(%rip),%rax
|
||||
movl OPENSSL_ia32cap_P(%rip),%edx
|
||||
btl $20,%edx
|
||||
jc .L8xchar
|
||||
btl $30,%edx
|
||||
jnc .Ldone
|
||||
addq $25,%rax
|
||||
# Byte-encoded "rep ret".
.byte 0xf3,0xc3
|
||||
.L8xchar:
|
||||
addq $12,%rax
|
||||
.Ldone:
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.align 64
|
||||
.Lopts:
|
||||
# "rc4(8x,int)", 12 bytes including the terminator.
.byte 114,99,52,40,56,120,44,105,110,116,41,0
|
||||
# "rc4(8x,char)", 13 bytes including the terminator.
.byte 114,99,52,40,56,120,44,99,104,97,114,41,0
|
||||
# "rc4(16x,int)".
.byte 114,99,52,40,49,54,120,44,105,110,116,41,0
|
||||
# "RC4 for x86_64, CRYPTOGAMS by <appro@openssl.org>".
.byte 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 64
|
||||
.size RC4_options,.-RC4_options
|
||||
# ELF note emitted into the allocatable section ".note.gnu.property".
# Layout written below: name size, descriptor size, note type, the name
# bytes "GNU\0", then one property record (type, data size, data), with
# the descriptor padded to an 8-byte boundary.
# NOTE(review): note type 5, property type 0xc0000002 and value 3 are
# presumably NT_GNU_PROPERTY_TYPE_0, GNU_PROPERTY_X86_FEATURE_1_AND and
# IBT|SHSTK; confirm against the x86-64 psABI.
.section ".note.gnu.property", "a"
|
||||
.p2align 3
|
||||
# Name size: distance between labels 0 and 1 (the "GNU\0" bytes).
.long 1f - 0f
|
||||
# Descriptor size: distance between labels 1 and 4.
.long 4f - 1f
|
||||
# Note type.
.long 5
|
||||
0:
|
||||
# "GNU" encoded with .byte, since .asciz isn't supported
|
||||
# on Solaris.
|
||||
.byte 0x47
|
||||
.byte 0x4e
|
||||
.byte 0x55
|
||||
.byte 0
|
||||
1:
|
||||
.p2align 3
|
||||
# Property record: type, then size of the data between labels 2 and 3.
.long 0xc0000002
|
||||
.long 3f - 2f
|
||||
2:
|
||||
# Property data word.
.long 3
|
||||
3:
|
||||
.p2align 3
|
||||
4:
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,902 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from rsaz-avx512.pl. */
|
||||
|
||||
# ossl_rsaz_avx512ifma_eligible: capability probe.
# Reads the 32-bit word at OPENSSL_ia32cap_P+8 and checks that every bit of
# the mask 2149777408 (0x80230000: bits 16, 17, 21 and 31) is set.
# Returns the mask value in %eax when all are set, otherwise 0.
# Clobbers %ecx and the flags; uses no stack.
# NOTE(review): the four bits are presumably the AVX512F, AVX512DQ,
# AVX512IFMA and AVX512VL feature flags; confirm against the
# OPENSSL_ia32cap documentation.
.globl ossl_rsaz_avx512ifma_eligible
|
||||
.type ossl_rsaz_avx512ifma_eligible,@function
|
||||
.align 32
|
||||
ossl_rsaz_avx512ifma_eligible:
|
||||
movl OPENSSL_ia32cap_P+8(%rip),%ecx
|
||||
xorl %eax,%eax
|
||||
andl $2149777408,%ecx
|
||||
cmpl $2149777408,%ecx
|
||||
# Copy the mask to %eax only when every tested bit was set.
cmovel %ecx,%eax
|
||||
# Byte-encoded "rep ret".
.byte 0xf3,0xc3
|
||||
.size ossl_rsaz_avx512ifma_eligible, .-ossl_rsaz_avx512ifma_eligible
|
||||
.text
|
||||
|
||||
# ossl_rsaz_amm52x20_x1_256 (generated from rsaz-avx512.pl, see the file
# header).  Register roles as established by the code below:
#   %rdi  result; five 32-byte vectors are stored at 0..128(%rdi)
#   %rsi  first operand, read as five 32-byte vectors and as word 0
#   %rdx  second operand; copied to %r11 and consumed one 64-bit word at a
#         time (5 loop passes x 4 words = 20 words)
#   %rcx  third operand, multiplied by the per-word factor derived from %r8
#   %r8   64-bit constant multiplied into the running low word
#   %rax  mask with the low 52 bits set
#   %ymm1, %ymm16-%ymm19  20-lane accumulator; %ymm0 stays zero
#   %r9   scalar accumulator for lane 0
# %rbx, %rbp and %r12-%r15 are pushed on entry and reloaded before return.
# NOTE(review): presumably "almost Montgomery multiplication" of 20-digit
# radix-2^52 numbers, res = a*b/2^(52*20) mod m with k0 = -1/m mod 2^64;
# confirm against rsaz-avx512.pl.
.globl ossl_rsaz_amm52x20_x1_256
|
||||
.type ossl_rsaz_amm52x20_x1_256,@function
|
||||
.align 32
|
||||
ossl_rsaz_amm52x20_x1_256:
|
||||
.cfi_startproc
|
||||
# Byte-encoded endbr64.
.byte 243,15,30,250
|
||||
pushq %rbx
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %rbx,-16
|
||||
pushq %rbp
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %rbp,-24
|
||||
pushq %r12
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r12,-32
|
||||
pushq %r13
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r13,-40
|
||||
pushq %r14
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r14,-48
|
||||
pushq %r15
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r15,-56
|
||||
.Lrsaz_amm52x20_x1_256_body:
|
||||
|
||||
|
||||
# Zero the vector accumulator and the scalar accumulator.
vpxord %ymm0,%ymm0,%ymm0
|
||||
vmovdqa64 %ymm0,%ymm1
|
||||
vmovdqa64 %ymm0,%ymm16
|
||||
vmovdqa64 %ymm0,%ymm17
|
||||
vmovdqa64 %ymm0,%ymm18
|
||||
vmovdqa64 %ymm0,%ymm19
|
||||
|
||||
xorl %r9d,%r9d
|
||||
|
||||
# %rdx is needed as the implicit mulx operand, so keep its pointer in %r11.
movq %rdx,%r11
|
||||
movq $0xfffffffffffff,%rax
|
||||
|
||||
|
||||
# Five passes; each pass handles the words at 0, 8, 16 and 24(%r11).
movl $5,%ebx
|
||||
|
||||
.align 32
|
||||
.Lloop5:
|
||||
# Step for the word at 0(%r11): broadcast it to %ymm3 and add the low half
# of (word 0 of %rsi) x (this word) to %r9, keeping the high half in %r10.
movq 0(%r11),%r13
|
||||
|
||||
vpbroadcastq %r13,%ymm3
|
||||
movq 0(%rsi),%rdx
|
||||
mulxq %r13,%r13,%r12
|
||||
addq %r13,%r9
|
||||
movq %r12,%r10
|
||||
adcq $0,%r10
|
||||
|
||||
# Per-step factor: (%r8 x %r9) masked to 52 bits, broadcast to %ymm4.
movq %r8,%r13
|
||||
imulq %r9,%r13
|
||||
andq %rax,%r13
|
||||
|
||||
vpbroadcastq %r13,%ymm4
|
||||
movq 0(%rcx),%rdx
|
||||
mulxq %r13,%r13,%r12
|
||||
addq %r13,%r9
|
||||
adcq %r12,%r10
|
||||
|
||||
# %r9 = (%r10:%r9) shifted right by 52 bits.
shrq $52,%r9
|
||||
salq $12,%r10
|
||||
orq %r10,%r9
|
||||
|
||||
# Accumulate the low 52-bit product halves for both factors.
vpmadd52luq 0(%rsi),%ymm3,%ymm1
|
||||
vpmadd52luq 32(%rsi),%ymm3,%ymm16
|
||||
vpmadd52luq 64(%rsi),%ymm3,%ymm17
|
||||
vpmadd52luq 96(%rsi),%ymm3,%ymm18
|
||||
vpmadd52luq 128(%rsi),%ymm3,%ymm19
|
||||
|
||||
vpmadd52luq 0(%rcx),%ymm4,%ymm1
|
||||
vpmadd52luq 32(%rcx),%ymm4,%ymm16
|
||||
vpmadd52luq 64(%rcx),%ymm4,%ymm17
|
||||
vpmadd52luq 96(%rcx),%ymm4,%ymm18
|
||||
vpmadd52luq 128(%rcx),%ymm4,%ymm19
|
||||
|
||||
|
||||
# Shift the whole accumulator down by one 64-bit lane, filling with zero.
valignq $1,%ymm1,%ymm16,%ymm1
|
||||
valignq $1,%ymm16,%ymm17,%ymm16
|
||||
valignq $1,%ymm17,%ymm18,%ymm17
|
||||
valignq $1,%ymm18,%ymm19,%ymm18
|
||||
valignq $1,%ymm19,%ymm0,%ymm19
|
||||
|
||||
# Fold the new lane 0 into the scalar accumulator.
vmovq %xmm1,%r13
|
||||
addq %r13,%r9
|
||||
|
||||
# Accumulate the high 52-bit product halves after the lane shift.
vpmadd52huq 0(%rsi),%ymm3,%ymm1
|
||||
vpmadd52huq 32(%rsi),%ymm3,%ymm16
|
||||
vpmadd52huq 64(%rsi),%ymm3,%ymm17
|
||||
vpmadd52huq 96(%rsi),%ymm3,%ymm18
|
||||
vpmadd52huq 128(%rsi),%ymm3,%ymm19
|
||||
|
||||
vpmadd52huq 0(%rcx),%ymm4,%ymm1
|
||||
vpmadd52huq 32(%rcx),%ymm4,%ymm16
|
||||
vpmadd52huq 64(%rcx),%ymm4,%ymm17
|
||||
vpmadd52huq 96(%rcx),%ymm4,%ymm18
|
||||
vpmadd52huq 128(%rcx),%ymm4,%ymm19
|
||||
# Same step for the word at 8(%r11).
movq 8(%r11),%r13
|
||||
|
||||
vpbroadcastq %r13,%ymm3
|
||||
movq 0(%rsi),%rdx
|
||||
mulxq %r13,%r13,%r12
|
||||
addq %r13,%r9
|
||||
movq %r12,%r10
|
||||
adcq $0,%r10
|
||||
|
||||
movq %r8,%r13
|
||||
imulq %r9,%r13
|
||||
andq %rax,%r13
|
||||
|
||||
vpbroadcastq %r13,%ymm4
|
||||
movq 0(%rcx),%rdx
|
||||
mulxq %r13,%r13,%r12
|
||||
addq %r13,%r9
|
||||
adcq %r12,%r10
|
||||
|
||||
shrq $52,%r9
|
||||
salq $12,%r10
|
||||
orq %r10,%r9
|
||||
|
||||
vpmadd52luq 0(%rsi),%ymm3,%ymm1
|
||||
vpmadd52luq 32(%rsi),%ymm3,%ymm16
|
||||
vpmadd52luq 64(%rsi),%ymm3,%ymm17
|
||||
vpmadd52luq 96(%rsi),%ymm3,%ymm18
|
||||
vpmadd52luq 128(%rsi),%ymm3,%ymm19
|
||||
|
||||
vpmadd52luq 0(%rcx),%ymm4,%ymm1
|
||||
vpmadd52luq 32(%rcx),%ymm4,%ymm16
|
||||
vpmadd52luq 64(%rcx),%ymm4,%ymm17
|
||||
vpmadd52luq 96(%rcx),%ymm4,%ymm18
|
||||
vpmadd52luq 128(%rcx),%ymm4,%ymm19
|
||||
|
||||
|
||||
valignq $1,%ymm1,%ymm16,%ymm1
|
||||
valignq $1,%ymm16,%ymm17,%ymm16
|
||||
valignq $1,%ymm17,%ymm18,%ymm17
|
||||
valignq $1,%ymm18,%ymm19,%ymm18
|
||||
valignq $1,%ymm19,%ymm0,%ymm19
|
||||
|
||||
vmovq %xmm1,%r13
|
||||
addq %r13,%r9
|
||||
|
||||
vpmadd52huq 0(%rsi),%ymm3,%ymm1
|
||||
vpmadd52huq 32(%rsi),%ymm3,%ymm16
|
||||
vpmadd52huq 64(%rsi),%ymm3,%ymm17
|
||||
vpmadd52huq 96(%rsi),%ymm3,%ymm18
|
||||
vpmadd52huq 128(%rsi),%ymm3,%ymm19
|
||||
|
||||
vpmadd52huq 0(%rcx),%ymm4,%ymm1
|
||||
vpmadd52huq 32(%rcx),%ymm4,%ymm16
|
||||
vpmadd52huq 64(%rcx),%ymm4,%ymm17
|
||||
vpmadd52huq 96(%rcx),%ymm4,%ymm18
|
||||
vpmadd52huq 128(%rcx),%ymm4,%ymm19
|
||||
# Same step for the word at 16(%r11).
movq 16(%r11),%r13
|
||||
|
||||
vpbroadcastq %r13,%ymm3
|
||||
movq 0(%rsi),%rdx
|
||||
mulxq %r13,%r13,%r12
|
||||
addq %r13,%r9
|
||||
movq %r12,%r10
|
||||
adcq $0,%r10
|
||||
|
||||
movq %r8,%r13
|
||||
imulq %r9,%r13
|
||||
andq %rax,%r13
|
||||
|
||||
vpbroadcastq %r13,%ymm4
|
||||
movq 0(%rcx),%rdx
|
||||
mulxq %r13,%r13,%r12
|
||||
addq %r13,%r9
|
||||
adcq %r12,%r10
|
||||
|
||||
shrq $52,%r9
|
||||
salq $12,%r10
|
||||
orq %r10,%r9
|
||||
|
||||
vpmadd52luq 0(%rsi),%ymm3,%ymm1
|
||||
vpmadd52luq 32(%rsi),%ymm3,%ymm16
|
||||
vpmadd52luq 64(%rsi),%ymm3,%ymm17
|
||||
vpmadd52luq 96(%rsi),%ymm3,%ymm18
|
||||
vpmadd52luq 128(%rsi),%ymm3,%ymm19
|
||||
|
||||
vpmadd52luq 0(%rcx),%ymm4,%ymm1
|
||||
vpmadd52luq 32(%rcx),%ymm4,%ymm16
|
||||
vpmadd52luq 64(%rcx),%ymm4,%ymm17
|
||||
vpmadd52luq 96(%rcx),%ymm4,%ymm18
|
||||
vpmadd52luq 128(%rcx),%ymm4,%ymm19
|
||||
|
||||
|
||||
valignq $1,%ymm1,%ymm16,%ymm1
|
||||
valignq $1,%ymm16,%ymm17,%ymm16
|
||||
valignq $1,%ymm17,%ymm18,%ymm17
|
||||
valignq $1,%ymm18,%ymm19,%ymm18
|
||||
valignq $1,%ymm19,%ymm0,%ymm19
|
||||
|
||||
vmovq %xmm1,%r13
|
||||
addq %r13,%r9
|
||||
|
||||
vpmadd52huq 0(%rsi),%ymm3,%ymm1
|
||||
vpmadd52huq 32(%rsi),%ymm3,%ymm16
|
||||
vpmadd52huq 64(%rsi),%ymm3,%ymm17
|
||||
vpmadd52huq 96(%rsi),%ymm3,%ymm18
|
||||
vpmadd52huq 128(%rsi),%ymm3,%ymm19
|
||||
|
||||
vpmadd52huq 0(%rcx),%ymm4,%ymm1
|
||||
vpmadd52huq 32(%rcx),%ymm4,%ymm16
|
||||
vpmadd52huq 64(%rcx),%ymm4,%ymm17
|
||||
vpmadd52huq 96(%rcx),%ymm4,%ymm18
|
||||
vpmadd52huq 128(%rcx),%ymm4,%ymm19
|
||||
# Same step for the word at 24(%r11).
movq 24(%r11),%r13
|
||||
|
||||
vpbroadcastq %r13,%ymm3
|
||||
movq 0(%rsi),%rdx
|
||||
mulxq %r13,%r13,%r12
|
||||
addq %r13,%r9
|
||||
movq %r12,%r10
|
||||
adcq $0,%r10
|
||||
|
||||
movq %r8,%r13
|
||||
imulq %r9,%r13
|
||||
andq %rax,%r13
|
||||
|
||||
vpbroadcastq %r13,%ymm4
|
||||
movq 0(%rcx),%rdx
|
||||
mulxq %r13,%r13,%r12
|
||||
addq %r13,%r9
|
||||
adcq %r12,%r10
|
||||
|
||||
shrq $52,%r9
|
||||
salq $12,%r10
|
||||
orq %r10,%r9
|
||||
|
||||
vpmadd52luq 0(%rsi),%ymm3,%ymm1
|
||||
vpmadd52luq 32(%rsi),%ymm3,%ymm16
|
||||
vpmadd52luq 64(%rsi),%ymm3,%ymm17
|
||||
vpmadd52luq 96(%rsi),%ymm3,%ymm18
|
||||
vpmadd52luq 128(%rsi),%ymm3,%ymm19
|
||||
|
||||
vpmadd52luq 0(%rcx),%ymm4,%ymm1
|
||||
vpmadd52luq 32(%rcx),%ymm4,%ymm16
|
||||
vpmadd52luq 64(%rcx),%ymm4,%ymm17
|
||||
vpmadd52luq 96(%rcx),%ymm4,%ymm18
|
||||
vpmadd52luq 128(%rcx),%ymm4,%ymm19
|
||||
|
||||
|
||||
valignq $1,%ymm1,%ymm16,%ymm1
|
||||
valignq $1,%ymm16,%ymm17,%ymm16
|
||||
valignq $1,%ymm17,%ymm18,%ymm17
|
||||
valignq $1,%ymm18,%ymm19,%ymm18
|
||||
valignq $1,%ymm19,%ymm0,%ymm19
|
||||
|
||||
vmovq %xmm1,%r13
|
||||
addq %r13,%r9
|
||||
|
||||
vpmadd52huq 0(%rsi),%ymm3,%ymm1
|
||||
vpmadd52huq 32(%rsi),%ymm3,%ymm16
|
||||
vpmadd52huq 64(%rsi),%ymm3,%ymm17
|
||||
vpmadd52huq 96(%rsi),%ymm3,%ymm18
|
||||
vpmadd52huq 128(%rsi),%ymm3,%ymm19
|
||||
|
||||
vpmadd52huq 0(%rcx),%ymm4,%ymm1
|
||||
vpmadd52huq 32(%rcx),%ymm4,%ymm16
|
||||
vpmadd52huq 64(%rcx),%ymm4,%ymm17
|
||||
vpmadd52huq 96(%rcx),%ymm4,%ymm18
|
||||
vpmadd52huq 128(%rcx),%ymm4,%ymm19
|
||||
# Advance to the next four words and repeat.
leaq 32(%r11),%r11
|
||||
decl %ebx
|
||||
jne .Lloop5
|
||||
|
||||
# Normalisation: load the four-lane 52-bit mask into %ymm4.
vmovdqa64 .Lmask52x4(%rip),%ymm4
|
||||
|
||||
# Put the scalar accumulator back into lane 0 of %ymm1.
vpbroadcastq %r9,%ymm3
|
||||
vpblendd $3,%ymm3,%ymm1,%ymm1
|
||||
|
||||
|
||||
|
||||
# Extract the bits above bit 51 of every lane.
vpsrlq $52,%ymm1,%ymm24
|
||||
vpsrlq $52,%ymm16,%ymm25
|
||||
vpsrlq $52,%ymm17,%ymm26
|
||||
vpsrlq $52,%ymm18,%ymm27
|
||||
vpsrlq $52,%ymm19,%ymm28
|
||||
|
||||
|
||||
# Move those bits up by one lane so each lines up with the next digit.
valignq $3,%ymm27,%ymm28,%ymm28
|
||||
valignq $3,%ymm26,%ymm27,%ymm27
|
||||
valignq $3,%ymm25,%ymm26,%ymm26
|
||||
valignq $3,%ymm24,%ymm25,%ymm25
|
||||
valignq $3,%ymm0,%ymm24,%ymm24
|
||||
|
||||
|
||||
# Keep the low 52 bits of every lane.
vpandq %ymm4,%ymm1,%ymm1
|
||||
vpandq %ymm4,%ymm16,%ymm16
|
||||
vpandq %ymm4,%ymm17,%ymm17
|
||||
vpandq %ymm4,%ymm18,%ymm18
|
||||
vpandq %ymm4,%ymm19,%ymm19
|
||||
|
||||
|
||||
# Add the shifted-up carries.
vpaddq %ymm24,%ymm1,%ymm1
|
||||
vpaddq %ymm25,%ymm16,%ymm16
|
||||
vpaddq %ymm26,%ymm17,%ymm17
|
||||
vpaddq %ymm27,%ymm18,%ymm18
|
||||
vpaddq %ymm28,%ymm19,%ymm19
|
||||
|
||||
|
||||
|
||||
# Predicate 1 (unsigned less-than): lanes whose value exceeds the mask.
vpcmpuq $1,%ymm1,%ymm4,%k1
|
||||
vpcmpuq $1,%ymm16,%ymm4,%k2
|
||||
vpcmpuq $1,%ymm17,%ymm4,%k3
|
||||
vpcmpuq $1,%ymm18,%ymm4,%k4
|
||||
vpcmpuq $1,%ymm19,%ymm4,%k5
|
||||
kmovb %k1,%r14d
|
||||
kmovb %k2,%r13d
|
||||
kmovb %k3,%r12d
|
||||
kmovb %k4,%r11d
|
||||
kmovb %k5,%r10d
|
||||
|
||||
|
||||
# Predicate 0 (equal): lanes whose value equals the mask exactly.
vpcmpuq $0,%ymm1,%ymm4,%k1
|
||||
vpcmpuq $0,%ymm16,%ymm4,%k2
|
||||
vpcmpuq $0,%ymm17,%ymm4,%k3
|
||||
vpcmpuq $0,%ymm18,%ymm4,%k4
|
||||
vpcmpuq $0,%ymm19,%ymm4,%k5
|
||||
kmovb %k1,%r9d
|
||||
kmovb %k2,%r8d
|
||||
kmovb %k3,%ebx
|
||||
kmovb %k4,%ecx
|
||||
kmovb %k5,%edx
|
||||
|
||||
|
||||
|
||||
# Pack the 4-bit "exceeds" masks into bytes and shift the 20-bit string
# left by one so each bit refers to the next lane.
shlb $4,%r13b
|
||||
orb %r13b,%r14b
|
||||
shlb $4,%r11b
|
||||
orb %r11b,%r12b
|
||||
|
||||
addb %r14b,%r14b
|
||||
adcb %r12b,%r12b
|
||||
adcb %r10b,%r10b
|
||||
|
||||
# Pack the "equal" masks the same way.
shlb $4,%r8b
|
||||
orb %r8b,%r9b
|
||||
shlb $4,%cl
|
||||
orb %cl,%bl
|
||||
|
||||
# Adding the two bit strings lets a carry ripple through runs of "equal"
# lanes; XOR with the "equal" bits then leaves the lanes to adjust.
addb %r9b,%r14b
|
||||
adcb %bl,%r12b
|
||||
adcb %dl,%r10b
|
||||
|
||||
xorb %r9b,%r14b
|
||||
xorb %bl,%r12b
|
||||
xorb %dl,%r10b
|
||||
|
||||
# Unpack the result bytes back into one mask register per vector.
kmovb %r14d,%k1
|
||||
shrb $4,%r14b
|
||||
kmovb %r14d,%k2
|
||||
kmovb %r12d,%k3
|
||||
shrb $4,%r12b
|
||||
kmovb %r12d,%k4
|
||||
kmovb %r10d,%k5
|
||||
|
||||
|
||||
# In the selected lanes subtract the mask, then truncate all lanes to 52
# bits again.
vpsubq %ymm4,%ymm1,%ymm1{%k1}
|
||||
vpsubq %ymm4,%ymm16,%ymm16{%k2}
|
||||
vpsubq %ymm4,%ymm17,%ymm17{%k3}
|
||||
vpsubq %ymm4,%ymm18,%ymm18{%k4}
|
||||
vpsubq %ymm4,%ymm19,%ymm19{%k5}
|
||||
|
||||
vpandq %ymm4,%ymm1,%ymm1
|
||||
vpandq %ymm4,%ymm16,%ymm16
|
||||
vpandq %ymm4,%ymm17,%ymm17
|
||||
vpandq %ymm4,%ymm18,%ymm18
|
||||
vpandq %ymm4,%ymm19,%ymm19
|
||||
|
||||
# Store the 20 result lanes.
vmovdqu64 %ymm1,(%rdi)
|
||||
vmovdqu64 %ymm16,32(%rdi)
|
||||
vmovdqu64 %ymm17,64(%rdi)
|
||||
vmovdqu64 %ymm18,96(%rdi)
|
||||
vmovdqu64 %ymm19,128(%rdi)
|
||||
|
||||
# Reload the saved registers from the stack and release the 48 bytes.
vzeroupper
|
||||
movq 0(%rsp),%r15
|
||||
.cfi_restore %r15
|
||||
movq 8(%rsp),%r14
|
||||
.cfi_restore %r14
|
||||
movq 16(%rsp),%r13
|
||||
.cfi_restore %r13
|
||||
movq 24(%rsp),%r12
|
||||
.cfi_restore %r12
|
||||
movq 32(%rsp),%rbp
|
||||
.cfi_restore %rbp
|
||||
movq 40(%rsp),%rbx
|
||||
.cfi_restore %rbx
|
||||
leaq 48(%rsp),%rsp
|
||||
.cfi_adjust_cfa_offset -48
|
||||
.Lrsaz_amm52x20_x1_256_epilogue:
|
||||
# Byte-encoded "rep ret".
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size ossl_rsaz_amm52x20_x1_256, .-ossl_rsaz_amm52x20_x1_256
|
||||
# .Lmask52x4: four 64-bit lanes, each with the low 52 bits set.  It is
# loaded with vmovdqa64 in ossl_rsaz_amm52x20_x1_256, hence the 32-byte
# alignment.  The constant is placed in .data; assembly then switches back
# to .text.
.data
|
||||
.align 32
|
||||
.Lmask52x4:
|
||||
.quad 0xfffffffffffff
|
||||
.quad 0xfffffffffffff
|
||||
.quad 0xfffffffffffff
|
||||
.quad 0xfffffffffffff
|
||||
.text
|
||||
|
||||
# ossl_rsaz_amm52x20_x2_256: runs the same 20-limb (52 bits per limb)
# multiply-accumulate pass on two independent operand sets in one call,
# using the AVX-512 IFMA instructions vpmadd52luq / vpmadd52huq.
# Register use as seen in this body (SysV argument registers):
#   rdi - result; 160 bytes are stored at offset 0 and 160 bytes at 160
#   rsi - first operand, read at offsets 0.. and 160..
#   rdx - second operand, one 8-byte word per loop iteration (via r11)
#   rcx - third operand, read at offsets 0.. and 160..
#         NOTE(review): presumably the two moduli - confirm with caller
#   r8  - two 64-bit constants at offsets 0 and 8
#         NOTE(review): presumably the Montgomery k0 values - confirm
# rbx, rbp, r12-r15 are pushed on entry and reloaded before returning.
.globl ossl_rsaz_amm52x20_x2_256
|
||||
.type ossl_rsaz_amm52x20_x2_256,@function
|
||||
.align 32
|
||||
ossl_rsaz_amm52x20_x2_256:
|
||||
.cfi_startproc
|
||||
# endbr64, written as raw bytes
.byte 243,15,30,250
|
||||
pushq %rbx
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %rbx,-16
|
||||
pushq %rbp
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %rbp,-24
|
||||
pushq %r12
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r12,-32
|
||||
pushq %r13
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r13,-40
|
||||
pushq %r14
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r14,-48
|
||||
pushq %r15
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r15,-56
|
||||
.Lrsaz_amm52x20_x2_256_body:
|
||||
|
||||
|
||||
# ymm0 stays zero for the whole function. ymm1,ymm16-ymm19 accumulate
# the first half, ymm2,ymm20-ymm23 the second half.
vpxord %ymm0,%ymm0,%ymm0
|
||||
vmovdqa64 %ymm0,%ymm1
|
||||
vmovdqa64 %ymm0,%ymm16
|
||||
vmovdqa64 %ymm0,%ymm17
|
||||
vmovdqa64 %ymm0,%ymm18
|
||||
vmovdqa64 %ymm0,%ymm19
|
||||
vmovdqa64 %ymm0,%ymm2
|
||||
vmovdqa64 %ymm0,%ymm20
|
||||
vmovdqa64 %ymm0,%ymm21
|
||||
vmovdqa64 %ymm0,%ymm22
|
||||
vmovdqa64 %ymm0,%ymm23
|
||||
|
||||
# r9 and r15 are the scalar low-limb accumulators of the two halves.
xorl %r9d,%r9d
|
||||
xorl %r15d,%r15d
|
||||
|
||||
# rdx is the implicit mulx source below, so the word pointer moves to r11.
movq %rdx,%r11
|
||||
# rax = 2^52 - 1, used to mask scalar values to one limb.
movq $0xfffffffffffff,%rax
|
||||
|
||||
# Loop counter: ebx iterations, one 8-byte word of the r11 operand each.
movl $20,%ebx
|
||||
|
||||
.align 32
|
||||
.Lloop20:
|
||||
# First half: word at 0(%r11), operands at offset 0, constant at (%r8).
movq 0(%r11),%r13
|
||||
|
||||
vpbroadcastq %r13,%ymm3
|
||||
movq 0(%rsi),%rdx
|
||||
mulxq %r13,%r13,%r12
|
||||
addq %r13,%r9
|
||||
movq %r12,%r10
|
||||
adcq $0,%r10
|
||||
|
||||
# r13 = ((%r8) * r9) masked to 52 bits; broadcast into ymm4.
movq (%r8),%r13
|
||||
imulq %r9,%r13
|
||||
andq %rax,%r13
|
||||
|
||||
vpbroadcastq %r13,%ymm4
|
||||
movq 0(%rcx),%rdx
|
||||
mulxq %r13,%r13,%r12
|
||||
addq %r13,%r9
|
||||
adcq %r12,%r10
|
||||
|
||||
# r9 = (r10:r9) >> 52
shrq $52,%r9
|
||||
salq $12,%r10
|
||||
orq %r10,%r9
|
||||
|
||||
vpmadd52luq 0(%rsi),%ymm3,%ymm1
|
||||
vpmadd52luq 32(%rsi),%ymm3,%ymm16
|
||||
vpmadd52luq 64(%rsi),%ymm3,%ymm17
|
||||
vpmadd52luq 96(%rsi),%ymm3,%ymm18
|
||||
vpmadd52luq 128(%rsi),%ymm3,%ymm19
|
||||
|
||||
vpmadd52luq 0(%rcx),%ymm4,%ymm1
|
||||
vpmadd52luq 32(%rcx),%ymm4,%ymm16
|
||||
vpmadd52luq 64(%rcx),%ymm4,%ymm17
|
||||
vpmadd52luq 96(%rcx),%ymm4,%ymm18
|
||||
vpmadd52luq 128(%rcx),%ymm4,%ymm19
|
||||
|
||||
|
||||
# Shift the 20-qword accumulator down by one qword, zero-filling the top.
valignq $1,%ymm1,%ymm16,%ymm1
|
||||
valignq $1,%ymm16,%ymm17,%ymm16
|
||||
valignq $1,%ymm17,%ymm18,%ymm17
|
||||
valignq $1,%ymm18,%ymm19,%ymm18
|
||||
valignq $1,%ymm19,%ymm0,%ymm19
|
||||
|
||||
vmovq %xmm1,%r13
|
||||
addq %r13,%r9
|
||||
|
||||
vpmadd52huq 0(%rsi),%ymm3,%ymm1
|
||||
vpmadd52huq 32(%rsi),%ymm3,%ymm16
|
||||
vpmadd52huq 64(%rsi),%ymm3,%ymm17
|
||||
vpmadd52huq 96(%rsi),%ymm3,%ymm18
|
||||
vpmadd52huq 128(%rsi),%ymm3,%ymm19
|
||||
|
||||
vpmadd52huq 0(%rcx),%ymm4,%ymm1
|
||||
vpmadd52huq 32(%rcx),%ymm4,%ymm16
|
||||
vpmadd52huq 64(%rcx),%ymm4,%ymm17
|
||||
vpmadd52huq 96(%rcx),%ymm4,%ymm18
|
||||
vpmadd52huq 128(%rcx),%ymm4,%ymm19
|
||||
# Second half: same steps with the word at 160(%r11), operands at
# offset 160, the constant at 8(%r8) and accumulator r15.
movq 160(%r11),%r13
|
||||
|
||||
vpbroadcastq %r13,%ymm3
|
||||
movq 160(%rsi),%rdx
|
||||
mulxq %r13,%r13,%r12
|
||||
addq %r13,%r15
|
||||
movq %r12,%r10
|
||||
adcq $0,%r10
|
||||
|
||||
movq 8(%r8),%r13
|
||||
imulq %r15,%r13
|
||||
andq %rax,%r13
|
||||
|
||||
vpbroadcastq %r13,%ymm4
|
||||
movq 160(%rcx),%rdx
|
||||
mulxq %r13,%r13,%r12
|
||||
addq %r13,%r15
|
||||
adcq %r12,%r10
|
||||
|
||||
shrq $52,%r15
|
||||
salq $12,%r10
|
||||
orq %r10,%r15
|
||||
|
||||
vpmadd52luq 160(%rsi),%ymm3,%ymm2
|
||||
vpmadd52luq 192(%rsi),%ymm3,%ymm20
|
||||
vpmadd52luq 224(%rsi),%ymm3,%ymm21
|
||||
vpmadd52luq 256(%rsi),%ymm3,%ymm22
|
||||
vpmadd52luq 288(%rsi),%ymm3,%ymm23
|
||||
|
||||
vpmadd52luq 160(%rcx),%ymm4,%ymm2
|
||||
vpmadd52luq 192(%rcx),%ymm4,%ymm20
|
||||
vpmadd52luq 224(%rcx),%ymm4,%ymm21
|
||||
vpmadd52luq 256(%rcx),%ymm4,%ymm22
|
||||
vpmadd52luq 288(%rcx),%ymm4,%ymm23
|
||||
|
||||
|
||||
valignq $1,%ymm2,%ymm20,%ymm2
|
||||
valignq $1,%ymm20,%ymm21,%ymm20
|
||||
valignq $1,%ymm21,%ymm22,%ymm21
|
||||
valignq $1,%ymm22,%ymm23,%ymm22
|
||||
valignq $1,%ymm23,%ymm0,%ymm23
|
||||
|
||||
vmovq %xmm2,%r13
|
||||
addq %r13,%r15
|
||||
|
||||
vpmadd52huq 160(%rsi),%ymm3,%ymm2
|
||||
vpmadd52huq 192(%rsi),%ymm3,%ymm20
|
||||
vpmadd52huq 224(%rsi),%ymm3,%ymm21
|
||||
vpmadd52huq 256(%rsi),%ymm3,%ymm22
|
||||
vpmadd52huq 288(%rsi),%ymm3,%ymm23
|
||||
|
||||
vpmadd52huq 160(%rcx),%ymm4,%ymm2
|
||||
vpmadd52huq 192(%rcx),%ymm4,%ymm20
|
||||
vpmadd52huq 224(%rcx),%ymm4,%ymm21
|
||||
vpmadd52huq 256(%rcx),%ymm4,%ymm22
|
||||
vpmadd52huq 288(%rcx),%ymm4,%ymm23
|
||||
leaq 8(%r11),%r11
|
||||
decl %ebx
|
||||
jne .Lloop20
|
||||
|
||||
# Normalize the first half. ymm4 = per-lane 52-bit mask.
vmovdqa64 .Lmask52x4(%rip),%ymm4
|
||||
|
||||
# Put the scalar accumulator r9 into the low qword of ymm1.
vpbroadcastq %r9,%ymm3
|
||||
vpblendd $3,%ymm3,%ymm1,%ymm1
|
||||
|
||||
|
||||
|
||||
# ymm24-ymm28 = bits 52 and up of every limb (the carries).
vpsrlq $52,%ymm1,%ymm24
|
||||
vpsrlq $52,%ymm16,%ymm25
|
||||
vpsrlq $52,%ymm17,%ymm26
|
||||
vpsrlq $52,%ymm18,%ymm27
|
||||
vpsrlq $52,%ymm19,%ymm28
|
||||
|
||||
|
||||
# Move every carry up by one limb position, zero into the lowest one.
valignq $3,%ymm27,%ymm28,%ymm28
|
||||
valignq $3,%ymm26,%ymm27,%ymm27
|
||||
valignq $3,%ymm25,%ymm26,%ymm26
|
||||
valignq $3,%ymm24,%ymm25,%ymm25
|
||||
valignq $3,%ymm0,%ymm24,%ymm24
|
||||
|
||||
|
||||
vpandq %ymm4,%ymm1,%ymm1
|
||||
vpandq %ymm4,%ymm16,%ymm16
|
||||
vpandq %ymm4,%ymm17,%ymm17
|
||||
vpandq %ymm4,%ymm18,%ymm18
|
||||
vpandq %ymm4,%ymm19,%ymm19
|
||||
|
||||
|
||||
vpaddq %ymm24,%ymm1,%ymm1
|
||||
vpaddq %ymm25,%ymm16,%ymm16
|
||||
vpaddq %ymm26,%ymm17,%ymm17
|
||||
vpaddq %ymm27,%ymm18,%ymm18
|
||||
vpaddq %ymm28,%ymm19,%ymm19
|
||||
|
||||
|
||||
|
||||
# Predicate 1 (unsigned less-than): lanes where mask < limb, i.e. the
# limb now exceeds 52 bits and produces a carry.
vpcmpuq $1,%ymm1,%ymm4,%k1
|
||||
vpcmpuq $1,%ymm16,%ymm4,%k2
|
||||
vpcmpuq $1,%ymm17,%ymm4,%k3
|
||||
vpcmpuq $1,%ymm18,%ymm4,%k4
|
||||
vpcmpuq $1,%ymm19,%ymm4,%k5
|
||||
kmovb %k1,%r14d
|
||||
kmovb %k2,%r13d
|
||||
kmovb %k3,%r12d
|
||||
kmovb %k4,%r11d
|
||||
kmovb %k5,%r10d
|
||||
|
||||
|
||||
# Predicate 0 (equal): lanes where limb == mask; such a lane passes an
# incoming carry on to the next lane.
vpcmpuq $0,%ymm1,%ymm4,%k1
|
||||
vpcmpuq $0,%ymm16,%ymm4,%k2
|
||||
vpcmpuq $0,%ymm17,%ymm4,%k3
|
||||
vpcmpuq $0,%ymm18,%ymm4,%k4
|
||||
vpcmpuq $0,%ymm19,%ymm4,%k5
|
||||
kmovb %k1,%r9d
|
||||
kmovb %k2,%r8d
|
||||
kmovb %k3,%ebx
|
||||
kmovb %k4,%ecx
|
||||
kmovb %k5,%edx
|
||||
|
||||
|
||||
|
||||
# Pack the 4-bit lane masks two per byte, then resolve carry
# propagation across all 20 lanes with byte add/adc chains:
# (less-than mask shifted left by one) + equal mask, xor equal mask.
shlb $4,%r13b
|
||||
orb %r13b,%r14b
|
||||
shlb $4,%r11b
|
||||
orb %r11b,%r12b
|
||||
|
||||
addb %r14b,%r14b
|
||||
adcb %r12b,%r12b
|
||||
adcb %r10b,%r10b
|
||||
|
||||
shlb $4,%r8b
|
||||
orb %r8b,%r9b
|
||||
shlb $4,%cl
|
||||
orb %cl,%bl
|
||||
|
||||
addb %r9b,%r14b
|
||||
adcb %bl,%r12b
|
||||
adcb %dl,%r10b
|
||||
|
||||
xorb %r9b,%r14b
|
||||
xorb %bl,%r12b
|
||||
xorb %dl,%r10b
|
||||
|
||||
# Unpack the result bits back into the five 4-lane masks k1-k5.
kmovb %r14d,%k1
|
||||
shrb $4,%r14b
|
||||
kmovb %r14d,%k2
|
||||
kmovb %r12d,%k3
|
||||
shrb $4,%r12b
|
||||
kmovb %r12d,%k4
|
||||
kmovb %r10d,%k5
|
||||
|
||||
|
||||
# Selected lanes get the mask subtracted; after the 52-bit masking that
# follows this equals adding 1 modulo 2^52.
vpsubq %ymm4,%ymm1,%ymm1{%k1}
|
||||
vpsubq %ymm4,%ymm16,%ymm16{%k2}
|
||||
vpsubq %ymm4,%ymm17,%ymm17{%k3}
|
||||
vpsubq %ymm4,%ymm18,%ymm18{%k4}
|
||||
vpsubq %ymm4,%ymm19,%ymm19{%k5}
|
||||
|
||||
vpandq %ymm4,%ymm1,%ymm1
|
||||
vpandq %ymm4,%ymm16,%ymm16
|
||||
vpandq %ymm4,%ymm17,%ymm17
|
||||
vpandq %ymm4,%ymm18,%ymm18
|
||||
vpandq %ymm4,%ymm19,%ymm19
|
||||
|
||||
# Normalize the second half the same way (r15, ymm2, ymm20-ymm23).
vpbroadcastq %r15,%ymm3
|
||||
vpblendd $3,%ymm3,%ymm2,%ymm2
|
||||
|
||||
|
||||
|
||||
vpsrlq $52,%ymm2,%ymm24
|
||||
vpsrlq $52,%ymm20,%ymm25
|
||||
vpsrlq $52,%ymm21,%ymm26
|
||||
vpsrlq $52,%ymm22,%ymm27
|
||||
vpsrlq $52,%ymm23,%ymm28
|
||||
|
||||
|
||||
valignq $3,%ymm27,%ymm28,%ymm28
|
||||
valignq $3,%ymm26,%ymm27,%ymm27
|
||||
valignq $3,%ymm25,%ymm26,%ymm26
|
||||
valignq $3,%ymm24,%ymm25,%ymm25
|
||||
valignq $3,%ymm0,%ymm24,%ymm24
|
||||
|
||||
|
||||
vpandq %ymm4,%ymm2,%ymm2
|
||||
vpandq %ymm4,%ymm20,%ymm20
|
||||
vpandq %ymm4,%ymm21,%ymm21
|
||||
vpandq %ymm4,%ymm22,%ymm22
|
||||
vpandq %ymm4,%ymm23,%ymm23
|
||||
|
||||
|
||||
vpaddq %ymm24,%ymm2,%ymm2
|
||||
vpaddq %ymm25,%ymm20,%ymm20
|
||||
vpaddq %ymm26,%ymm21,%ymm21
|
||||
vpaddq %ymm27,%ymm22,%ymm22
|
||||
vpaddq %ymm28,%ymm23,%ymm23
|
||||
|
||||
|
||||
|
||||
vpcmpuq $1,%ymm2,%ymm4,%k1
|
||||
vpcmpuq $1,%ymm20,%ymm4,%k2
|
||||
vpcmpuq $1,%ymm21,%ymm4,%k3
|
||||
vpcmpuq $1,%ymm22,%ymm4,%k4
|
||||
vpcmpuq $1,%ymm23,%ymm4,%k5
|
||||
kmovb %k1,%r14d
|
||||
kmovb %k2,%r13d
|
||||
kmovb %k3,%r12d
|
||||
kmovb %k4,%r11d
|
||||
kmovb %k5,%r10d
|
||||
|
||||
|
||||
vpcmpuq $0,%ymm2,%ymm4,%k1
|
||||
vpcmpuq $0,%ymm20,%ymm4,%k2
|
||||
vpcmpuq $0,%ymm21,%ymm4,%k3
|
||||
vpcmpuq $0,%ymm22,%ymm4,%k4
|
||||
vpcmpuq $0,%ymm23,%ymm4,%k5
|
||||
kmovb %k1,%r9d
|
||||
kmovb %k2,%r8d
|
||||
kmovb %k3,%ebx
|
||||
kmovb %k4,%ecx
|
||||
kmovb %k5,%edx
|
||||
|
||||
|
||||
|
||||
shlb $4,%r13b
|
||||
orb %r13b,%r14b
|
||||
shlb $4,%r11b
|
||||
orb %r11b,%r12b
|
||||
|
||||
addb %r14b,%r14b
|
||||
adcb %r12b,%r12b
|
||||
adcb %r10b,%r10b
|
||||
|
||||
shlb $4,%r8b
|
||||
orb %r8b,%r9b
|
||||
shlb $4,%cl
|
||||
orb %cl,%bl
|
||||
|
||||
addb %r9b,%r14b
|
||||
adcb %bl,%r12b
|
||||
adcb %dl,%r10b
|
||||
|
||||
xorb %r9b,%r14b
|
||||
xorb %bl,%r12b
|
||||
xorb %dl,%r10b
|
||||
|
||||
kmovb %r14d,%k1
|
||||
shrb $4,%r14b
|
||||
kmovb %r14d,%k2
|
||||
kmovb %r12d,%k3
|
||||
shrb $4,%r12b
|
||||
kmovb %r12d,%k4
|
||||
kmovb %r10d,%k5
|
||||
|
||||
|
||||
vpsubq %ymm4,%ymm2,%ymm2{%k1}
|
||||
vpsubq %ymm4,%ymm20,%ymm20{%k2}
|
||||
vpsubq %ymm4,%ymm21,%ymm21{%k3}
|
||||
vpsubq %ymm4,%ymm22,%ymm22{%k4}
|
||||
vpsubq %ymm4,%ymm23,%ymm23{%k5}
|
||||
|
||||
vpandq %ymm4,%ymm2,%ymm2
|
||||
vpandq %ymm4,%ymm20,%ymm20
|
||||
vpandq %ymm4,%ymm21,%ymm21
|
||||
vpandq %ymm4,%ymm22,%ymm22
|
||||
vpandq %ymm4,%ymm23,%ymm23
|
||||
|
||||
# Store the first result at 0..159 of rdi.
vmovdqu64 %ymm1,(%rdi)
|
||||
vmovdqu64 %ymm16,32(%rdi)
|
||||
vmovdqu64 %ymm17,64(%rdi)
|
||||
vmovdqu64 %ymm18,96(%rdi)
|
||||
vmovdqu64 %ymm19,128(%rdi)
|
||||
|
||||
# Store the second result at 160..319 of rdi.
vmovdqu64 %ymm2,160(%rdi)
|
||||
vmovdqu64 %ymm20,192(%rdi)
|
||||
vmovdqu64 %ymm21,224(%rdi)
|
||||
vmovdqu64 %ymm22,256(%rdi)
|
||||
vmovdqu64 %ymm23,288(%rdi)
|
||||
|
||||
vzeroupper
|
||||
# Reload the six registers pushed on entry (r15 was pushed last, so it
# sits at 0(%rsp)), then drop the 48 bytes.
movq 0(%rsp),%r15
|
||||
.cfi_restore %r15
|
||||
movq 8(%rsp),%r14
|
||||
.cfi_restore %r14
|
||||
movq 16(%rsp),%r13
|
||||
.cfi_restore %r13
|
||||
movq 24(%rsp),%r12
|
||||
.cfi_restore %r12
|
||||
movq 32(%rsp),%rbp
|
||||
.cfi_restore %rbp
|
||||
movq 40(%rsp),%rbx
|
||||
.cfi_restore %rbx
|
||||
leaq 48(%rsp),%rsp
|
||||
.cfi_adjust_cfa_offset -48
|
||||
.Lrsaz_amm52x20_x2_256_epilogue:
|
||||
# rep ret, written as raw bytes
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size ossl_rsaz_amm52x20_x2_256, .-ossl_rsaz_amm52x20_x2_256
|
||||
.text
|
||||
|
||||
.align 32
|
||||
# ossl_extract_multiplier_2x20_win5: copy one 160-byte table entry to
# the output by scanning the whole table and selecting with a mask.
# Register use as seen in this body (SysV argument registers):
#   rdi - output, 160 bytes written
#   rsi - table base; entries are 320 bytes apart
#   rdx - index of the wanted entry (compared against a running counter)
#   rcx - multiplied by 160 and added to rsi before the scan, so it
#         picks the first or second 160-byte half of every entry
# The loop always walks 10240 bytes (32 entries of 320 bytes) and has no
# branch or address that depends on rdx.
.globl ossl_extract_multiplier_2x20_win5
|
||||
.type ossl_extract_multiplier_2x20_win5,@function
|
||||
ossl_extract_multiplier_2x20_win5:
|
||||
.cfi_startproc
|
||||
# endbr64, written as raw bytes
.byte 243,15,30,250
|
||||
# rsi += rcx * 160  (rcx * 5, shifted left by 5)
leaq (%rcx,%rcx,4),%rax
|
||||
salq $5,%rax
|
||||
addq %rax,%rsi
|
||||
|
||||
# ymm23 = all ones (counter increment), ymm22 = wanted index in every
# lane, rax = end of scan.
vmovdqa64 .Lones(%rip),%ymm23
|
||||
vpbroadcastq %rdx,%ymm22
|
||||
leaq 10240(%rsi),%rax
|
||||
|
||||
# ymm0-ymm4 = result accumulators, ymm21 = running entry counter; all
# start at zero.
vpxor %xmm4,%xmm4,%xmm4
|
||||
vmovdqa64 %ymm4,%ymm3
|
||||
vmovdqa64 %ymm4,%ymm2
|
||||
vmovdqa64 %ymm4,%ymm1
|
||||
vmovdqa64 %ymm4,%ymm0
|
||||
vmovdqa64 %ymm4,%ymm21
|
||||
|
||||
.align 32
|
||||
.Lloop:
|
||||
# k1 = all lanes set when the counter equals the wanted index.
vpcmpq $0,%ymm21,%ymm22,%k1
|
||||
addq $320,%rsi
|
||||
vpaddq %ymm23,%ymm21,%ymm21
|
||||
vmovdqu64 -320(%rsi),%ymm16
|
||||
vmovdqu64 -288(%rsi),%ymm17
|
||||
vmovdqu64 -256(%rsi),%ymm18
|
||||
vmovdqu64 -224(%rsi),%ymm19
|
||||
vmovdqu64 -192(%rsi),%ymm20
|
||||
# Take the freshly loaded entry where k1 is set, else keep the
# accumulator.
vpblendmq %ymm16,%ymm0,%ymm0{%k1}
|
||||
vpblendmq %ymm17,%ymm1,%ymm1{%k1}
|
||||
vpblendmq %ymm18,%ymm2,%ymm2{%k1}
|
||||
vpblendmq %ymm19,%ymm3,%ymm3{%k1}
|
||||
vpblendmq %ymm20,%ymm4,%ymm4{%k1}
|
||||
cmpq %rsi,%rax
|
||||
jne .Lloop
|
||||
|
||||
vmovdqu64 %ymm0,(%rdi)
|
||||
vmovdqu64 %ymm1,32(%rdi)
|
||||
vmovdqu64 %ymm2,64(%rdi)
|
||||
vmovdqu64 %ymm3,96(%rdi)
|
||||
vmovdqu64 %ymm4,128(%rdi)
|
||||
|
||||
# rep ret, written as raw bytes
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size ossl_extract_multiplier_2x20_win5, .-ossl_extract_multiplier_2x20_win5
|
||||
# Four qwords of 1: the per-iteration increment of the entry counter in
# ossl_extract_multiplier_2x20_win5. It is only loaded
# (vmovdqa64 .Lones(%rip)), so it is placed in read-only data instead of
# the writable .data section. The generator script should be changed the
# same way, since this file is generated output.
.section .rodata
|
||||
.align 32
|
||||
.Lones:
|
||||
.quad 1,1,1,1
|
||||
# ELF note describing x86 feature properties of this object.
# Layout: name size, descriptor size, note type, name, descriptor.
.section ".note.gnu.property", "a"
|
||||
.p2align 3
|
||||
# Name size: bytes between labels 0 and 1.
.long 1f - 0f
|
||||
# Descriptor size: bytes between labels 1 and 4.
.long 4f - 1f
|
||||
# Note type 5 (NT_GNU_PROPERTY_TYPE_0 in the GNU property note ABI).
.long 5
|
||||
0:
|
||||
# "GNU" encoded with .byte, since .asciz isn't supported
|
||||
# on Solaris.
|
||||
.byte 0x47
|
||||
.byte 0x4e
|
||||
.byte 0x55
|
||||
.byte 0
|
||||
1:
|
||||
.p2align 3
|
||||
# Property type 0xc0000002 (GNU_PROPERTY_X86_FEATURE_1_AND).
.long 0xc0000002
|
||||
# Property data size: bytes between labels 2 and 3.
.long 3f - 2f
|
||||
2:
|
||||
# Feature bits: value 3 = IBT (bit 0) and SHSTK (bit 1).
.long 3
|
||||
3:
|
||||
.p2align 3
|
||||
4:
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,880 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from vpaes-x86_64.pl. */
|
||||
.text
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# _vpaes_encrypt_core: transforms the 16-byte block in xmm0 in place
# using the key schedule at rdx; the result is left in xmm0.
#   rdx        - key schedule; round keys are read from (%r9) upward and
#                the 32-bit value at offset 240 is the loop counter
#   xmm9-xmm15 - constants that must already be loaded (the callers in
#                this file run _vpaes_preheat first)
# Scratch: rax, r9, r10, r11, xmm1-xmm5.
# The .byte 102,15,56,0,N lines are pshufb instructions written as raw
# bytes (66 0F 38 00 /r).
.type _vpaes_encrypt_core,@function
|
||||
.align 16
|
||||
_vpaes_encrypt_core:
|
||||
.cfi_startproc
|
||||
movq %rdx,%r9
|
||||
movq $16,%r11
|
||||
movl 240(%rdx),%eax
|
||||
# Split xmm0 into high nibbles (xmm1) and low nibbles (xmm0) using the
# mask in xmm9, look both up in the .Lk_ipt tables, xor in round key 0.
movdqa %xmm9,%xmm1
|
||||
movdqa .Lk_ipt(%rip),%xmm2
|
||||
pandn %xmm0,%xmm1
|
||||
movdqu (%r9),%xmm5
|
||||
psrld $4,%xmm1
|
||||
pand %xmm9,%xmm0
|
||||
.byte 102,15,56,0,208
|
||||
movdqa .Lk_ipt+16(%rip),%xmm0
|
||||
.byte 102,15,56,0,193
|
||||
pxor %xmm5,%xmm2
|
||||
addq $16,%r9
|
||||
pxor %xmm2,%xmm0
|
||||
leaq .Lk_mc_backward(%rip),%r10
|
||||
jmp .Lenc_entry
|
||||
|
||||
.align 16
|
||||
.Lenc_loop:
|
||||
|
||||
movdqa %xmm13,%xmm4
|
||||
movdqa %xmm12,%xmm0
|
||||
.byte 102,15,56,0,226
|
||||
.byte 102,15,56,0,195
|
||||
pxor %xmm5,%xmm4
|
||||
movdqa %xmm15,%xmm5
|
||||
pxor %xmm4,%xmm0
|
||||
# -64(%r11,%r10) addresses .Lk_mc_forward + r11, (%r11,%r10) addresses
# .Lk_mc_backward + r11; r11 cycles through 0,16,32,48.
movdqa -64(%r11,%r10,1),%xmm1
|
||||
.byte 102,15,56,0,234
|
||||
movdqa (%r11,%r10,1),%xmm4
|
||||
movdqa %xmm14,%xmm2
|
||||
.byte 102,15,56,0,211
|
||||
movdqa %xmm0,%xmm3
|
||||
pxor %xmm5,%xmm2
|
||||
.byte 102,15,56,0,193
|
||||
addq $16,%r9
|
||||
pxor %xmm2,%xmm0
|
||||
.byte 102,15,56,0,220
|
||||
addq $16,%r11
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,56,0,193
|
||||
andq $0x30,%r11
|
||||
subq $1,%rax
|
||||
pxor %xmm3,%xmm0
|
||||
|
||||
.Lenc_entry:
|
||||
|
||||
movdqa %xmm9,%xmm1
|
||||
movdqa %xmm11,%xmm5
|
||||
pandn %xmm0,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm9,%xmm0
|
||||
.byte 102,15,56,0,232
|
||||
movdqa %xmm10,%xmm3
|
||||
pxor %xmm1,%xmm0
|
||||
.byte 102,15,56,0,217
|
||||
movdqa %xmm10,%xmm4
|
||||
pxor %xmm5,%xmm3
|
||||
.byte 102,15,56,0,224
|
||||
movdqa %xmm10,%xmm2
|
||||
pxor %xmm5,%xmm4
|
||||
.byte 102,15,56,0,211
|
||||
movdqa %xmm10,%xmm3
|
||||
pxor %xmm0,%xmm2
|
||||
.byte 102,15,56,0,220
|
||||
movdqu (%r9),%xmm5
|
||||
pxor %xmm1,%xmm3
|
||||
# The flags tested here were set by subq $1,%rax in the loop body (or by
# addq $16,%r9 on the first pass); the SSE instructions in between do
# not modify them.
jnz .Lenc_loop
|
||||
|
||||
|
||||
# Final step: -96(%r10) and -80(%r10) address .Lk_sbo, 64(%r11,%r10)
# addresses .Lk_sr + r11.
movdqa -96(%r10),%xmm4
|
||||
movdqa -80(%r10),%xmm0
|
||||
.byte 102,15,56,0,226
|
||||
pxor %xmm5,%xmm4
|
||||
.byte 102,15,56,0,195
|
||||
movdqa 64(%r11,%r10,1),%xmm1
|
||||
pxor %xmm4,%xmm0
|
||||
.byte 102,15,56,0,193
|
||||
# rep ret, written as raw bytes
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# _vpaes_decrypt_core: transforms the 16-byte block in xmm0 in place
# using the key schedule at rdx; the result is left in xmm0.
#   rdx        - key schedule; round keys are read from (%r9) upward and
#                the 32-bit value at offset 240 is the loop counter
#   xmm9-xmm11 - constants that must already be loaded (the callers in
#                this file run _vpaes_preheat first)
# Scratch: rax, r9, r10, r11, xmm1-xmm5.
# The .byte 102,15,56,0,N lines are pshufb and the .byte 102,15,58,15,...
# line is palignr, all written as raw bytes.
.type _vpaes_decrypt_core,@function
|
||||
.align 16
|
||||
_vpaes_decrypt_core:
|
||||
.cfi_startproc
|
||||
movq %rdx,%r9
|
||||
movl 240(%rdx),%eax
|
||||
movdqa %xmm9,%xmm1
|
||||
movdqa .Lk_dipt(%rip),%xmm2
|
||||
pandn %xmm0,%xmm1
|
||||
# r11 = ((counter << 4) ^ 0x30) & 0x30, later offset by r10; it selects
# one of four 16-byte rows for the final shuffle.
movq %rax,%r11
|
||||
psrld $4,%xmm1
|
||||
movdqu (%r9),%xmm5
|
||||
shlq $4,%r11
|
||||
pand %xmm9,%xmm0
|
||||
.byte 102,15,56,0,208
|
||||
movdqa .Lk_dipt+16(%rip),%xmm0
|
||||
xorq $0x30,%r11
|
||||
leaq .Lk_dsbd(%rip),%r10
|
||||
.byte 102,15,56,0,193
|
||||
andq $0x30,%r11
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa .Lk_mc_forward+48(%rip),%xmm5
|
||||
pxor %xmm2,%xmm0
|
||||
addq $16,%r9
|
||||
addq %r10,%r11
|
||||
jmp .Ldec_entry
|
||||
|
||||
.align 16
|
||||
.Ldec_loop:
|
||||
|
||||
|
||||
|
||||
# Four table pairs relative to r10 = .Lk_dsbd: offsets -32 (.Lk_dsb9),
# 0 (.Lk_dsbd), 32 (.Lk_dsbb) and 64 (.Lk_dsbe).
movdqa -32(%r10),%xmm4
|
||||
movdqa -16(%r10),%xmm1
|
||||
.byte 102,15,56,0,226
|
||||
.byte 102,15,56,0,203
|
||||
pxor %xmm4,%xmm0
|
||||
movdqa 0(%r10),%xmm4
|
||||
pxor %xmm1,%xmm0
|
||||
movdqa 16(%r10),%xmm1
|
||||
|
||||
.byte 102,15,56,0,226
|
||||
.byte 102,15,56,0,197
|
||||
.byte 102,15,56,0,203
|
||||
pxor %xmm4,%xmm0
|
||||
movdqa 32(%r10),%xmm4
|
||||
pxor %xmm1,%xmm0
|
||||
movdqa 48(%r10),%xmm1
|
||||
|
||||
.byte 102,15,56,0,226
|
||||
.byte 102,15,56,0,197
|
||||
.byte 102,15,56,0,203
|
||||
pxor %xmm4,%xmm0
|
||||
movdqa 64(%r10),%xmm4
|
||||
pxor %xmm1,%xmm0
|
||||
movdqa 80(%r10),%xmm1
|
||||
|
||||
.byte 102,15,56,0,226
|
||||
.byte 102,15,56,0,197
|
||||
.byte 102,15,56,0,203
|
||||
pxor %xmm4,%xmm0
|
||||
addq $16,%r9
|
||||
# palignr $12,%xmm5,%xmm5: rotate the shuffle control in xmm5.
.byte 102,15,58,15,237,12
|
||||
pxor %xmm1,%xmm0
|
||||
subq $1,%rax
|
||||
|
||||
.Ldec_entry:
|
||||
|
||||
movdqa %xmm9,%xmm1
|
||||
pandn %xmm0,%xmm1
|
||||
movdqa %xmm11,%xmm2
|
||||
psrld $4,%xmm1
|
||||
pand %xmm9,%xmm0
|
||||
.byte 102,15,56,0,208
|
||||
movdqa %xmm10,%xmm3
|
||||
pxor %xmm1,%xmm0
|
||||
.byte 102,15,56,0,217
|
||||
movdqa %xmm10,%xmm4
|
||||
pxor %xmm2,%xmm3
|
||||
.byte 102,15,56,0,224
|
||||
pxor %xmm2,%xmm4
|
||||
movdqa %xmm10,%xmm2
|
||||
.byte 102,15,56,0,211
|
||||
movdqa %xmm10,%xmm3
|
||||
pxor %xmm0,%xmm2
|
||||
.byte 102,15,56,0,220
|
||||
movdqu (%r9),%xmm0
|
||||
pxor %xmm1,%xmm3
|
||||
# The flags tested here were set by subq $1,%rax in the loop body (or by
# addq %r10,%r11 on the first pass); the SSE instructions in between do
# not modify them.
jnz .Ldec_loop
|
||||
|
||||
|
||||
# Final step: 96(%r10) and 112(%r10) address .Lk_dsbo; -352(%r11)
# addresses .Lk_sr plus the row offset computed at entry.
movdqa 96(%r10),%xmm4
|
||||
.byte 102,15,56,0,226
|
||||
pxor %xmm0,%xmm4
|
||||
movdqa 112(%r10),%xmm0
|
||||
movdqa -352(%r11),%xmm2
|
||||
.byte 102,15,56,0,195
|
||||
pxor %xmm4,%xmm0
|
||||
.byte 102,15,56,0,194
|
||||
# rep ret, written as raw bytes
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size _vpaes_decrypt_core,.-_vpaes_decrypt_core
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# _vpaes_schedule_core: expands a user key into a round-key schedule.
# Register use as seen in this body:
#   rdi - user key bytes (16 bytes at 0, more at 8 or 16 for longer keys)
#   esi - key size in bits; compared against 192 to pick the 128, 192 or
#         256 path, then reused as the round counter
#   rdx - where round keys are written; advanced by the mangle helper
#   rcx - zero on the encrypt path, non-zero on the decrypt path
#   r8  - byte offset (0,16,32,48) into the .Lk_sr shuffle rows
# xmm0-xmm7 are zeroed before returning.
.type _vpaes_schedule_core,@function
|
||||
.align 16
|
||||
_vpaes_schedule_core:
|
||||
.cfi_startproc
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
call _vpaes_preheat
|
||||
movdqa .Lk_rcon(%rip),%xmm8
|
||||
movdqu (%rdi),%xmm0
|
||||
|
||||
|
||||
# Keep the raw key in xmm3, transform xmm0 with the .Lk_ipt tables and
# keep the transformed key in xmm7.
movdqa %xmm0,%xmm3
|
||||
leaq .Lk_ipt(%rip),%r11
|
||||
call _vpaes_schedule_transform
|
||||
movdqa %xmm0,%xmm7
|
||||
|
||||
leaq .Lk_sr(%rip),%r10
|
||||
testq %rcx,%rcx
|
||||
jnz .Lschedule_am_decrypting
|
||||
|
||||
|
||||
# Encrypt path: first round key is the transformed key.
movdqu %xmm0,(%rdx)
|
||||
jmp .Lschedule_go
|
||||
|
||||
.Lschedule_am_decrypting:
|
||||
|
||||
# Decrypt path: store the raw key shuffled by the .Lk_sr row at r8
# (pshufb %xmm1,%xmm3), then flip r8 with 0x30.
movdqa (%r8,%r10,1),%xmm1
|
||||
.byte 102,15,56,0,217
|
||||
movdqu %xmm3,(%rdx)
|
||||
xorq $0x30,%r8
|
||||
|
||||
.Lschedule_go:
|
||||
cmpl $192,%esi
|
||||
ja .Lschedule_256
|
||||
je .Lschedule_192
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.Lschedule_128:
|
||||
# esi = round counter for the 128-bit path.
movl $10,%esi
|
||||
|
||||
.Loop_schedule_128:
|
||||
call _vpaes_schedule_round
|
||||
decq %rsi
|
||||
jz .Lschedule_mangle_last
|
||||
call _vpaes_schedule_mangle
|
||||
jmp .Loop_schedule_128
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.align 16
|
||||
.Lschedule_192:
|
||||
# Load key bytes 8..23, transform them, keep them in xmm6 with the low
# qword cleared.
movdqu 8(%rdi),%xmm0
|
||||
call _vpaes_schedule_transform
|
||||
movdqa %xmm0,%xmm6
|
||||
pxor %xmm4,%xmm4
|
||||
movhlps %xmm4,%xmm6
|
||||
movl $4,%esi
|
||||
|
||||
.Loop_schedule_192:
|
||||
call _vpaes_schedule_round
|
||||
# palignr $8,%xmm6,%xmm0
.byte 102,15,58,15,198,8
|
||||
call _vpaes_schedule_mangle
|
||||
call _vpaes_schedule_192_smear
|
||||
call _vpaes_schedule_mangle
|
||||
call _vpaes_schedule_round
|
||||
decq %rsi
|
||||
jz .Lschedule_mangle_last
|
||||
call _vpaes_schedule_mangle
|
||||
call _vpaes_schedule_192_smear
|
||||
jmp .Loop_schedule_192
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.align 16
|
||||
.Lschedule_256:
|
||||
# Load key bytes 16..31 and transform them.
movdqu 16(%rdi),%xmm0
|
||||
call _vpaes_schedule_transform
|
||||
movl $7,%esi
|
||||
|
||||
.Loop_schedule_256:
|
||||
call _vpaes_schedule_mangle
|
||||
movdqa %xmm0,%xmm6
|
||||
|
||||
|
||||
call _vpaes_schedule_round
|
||||
decq %rsi
|
||||
jz .Lschedule_mangle_last
|
||||
call _vpaes_schedule_mangle
|
||||
|
||||
|
||||
# Run the low round on xmm6 in place of xmm7, with xmm7 parked in xmm5
# and restored afterwards.
pshufd $0xFF,%xmm0,%xmm0
|
||||
movdqa %xmm7,%xmm5
|
||||
movdqa %xmm6,%xmm7
|
||||
call _vpaes_schedule_low_round
|
||||
movdqa %xmm5,%xmm7
|
||||
|
||||
jmp .Loop_schedule_256
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.align 16
|
||||
.Lschedule_mangle_last:
|
||||
|
||||
# Last round key: the decrypt path uses the .Lk_deskew tables; the
# encrypt path shuffles xmm0 (pshufb %xmm1,%xmm0) and uses .Lk_opt.
leaq .Lk_deskew(%rip),%r11
|
||||
testq %rcx,%rcx
|
||||
jnz .Lschedule_mangle_last_dec
|
||||
|
||||
|
||||
movdqa (%r8,%r10,1),%xmm1
|
||||
.byte 102,15,56,0,193
|
||||
leaq .Lk_opt(%rip),%r11
|
||||
addq $32,%rdx
|
||||
|
||||
.Lschedule_mangle_last_dec:
|
||||
addq $-16,%rdx
|
||||
pxor .Lk_s63(%rip),%xmm0
|
||||
call _vpaes_schedule_transform
|
||||
movdqu %xmm0,(%rdx)
|
||||
|
||||
|
||||
# Wipe the registers that held key material.
pxor %xmm0,%xmm0
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
pxor %xmm4,%xmm4
|
||||
pxor %xmm5,%xmm5
|
||||
pxor %xmm6,%xmm6
|
||||
pxor %xmm7,%xmm7
|
||||
# rep ret, written as raw bytes
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size _vpaes_schedule_core,.-_vpaes_schedule_core
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# _vpaes_schedule_192_smear: helper of the 192-bit key schedule path.
# Xors two dword shuffles (one of xmm6, one of xmm7) into xmm6, returns
# the result in xmm0 and leaves xmm6 with its low qword cleared.
# Clobbers xmm1 (zero on return).
.type _vpaes_schedule_192_smear,@function
|
||||
.align 16
|
||||
_vpaes_schedule_192_smear:
|
||||
.cfi_startproc
|
||||
pshufd $0x80,%xmm6,%xmm1
|
||||
pshufd $0xFE,%xmm7,%xmm0
|
||||
pxor %xmm1,%xmm6
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm6,%xmm0
|
||||
# Copy the high qword of the zeroed xmm1 into the low qword of xmm6.
movhlps %xmm1,%xmm6
|
||||
# rep ret, written as raw bytes
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# _vpaes_schedule_round: one key-schedule round. Has a second entry
# point, _vpaes_schedule_low_round, which skips the first part.
#   xmm0       - input word material; on return holds the new round value
#   xmm7       - previous round value; on return equals xmm0
#   xmm8       - rotated by 15 bytes on every full-round call; the byte
#                taken from it is xored into xmm7
#   xmm9-xmm13 - constants that must already be loaded by _vpaes_preheat
# Scratch: xmm1-xmm4.
# The .byte lines are pshufb (102,15,56,0,N) and palignr (...,58,15,...)
# written as raw bytes.
.type _vpaes_schedule_round,@function
|
||||
.align 16
|
||||
_vpaes_schedule_round:
|
||||
.cfi_startproc
|
||||
|
||||
# palignr $15,%xmm8,%xmm1 then palignr $15,%xmm8,%xmm8: take one byte of
# xmm8 into the zeroed xmm1 and rotate xmm8; xor that byte into xmm7.
pxor %xmm1,%xmm1
|
||||
.byte 102,65,15,58,15,200,15
|
||||
.byte 102,69,15,58,15,192,15
|
||||
pxor %xmm1,%xmm7
|
||||
|
||||
|
||||
# Broadcast the top dword of xmm0, then palignr $1,%xmm0,%xmm0 rotates it
# by one byte.
pshufd $0xFF,%xmm0,%xmm0
|
||||
.byte 102,15,58,15,192,1
|
||||
|
||||
|
||||
|
||||
|
||||
_vpaes_schedule_low_round:
|
||||
|
||||
# xmm7 ^= xmm7 << 32, then ^= << 64, then ^= .Lk_s63.
movdqa %xmm7,%xmm1
|
||||
pslldq $4,%xmm7
|
||||
pxor %xmm1,%xmm7
|
||||
movdqa %xmm7,%xmm1
|
||||
pslldq $8,%xmm7
|
||||
pxor %xmm1,%xmm7
|
||||
pxor .Lk_s63(%rip),%xmm7
|
||||
|
||||
|
||||
# Nibble split of xmm0 followed by table lookups through xmm10-xmm13.
movdqa %xmm9,%xmm1
|
||||
pandn %xmm0,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm9,%xmm0
|
||||
movdqa %xmm11,%xmm2
|
||||
.byte 102,15,56,0,208
|
||||
pxor %xmm1,%xmm0
|
||||
movdqa %xmm10,%xmm3
|
||||
.byte 102,15,56,0,217
|
||||
pxor %xmm2,%xmm3
|
||||
movdqa %xmm10,%xmm4
|
||||
.byte 102,15,56,0,224
|
||||
pxor %xmm2,%xmm4
|
||||
movdqa %xmm10,%xmm2
|
||||
.byte 102,15,56,0,211
|
||||
pxor %xmm0,%xmm2
|
||||
movdqa %xmm10,%xmm3
|
||||
.byte 102,15,56,0,220
|
||||
pxor %xmm1,%xmm3
|
||||
movdqa %xmm13,%xmm4
|
||||
.byte 102,15,56,0,226
|
||||
movdqa %xmm12,%xmm0
|
||||
.byte 102,15,56,0,195
|
||||
pxor %xmm4,%xmm0
|
||||
|
||||
|
||||
# Combine with the smeared xmm7 and keep a copy in xmm7.
pxor %xmm7,%xmm0
|
||||
movdqa %xmm0,%xmm7
|
||||
# rep ret, written as raw bytes
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size _vpaes_schedule_round,.-_vpaes_schedule_round
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# _vpaes_schedule_transform: nibble-wise table transform of xmm0.
#   xmm0 - value to transform; replaced by the result
#   r11  - address of a 32-byte table pair: (%r11) is indexed by the low
#          nibbles, 16(%r11) by the high nibbles
#   xmm9 - nibble mask, must already be loaded
# Clobbers xmm1 and xmm2.
.type _vpaes_schedule_transform,@function
|
||||
.align 16
|
||||
_vpaes_schedule_transform:
|
||||
.cfi_startproc
|
||||
movdqa %xmm9,%xmm1
|
||||
pandn %xmm0,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm9,%xmm0
|
||||
movdqa (%r11),%xmm2
|
||||
# pshufb %xmm0,%xmm2
.byte 102,15,56,0,208
|
||||
movdqa 16(%r11),%xmm0
|
||||
# pshufb %xmm1,%xmm0
.byte 102,15,56,0,193
|
||||
pxor %xmm2,%xmm0
|
||||
# rep ret, written as raw bytes
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# _vpaes_schedule_mangle: converts the round value in xmm0 into its
# stored form and writes it to the schedule.
#   xmm0 - round value (left unchanged; work is done on a copy in xmm4)
#   rcx  - zero: encrypt path, rdx moves up 16 bytes before the store;
#          non-zero: decrypt path, rdx moves down 16 bytes
#   rdx  - schedule write pointer, updated as described above
#   r8   - row offset into .Lk_sr (r10 must hold its address); stepped
#          by -16 and wrapped with 0x30 on every call
# Clobbers xmm1-xmm5 and, on the decrypt path, r11.
# The .byte 102,15,56,0,N lines are pshufb written as raw bytes.
.type _vpaes_schedule_mangle,@function
|
||||
.align 16
|
||||
_vpaes_schedule_mangle:
|
||||
.cfi_startproc
|
||||
movdqa %xmm0,%xmm4
|
||||
movdqa .Lk_mc_forward(%rip),%xmm5
|
||||
testq %rcx,%rcx
|
||||
jnz .Lschedule_mangle_dec
|
||||
|
||||
|
||||
# Encrypt path: xor with .Lk_s63, then xmm3 = the xor of three
# successive shuffles of xmm4 by xmm5.
addq $16,%rdx
|
||||
pxor .Lk_s63(%rip),%xmm4
|
||||
.byte 102,15,56,0,229
|
||||
movdqa %xmm4,%xmm3
|
||||
.byte 102,15,56,0,229
|
||||
pxor %xmm4,%xmm3
|
||||
.byte 102,15,56,0,229
|
||||
pxor %xmm4,%xmm3
|
||||
|
||||
jmp .Lschedule_mangle_both
|
||||
.align 16
|
||||
.Lschedule_mangle_dec:
|
||||
|
||||
# Decrypt path: nibble split of xmm4, then four table-pair lookups from
# .Lk_dksd onward (offsets 0, 32, 64, 96), shuffling by xmm5 between them.
leaq .Lk_dksd(%rip),%r11
|
||||
movdqa %xmm9,%xmm1
|
||||
pandn %xmm4,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm9,%xmm4
|
||||
|
||||
movdqa 0(%r11),%xmm2
|
||||
.byte 102,15,56,0,212
|
||||
movdqa 16(%r11),%xmm3
|
||||
.byte 102,15,56,0,217
|
||||
pxor %xmm2,%xmm3
|
||||
.byte 102,15,56,0,221
|
||||
|
||||
movdqa 32(%r11),%xmm2
|
||||
.byte 102,15,56,0,212
|
||||
pxor %xmm3,%xmm2
|
||||
movdqa 48(%r11),%xmm3
|
||||
.byte 102,15,56,0,217
|
||||
pxor %xmm2,%xmm3
|
||||
.byte 102,15,56,0,221
|
||||
|
||||
movdqa 64(%r11),%xmm2
|
||||
.byte 102,15,56,0,212
|
||||
pxor %xmm3,%xmm2
|
||||
movdqa 80(%r11),%xmm3
|
||||
.byte 102,15,56,0,217
|
||||
pxor %xmm2,%xmm3
|
||||
.byte 102,15,56,0,221
|
||||
|
||||
movdqa 96(%r11),%xmm2
|
||||
.byte 102,15,56,0,212
|
||||
pxor %xmm3,%xmm2
|
||||
movdqa 112(%r11),%xmm3
|
||||
.byte 102,15,56,0,217
|
||||
pxor %xmm2,%xmm3
|
||||
|
||||
addq $-16,%rdx
|
||||
|
||||
.Lschedule_mangle_both:
|
||||
# Shuffle by the current .Lk_sr row, step r8 to the previous row
# (wrapping within 0..48) and store the round key.
movdqa (%r8,%r10,1),%xmm1
|
||||
.byte 102,15,56,0,217
|
||||
addq $-16,%r8
|
||||
andq $0x30,%r8
|
||||
movdqu %xmm3,(%rdx)
|
||||
# rep ret, written as raw bytes
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
|
||||
|
||||
|
||||
|
||||
|
||||
# vpaes_set_encrypt_key: build the encryption key schedule.
#   rdi - user key (consumed by _vpaes_schedule_core)
#   esi - key size in bits
#   rdx - key schedule to fill; (esi >> 5) + 5 is stored at offset 240
#         (9, 11 or 13 for 128, 192 or 256 bits)
# Always returns 0 in eax.
.globl vpaes_set_encrypt_key
|
||||
.type vpaes_set_encrypt_key,@function
|
||||
.align 16
|
||||
vpaes_set_encrypt_key:
|
||||
.cfi_startproc
|
||||
# endbr64, written as raw bytes
.byte 243,15,30,250
|
||||
movl %esi,%eax
|
||||
shrl $5,%eax
|
||||
addl $5,%eax
|
||||
movl %eax,240(%rdx)
|
||||
|
||||
# ecx = 0 selects the encrypt path; r8 = starting .Lk_sr row offset.
movl $0,%ecx
|
||||
movl $0x30,%r8d
|
||||
call _vpaes_schedule_core
|
||||
xorl %eax,%eax
|
||||
# rep ret, written as raw bytes
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
|
||||
|
||||
# vpaes_set_decrypt_key: build the decryption key schedule.
#   rdi - user key (consumed by _vpaes_schedule_core)
#   esi - key size in bits
#   rdx - key schedule to fill; (esi >> 5) + 5 is stored at offset 240
# The schedule is written from its far end downward: rdx is advanced to
# rdx + 16 + 16 * (value stored at 240) before the core is called.
# Always returns 0 in eax.
.globl vpaes_set_decrypt_key
|
||||
.type vpaes_set_decrypt_key,@function
|
||||
.align 16
|
||||
vpaes_set_decrypt_key:
|
||||
.cfi_startproc
|
||||
# endbr64, written as raw bytes
.byte 243,15,30,250
|
||||
movl %esi,%eax
|
||||
shrl $5,%eax
|
||||
addl $5,%eax
|
||||
movl %eax,240(%rdx)
|
||||
shll $4,%eax
|
||||
leaq 16(%rdx,%rax,1),%rdx
|
||||
|
||||
# ecx = 1 selects the decrypt path.
# r8 = ((bits >> 1) & 32) ^ 32: 32 for 128 and 256 bits, 0 for 192 bits.
movl $1,%ecx
|
||||
movl %esi,%r8d
|
||||
shrl $1,%r8d
|
||||
andl $32,%r8d
|
||||
xorl $32,%r8d
|
||||
call _vpaes_schedule_core
|
||||
xorl %eax,%eax
|
||||
# rep ret, written as raw bytes
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
|
||||
|
||||
# vpaes_encrypt: process one 16-byte block.
#   rdi - input block (unaligned load)
#   rsi - output block (unaligned store)
#   rdx - key schedule, passed through untouched to _vpaes_encrypt_core
.globl vpaes_encrypt
|
||||
.type vpaes_encrypt,@function
|
||||
.align 16
|
||||
vpaes_encrypt:
|
||||
.cfi_startproc
|
||||
# endbr64, written as raw bytes
.byte 243,15,30,250
|
||||
movdqu (%rdi),%xmm0
|
||||
# Load the constant registers the core expects.
call _vpaes_preheat
|
||||
call _vpaes_encrypt_core
|
||||
movdqu %xmm0,(%rsi)
|
||||
# rep ret, written as raw bytes
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size vpaes_encrypt,.-vpaes_encrypt
|
||||
|
||||
# vpaes_decrypt: process one 16-byte block.
#   rdi - input block (unaligned load)
#   rsi - output block (unaligned store)
#   rdx - key schedule, passed through untouched to _vpaes_decrypt_core
.globl vpaes_decrypt
|
||||
.type vpaes_decrypt,@function
|
||||
.align 16
|
||||
vpaes_decrypt:
|
||||
.cfi_startproc
|
||||
# endbr64, written as raw bytes
.byte 243,15,30,250
|
||||
movdqu (%rdi),%xmm0
|
||||
# Load the constant registers the core expects.
call _vpaes_preheat
|
||||
call _vpaes_decrypt_core
|
||||
movdqu %xmm0,(%rsi)
|
||||
# rep ret, written as raw bytes
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size vpaes_decrypt,.-vpaes_decrypt
|
||||
# vpaes_cbc_encrypt: CBC-chained processing of whole 16-byte blocks.
#   rdi - input
#   rsi - output
#   rdx - length in bytes (swapped into rcx on entry)
#   rcx - key schedule (swapped into rdx, where the cores read it)
#   r8  - 16-byte chaining value; read at the start, written back at the
#         end
#   r9d - zero selects the decrypt loop, non-zero the encrypt loop
# Returns without touching anything when the length is below 16. A
# trailing partial block is not processed: both loops stop once fewer
# than 16 bytes remain.
.globl vpaes_cbc_encrypt
|
||||
.type vpaes_cbc_encrypt,@function
|
||||
.align 16
|
||||
vpaes_cbc_encrypt:
|
||||
.cfi_startproc
|
||||
# endbr64, written as raw bytes
.byte 243,15,30,250
|
||||
xchgq %rcx,%rdx
|
||||
# rcx = length - 16; a borrow means there is no full block.
subq $16,%rcx
|
||||
jc .Lcbc_abort
|
||||
movdqu (%r8),%xmm6
|
||||
# rsi becomes (output - input) so (%rsi,%rdi) addresses the output block
# while only rdi is advanced.
subq %rdi,%rsi
|
||||
call _vpaes_preheat
|
||||
cmpl $0,%r9d
|
||||
je .Lcbc_dec_loop
|
||||
jmp .Lcbc_enc_loop
|
||||
.align 16
|
||||
.Lcbc_enc_loop:
|
||||
# xmm6 holds the previous output block (initially the value from r8).
movdqu (%rdi),%xmm0
|
||||
pxor %xmm6,%xmm0
|
||||
call _vpaes_encrypt_core
|
||||
movdqa %xmm0,%xmm6
|
||||
movdqu %xmm0,(%rsi,%rdi,1)
|
||||
leaq 16(%rdi),%rdi
|
||||
subq $16,%rcx
|
||||
jnc .Lcbc_enc_loop
|
||||
jmp .Lcbc_done
|
||||
.align 16
|
||||
.Lcbc_dec_loop:
|
||||
# xmm7 keeps the input block so it can become the next chaining value
# after the core has overwritten xmm0.
movdqu (%rdi),%xmm0
|
||||
movdqa %xmm0,%xmm7
|
||||
call _vpaes_decrypt_core
|
||||
pxor %xmm6,%xmm0
|
||||
movdqa %xmm7,%xmm6
|
||||
movdqu %xmm0,(%rsi,%rdi,1)
|
||||
leaq 16(%rdi),%rdi
|
||||
subq $16,%rcx
|
||||
jnc .Lcbc_dec_loop
|
||||
.Lcbc_done:
|
||||
# Write the final chaining value back for the next call.
movdqu %xmm6,(%r8)
|
||||
.Lcbc_abort:
|
||||
# rep ret, written as raw bytes
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# _vpaes_preheat: load the constant registers used by the vpaes cores.
# All loads are relative to r10 = .Lk_s0F, which follows the layout of
# _vpaes_consts:
#   xmm10, xmm11 - .Lk_inv      (offsets -32, -16)
#   xmm9         - .Lk_s0F      (offset 0)
#   xmm13, xmm12 - .Lk_sb1      (offsets 48, 64)
#   xmm15, xmm14 - .Lk_sb2      (offsets 80, 96)
# Clobbers r10.
.type _vpaes_preheat,@function
|
||||
.align 16
|
||||
_vpaes_preheat:
|
||||
.cfi_startproc
|
||||
leaq .Lk_s0F(%rip),%r10
|
||||
movdqa -32(%r10),%xmm10
|
||||
movdqa -16(%r10),%xmm11
|
||||
movdqa 0(%r10),%xmm9
|
||||
movdqa 48(%r10),%xmm13
|
||||
movdqa 64(%r10),%xmm12
|
||||
movdqa 80(%r10),%xmm15
|
||||
movdqa 96(%r10),%xmm14
|
||||
# rep ret, written as raw bytes
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size _vpaes_preheat,.-_vpaes_preheat
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# _vpaes_consts: lookup tables for the vpaes routines, 64-byte aligned.
# The code addresses several tables by fixed byte offsets from a
# neighbouring label (for example -32(%r10) from .Lk_s0F, -96(%r10) from
# .Lk_mc_backward, -352(%r11) from .Lk_dsbd), so the order and size of
# the entries below must not change.
.type _vpaes_consts,@object
|
||||
.align 64
|
||||
_vpaes_consts:
|
||||
.Lk_inv:
|
||||
.quad 0x0E05060F0D080180, 0x040703090A0B0C02
|
||||
.quad 0x01040A060F0B0780, 0x030D0E0C02050809
|
||||
|
||||
# Low-nibble mask for every byte.
.Lk_s0F:
|
||||
.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
|
||||
|
||||
.Lk_ipt:
|
||||
.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
|
||||
.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
|
||||
|
||||
.Lk_sb1:
|
||||
.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
|
||||
.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
|
||||
.Lk_sb2:
|
||||
.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
|
||||
.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
|
||||
.Lk_sbo:
|
||||
.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
|
||||
.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
|
||||
|
||||
# Four 16-byte shuffle rows each, selected by a 0/16/32/48 offset.
.Lk_mc_forward:
|
||||
.quad 0x0407060500030201, 0x0C0F0E0D080B0A09
|
||||
.quad 0x080B0A0904070605, 0x000302010C0F0E0D
|
||||
.quad 0x0C0F0E0D080B0A09, 0x0407060500030201
|
||||
.quad 0x000302010C0F0E0D, 0x080B0A0904070605
|
||||
|
||||
.Lk_mc_backward:
|
||||
.quad 0x0605040702010003, 0x0E0D0C0F0A09080B
|
||||
.quad 0x020100030E0D0C0F, 0x0A09080B06050407
|
||||
.quad 0x0E0D0C0F0A09080B, 0x0605040702010003
|
||||
.quad 0x0A09080B06050407, 0x020100030E0D0C0F
|
||||
|
||||
.Lk_sr:
|
||||
.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
|
||||
.quad 0x030E09040F0A0500, 0x0B06010C07020D08
|
||||
.quad 0x0F060D040B020900, 0x070E050C030A0108
|
||||
.quad 0x0B0E0104070A0D00, 0x0306090C0F020508
|
||||
|
||||
# Loaded into xmm8 by _vpaes_schedule_core.
.Lk_rcon:
|
||||
.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
|
||||
|
||||
.Lk_s63:
|
||||
.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
|
||||
|
||||
.Lk_opt:
|
||||
.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
|
||||
.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
|
||||
|
||||
.Lk_deskew:
|
||||
.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
|
||||
.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Tables used by the decrypt path of _vpaes_schedule_mangle, read as
# four consecutive 32-byte pairs starting at .Lk_dksd.
.Lk_dksd:
|
||||
.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
|
||||
.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
|
||||
.Lk_dksb:
|
||||
.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
|
||||
.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
|
||||
.Lk_dkse:
|
||||
.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
|
||||
.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
|
||||
.Lk_dks9:
|
||||
.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
|
||||
.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Tables used by _vpaes_decrypt_core; .Lk_dsb9 to .Lk_dsbo are read at
# offsets -32 to 112 from .Lk_dsbd.
.Lk_dipt:
|
||||
.quad 0x0F505B040B545F00, 0x154A411E114E451A
|
||||
.quad 0x86E383E660056500, 0x12771772F491F194
|
||||
|
||||
.Lk_dsb9:
|
||||
.quad 0x851C03539A86D600, 0xCAD51F504F994CC9
|
||||
.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
|
||||
.Lk_dsbd:
|
||||
.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
|
||||
.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
|
||||
.Lk_dsbb:
|
||||
.quad 0xD022649296B44200, 0x602646F6B0F2D404
|
||||
.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
|
||||
.Lk_dsbe:
|
||||
.quad 0x46F2929626D4D000, 0x2242600464B4F6B0
|
||||
.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
|
||||
.Lk_dsbo:
|
||||
.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
|
||||
.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
|
||||
# NUL-terminated ASCII credit string: "Vector Permutation AES for
# x86_64/SSSE3, Mike Hamburg (Stanford University)".
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
|
||||
.align 64
|
||||
.size _vpaes_consts,.-_vpaes_consts
|
||||
# ELF note describing x86 feature properties of this object.
# Layout: name size, descriptor size, note type, name, descriptor.
.section ".note.gnu.property", "a"
|
||||
.p2align 3
|
||||
# Name size: bytes between labels 0 and 1.
.long 1f - 0f
|
||||
# Descriptor size: bytes between labels 1 and 4.
.long 4f - 1f
|
||||
# Note type 5 (NT_GNU_PROPERTY_TYPE_0 in the GNU property note ABI).
.long 5
|
||||
0:
|
||||
# "GNU" encoded with .byte, since .asciz isn't supported
|
||||
# on Solaris.
|
||||
.byte 0x47
|
||||
.byte 0x4e
|
||||
.byte 0x55
|
||||
.byte 0
|
||||
1:
|
||||
.p2align 3
|
||||
# Property type 0xc0000002 (GNU_PROPERTY_X86_FEATURE_1_AND).
.long 0xc0000002
|
||||
# Property data size: bytes between labels 2 and 3.
.long 3f - 2f
|
||||
2:
|
||||
# Feature bits: value 3 = IBT (bit 0) and SHSTK (bit 1).
.long 3
|
||||
3:
|
||||
.p2align 3
|
||||
4:
|
||||
@@ -1,901 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from wp-x86_64.pl. */
|
||||
.text
|
||||
|
||||
/*
 * whirlpool_block -- fold one or more 64-byte input blocks into a
 * 512-bit (eight qword) hash state.
 *
 * Register arguments as used by the code (System V AMD64 order):
 *   %rdi  pointer to the eight 64-bit state words; read here and
 *         rewritten after every block
 *   %rsi  input pointer, consumed 64 bytes per block
 *   %rdx  number of blocks; the count is only tested after a block has
 *         been processed, so it must be at least 1
 * NOTE(review): presumably the C prototype is
 *   void whirlpool_block(WHIRLPOOL_CTX *, const void *, size_t)
 * -- confirm against the OpenSSL Whirlpool headers.
 *
 * Stack frame built by the prologue (%rsp aligned down to 64 bytes):
 *      0..63 (%rsp)   round key K, eight qwords
 *     64..127(%rsp)   cipher state, eight qwords
 *    128+0  (%rsp)    saved %rdi
 *    128+8  (%rsp)    input pointer (advanced per block)
 *    128+16 (%rsp)    remaining block count
 *    128+24 (%rsp)    round counter
 *    128+32 (%rsp)    %rsp as it was on entry
 * The frame copies are needed because %rsi, %rdi and %rbx are reused
 * as scratch inside the round loop.
 */
.globl whirlpool_block
|
||||
.type whirlpool_block,@function
|
||||
.align 16
|
||||
whirlpool_block:
|
||||
.cfi_startproc
|
||||
/* Keep the entry %rsp in %rax; the unwinder tracks the CFA through it. */
movq %rsp,%rax
|
||||
.cfi_def_cfa_register %rax
|
||||
/* Save all six callee-saved GPRs; they end up at -8..-48 from entry %rsp. */
pushq %rbx
|
||||
.cfi_offset %rbx,-16
|
||||
pushq %rbp
|
||||
.cfi_offset %rbp,-24
|
||||
pushq %r12
|
||||
.cfi_offset %r12,-32
|
||||
pushq %r13
|
||||
.cfi_offset %r13,-40
|
||||
pushq %r14
|
||||
.cfi_offset %r14,-48
|
||||
pushq %r15
|
||||
.cfi_offset %r15,-56
|
||||
|
||||
/* 128 bytes for the K and state buffers + 40 bytes for five frame slots. */
subq $128+40,%rsp
|
||||
andq $-64,%rsp
|
||||
|
||||
/* %r10 -> frame slots; stash the three arguments and the entry %rsp. */
leaq 128(%rsp),%r10
|
||||
movq %rdi,0(%r10)
|
||||
movq %rsi,8(%r10)
|
||||
movq %rdx,16(%r10)
|
||||
movq %rax,32(%r10)
|
||||
/*
 * Raw DWARF CFA rule: DW_CFA_def_cfa_expression (0x0f), 6 bytes:
 * DW_OP_breg7 (%rsp) +160, DW_OP_deref, DW_OP_plus_uconst 8, i.e.
 * CFA = *(%rsp + 160) + 8, where 160 = 128+32 is the slot that holds
 * the entry %rsp stored just above.
 */
.cfi_escape 0x0f,0x06,0x77,0xa0,0x01,0x06,0x23,0x08
|
||||
.Lprologue:
|
||||
|
||||
/* %rbx = frame slot base, %rbp = lookup table base for the whole call. */
movq %r10,%rbx
|
||||
leaq .Ltable(%rip),%rbp
|
||||
|
||||
/* Clear the two byte-index scratch registers. */
xorq %rcx,%rcx
|
||||
xorq %rdx,%rdx
|
||||
/* Load the current hash state into %r8..%r15. */
movq 0(%rdi),%r8
|
||||
movq 8(%rdi),%r9
|
||||
movq 16(%rdi),%r10
|
||||
movq 24(%rdi),%r11
|
||||
movq 32(%rdi),%r12
|
||||
movq 40(%rdi),%r13
|
||||
movq 48(%rdi),%r14
|
||||
movq 56(%rdi),%r15
|
||||
/*
 * Per-block setup.  On entry %r8..%r15 hold the current hash state and
 * %rsi points at the 64-byte block to absorb (first pass: values from
 * the function entry; later passes: values left by .Lroundsdone).
 */
.Louterloop:
|
||||
/* Initial round key K = hash state, kept at 0..63(%rsp). */
movq %r8,0(%rsp)
|
||||
movq %r9,8(%rsp)
|
||||
movq %r10,16(%rsp)
|
||||
movq %r11,24(%rsp)
|
||||
movq %r12,32(%rsp)
|
||||
movq %r13,40(%rsp)
|
||||
movq %r14,48(%rsp)
|
||||
movq %r15,56(%rsp)
|
||||
/* Initial cipher state = hash state XOR input block. */
xorq 0(%rsi),%r8
|
||||
xorq 8(%rsi),%r9
|
||||
xorq 16(%rsi),%r10
|
||||
xorq 24(%rsi),%r11
|
||||
xorq 32(%rsi),%r12
|
||||
xorq 40(%rsi),%r13
|
||||
xorq 48(%rsi),%r14
|
||||
xorq 56(%rsi),%r15
|
||||
/* ... kept at 64..127(%rsp). */
movq %r8,64+0(%rsp)
|
||||
movq %r9,64+8(%rsp)
|
||||
movq %r10,64+16(%rsp)
|
||||
movq %r11,64+24(%rsp)
|
||||
movq %r12,64+32(%rsp)
|
||||
movq %r13,64+40(%rsp)
|
||||
movq %r14,64+48(%rsp)
|
||||
movq %r15,64+56(%rsp)
|
||||
/*
 * Round index = 0, mirrored in the frame at 24(%rbx).  %rsi now serves
 * as the round index; the input pointer lives on in the frame at 8(%rbx).
 */
xorq %rsi,%rsi
|
||||
movq %rsi,24(%rbx)
|
||||
jmp .Lround
|
||||
/*
 * One cipher round, fully unrolled; executed ten times per block.
 *
 * Entry:  %rsi = round index, %rbp = .Ltable,
 *         0..63(%rsp) = round key K, 64..127(%rsp) = cipher state.
 * Scratch: %eax/%ebx hold the low/high dword of the row being split into
 *         bytes, %rcx/%rdx the two bytes just extracted, %rsi/%rdi twice
 *         those bytes (table indices).  %rbx is rebuilt from %rsp at the
 *         end of the round.
 *
 * Table lookup: an index of 2*byte scaled by 8 selects the 16-byte entry
 * for that byte.  Every entry stores its 8-byte value twice, so the
 * displacement k (0..7) picks the value rotated right by 8*k bits.
 * Byte j of row i uses displacement (8-j) mod 8 and is folded into
 * register %r(8 + (i+j) mod 8).
 *
 * First half: the eight rows of K are looked up on top of the round
 * constant, giving the next round key in %r8..%r15.  Second half: the
 * eight rows of the state are looked up and XORed onto that key, giving
 * the next state.
 *
 * The byte extraction is software-pipelined: the "row" markers below sit
 * where a row's lookups start; its first two bytes were already
 * extracted in the lines just above each marker.
 */
.align 16
|
||||
.Lround:
|
||||
/* %r8 starts as the 8-byte round constant stored after the 4096-byte table. */
movq 4096(%rbp,%rsi,8),%r8
|
||||
/* K row 0: qword at 0(%rsp); %r9..%r15 are initialised with movq. */
movl 0(%rsp),%eax
|
||||
movl 4(%rsp),%ebx
|
||||
movzbl %al,%ecx
|
||||
movzbl %ah,%edx
|
||||
shrl $16,%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 0(%rbp,%rsi,8),%r8
|
||||
movq 7(%rbp,%rdi,8),%r9
|
||||
movl 0+8(%rsp),%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
movq 6(%rbp,%rsi,8),%r10
|
||||
movq 5(%rbp,%rdi,8),%r11
|
||||
shrl $16,%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
movq 4(%rbp,%rsi,8),%r12
|
||||
movq 3(%rbp,%rdi,8),%r13
|
||||
movl 0+8+4(%rsp),%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
movq 2(%rbp,%rsi,8),%r14
|
||||
movq 1(%rbp,%rdi,8),%r15
|
||||
/* K row 1: qword at 8(%rsp). */
shrl $16,%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 0(%rbp,%rsi,8),%r9
|
||||
xorq 7(%rbp,%rdi,8),%r10
|
||||
movl 8+8(%rsp),%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 6(%rbp,%rsi,8),%r11
|
||||
xorq 5(%rbp,%rdi,8),%r12
|
||||
shrl $16,%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 4(%rbp,%rsi,8),%r13
|
||||
xorq 3(%rbp,%rdi,8),%r14
|
||||
movl 8+8+4(%rsp),%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 2(%rbp,%rsi,8),%r15
|
||||
xorq 1(%rbp,%rdi,8),%r8
|
||||
/* K row 2: qword at 16(%rsp). */
shrl $16,%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 0(%rbp,%rsi,8),%r10
|
||||
xorq 7(%rbp,%rdi,8),%r11
|
||||
movl 16+8(%rsp),%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 6(%rbp,%rsi,8),%r12
|
||||
xorq 5(%rbp,%rdi,8),%r13
|
||||
shrl $16,%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 4(%rbp,%rsi,8),%r14
|
||||
xorq 3(%rbp,%rdi,8),%r15
|
||||
movl 16+8+4(%rsp),%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 2(%rbp,%rsi,8),%r8
|
||||
xorq 1(%rbp,%rdi,8),%r9
|
||||
/* K row 3: qword at 24(%rsp). */
shrl $16,%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 0(%rbp,%rsi,8),%r11
|
||||
xorq 7(%rbp,%rdi,8),%r12
|
||||
movl 24+8(%rsp),%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 6(%rbp,%rsi,8),%r13
|
||||
xorq 5(%rbp,%rdi,8),%r14
|
||||
shrl $16,%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 4(%rbp,%rsi,8),%r15
|
||||
xorq 3(%rbp,%rdi,8),%r8
|
||||
movl 24+8+4(%rsp),%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 2(%rbp,%rsi,8),%r9
|
||||
xorq 1(%rbp,%rdi,8),%r10
|
||||
/* K row 4: qword at 32(%rsp). */
shrl $16,%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 0(%rbp,%rsi,8),%r12
|
||||
xorq 7(%rbp,%rdi,8),%r13
|
||||
movl 32+8(%rsp),%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 6(%rbp,%rsi,8),%r14
|
||||
xorq 5(%rbp,%rdi,8),%r15
|
||||
shrl $16,%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 4(%rbp,%rsi,8),%r8
|
||||
xorq 3(%rbp,%rdi,8),%r9
|
||||
movl 32+8+4(%rsp),%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 2(%rbp,%rsi,8),%r10
|
||||
xorq 1(%rbp,%rdi,8),%r11
|
||||
/* K row 5: qword at 40(%rsp). */
shrl $16,%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 0(%rbp,%rsi,8),%r13
|
||||
xorq 7(%rbp,%rdi,8),%r14
|
||||
movl 40+8(%rsp),%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 6(%rbp,%rsi,8),%r15
|
||||
xorq 5(%rbp,%rdi,8),%r8
|
||||
shrl $16,%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 4(%rbp,%rsi,8),%r9
|
||||
xorq 3(%rbp,%rdi,8),%r10
|
||||
movl 40+8+4(%rsp),%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 2(%rbp,%rsi,8),%r11
|
||||
xorq 1(%rbp,%rdi,8),%r12
|
||||
/* K row 6: qword at 48(%rsp). */
shrl $16,%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 0(%rbp,%rsi,8),%r14
|
||||
xorq 7(%rbp,%rdi,8),%r15
|
||||
movl 48+8(%rsp),%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 6(%rbp,%rsi,8),%r8
|
||||
xorq 5(%rbp,%rdi,8),%r9
|
||||
shrl $16,%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 4(%rbp,%rsi,8),%r10
|
||||
xorq 3(%rbp,%rdi,8),%r11
|
||||
movl 48+8+4(%rsp),%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 2(%rbp,%rsi,8),%r12
|
||||
xorq 1(%rbp,%rdi,8),%r13
|
||||
/* K row 7: qword at 56(%rsp); the look-ahead loads (56+8 = 64) already fetch state row 0. */
shrl $16,%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 0(%rbp,%rsi,8),%r15
|
||||
xorq 7(%rbp,%rdi,8),%r8
|
||||
movl 56+8(%rsp),%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 6(%rbp,%rsi,8),%r9
|
||||
xorq 5(%rbp,%rdi,8),%r10
|
||||
shrl $16,%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 4(%rbp,%rsi,8),%r11
|
||||
xorq 3(%rbp,%rdi,8),%r12
|
||||
movl 56+8+4(%rsp),%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 2(%rbp,%rsi,8),%r13
|
||||
xorq 1(%rbp,%rdi,8),%r14
|
||||
/*
 * All eight K rows are consumed: %r8..%r15 hold the next round key.
 * Store it over the old key; the registers keep it as the starting
 * value that the state lookups below are XORed onto.
 */
movq %r8,0(%rsp)
|
||||
movq %r9,8(%rsp)
|
||||
movq %r10,16(%rsp)
|
||||
movq %r11,24(%rsp)
|
||||
movq %r12,32(%rsp)
|
||||
movq %r13,40(%rsp)
|
||||
movq %r14,48(%rsp)
|
||||
movq %r15,56(%rsp)
|
||||
/* State row 0: qword at 64+0(%rsp). */
shrl $16,%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 0(%rbp,%rsi,8),%r8
|
||||
xorq 7(%rbp,%rdi,8),%r9
|
||||
movl 64+0+8(%rsp),%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 6(%rbp,%rsi,8),%r10
|
||||
xorq 5(%rbp,%rdi,8),%r11
|
||||
shrl $16,%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 4(%rbp,%rsi,8),%r12
|
||||
xorq 3(%rbp,%rdi,8),%r13
|
||||
movl 64+0+8+4(%rsp),%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 2(%rbp,%rsi,8),%r14
|
||||
xorq 1(%rbp,%rdi,8),%r15
|
||||
/* State row 1: qword at 64+8(%rsp). */
shrl $16,%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 0(%rbp,%rsi,8),%r9
|
||||
xorq 7(%rbp,%rdi,8),%r10
|
||||
movl 64+8+8(%rsp),%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 6(%rbp,%rsi,8),%r11
|
||||
xorq 5(%rbp,%rdi,8),%r12
|
||||
shrl $16,%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 4(%rbp,%rsi,8),%r13
|
||||
xorq 3(%rbp,%rdi,8),%r14
|
||||
movl 64+8+8+4(%rsp),%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 2(%rbp,%rsi,8),%r15
|
||||
xorq 1(%rbp,%rdi,8),%r8
|
||||
/* State row 2: qword at 64+16(%rsp). */
shrl $16,%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 0(%rbp,%rsi,8),%r10
|
||||
xorq 7(%rbp,%rdi,8),%r11
|
||||
movl 64+16+8(%rsp),%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 6(%rbp,%rsi,8),%r12
|
||||
xorq 5(%rbp,%rdi,8),%r13
|
||||
shrl $16,%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 4(%rbp,%rsi,8),%r14
|
||||
xorq 3(%rbp,%rdi,8),%r15
|
||||
movl 64+16+8+4(%rsp),%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 2(%rbp,%rsi,8),%r8
|
||||
xorq 1(%rbp,%rdi,8),%r9
|
||||
/* State row 3: qword at 64+24(%rsp). */
shrl $16,%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 0(%rbp,%rsi,8),%r11
|
||||
xorq 7(%rbp,%rdi,8),%r12
|
||||
movl 64+24+8(%rsp),%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 6(%rbp,%rsi,8),%r13
|
||||
xorq 5(%rbp,%rdi,8),%r14
|
||||
shrl $16,%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 4(%rbp,%rsi,8),%r15
|
||||
xorq 3(%rbp,%rdi,8),%r8
|
||||
movl 64+24+8+4(%rsp),%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 2(%rbp,%rsi,8),%r9
|
||||
xorq 1(%rbp,%rdi,8),%r10
|
||||
/* State row 4: qword at 64+32(%rsp). */
shrl $16,%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 0(%rbp,%rsi,8),%r12
|
||||
xorq 7(%rbp,%rdi,8),%r13
|
||||
movl 64+32+8(%rsp),%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 6(%rbp,%rsi,8),%r14
|
||||
xorq 5(%rbp,%rdi,8),%r15
|
||||
shrl $16,%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 4(%rbp,%rsi,8),%r8
|
||||
xorq 3(%rbp,%rdi,8),%r9
|
||||
movl 64+32+8+4(%rsp),%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 2(%rbp,%rsi,8),%r10
|
||||
xorq 1(%rbp,%rdi,8),%r11
|
||||
/* State row 5: qword at 64+40(%rsp). */
shrl $16,%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 0(%rbp,%rsi,8),%r13
|
||||
xorq 7(%rbp,%rdi,8),%r14
|
||||
movl 64+40+8(%rsp),%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 6(%rbp,%rsi,8),%r15
|
||||
xorq 5(%rbp,%rdi,8),%r8
|
||||
shrl $16,%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 4(%rbp,%rsi,8),%r9
|
||||
xorq 3(%rbp,%rdi,8),%r10
|
||||
movl 64+40+8+4(%rsp),%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 2(%rbp,%rsi,8),%r11
|
||||
xorq 1(%rbp,%rdi,8),%r12
|
||||
/* State row 6: qword at 64+48(%rsp). */
shrl $16,%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 0(%rbp,%rsi,8),%r14
|
||||
xorq 7(%rbp,%rdi,8),%r15
|
||||
movl 64+48+8(%rsp),%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 6(%rbp,%rsi,8),%r8
|
||||
xorq 5(%rbp,%rdi,8),%r9
|
||||
shrl $16,%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 4(%rbp,%rsi,8),%r10
|
||||
xorq 3(%rbp,%rdi,8),%r11
|
||||
movl 64+48+8+4(%rsp),%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 2(%rbp,%rsi,8),%r12
|
||||
xorq 1(%rbp,%rdi,8),%r13
|
||||
/* State row 7: qword at 64+56(%rsp); last row, so no look-ahead loads. */
shrl $16,%eax
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 0(%rbp,%rsi,8),%r15
|
||||
xorq 7(%rbp,%rdi,8),%r8
|
||||
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 6(%rbp,%rsi,8),%r9
|
||||
xorq 5(%rbp,%rdi,8),%r10
|
||||
shrl $16,%ebx
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %bl,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %bh,%edx
|
||||
xorq 4(%rbp,%rsi,8),%r11
|
||||
xorq 3(%rbp,%rdi,8),%r12
|
||||
|
||||
leaq (%rcx,%rcx,1),%rsi
|
||||
movzbl %al,%ecx
|
||||
leaq (%rdx,%rdx,1),%rdi
|
||||
movzbl %ah,%edx
|
||||
xorq 2(%rbp,%rsi,8),%r13
|
||||
xorq 1(%rbp,%rdi,8),%r14
|
||||
/*
 * Round finished: %r8..%r15 = new cipher state.  Rebuild the frame
 * pointer (%rbx was scratch), bump the round counter and leave after
 * the tenth round.
 */
leaq 128(%rsp),%rbx
|
||||
movq 24(%rbx),%rsi
|
||||
addq $1,%rsi
|
||||
cmpq $10,%rsi
|
||||
je .Lroundsdone
|
||||
|
||||
/* More rounds to go: save the counter and the new state, then repeat. */
movq %rsi,24(%rbx)
|
||||
movq %r8,64+0(%rsp)
|
||||
movq %r9,64+8(%rsp)
|
||||
movq %r10,64+16(%rsp)
|
||||
movq %r11,64+24(%rsp)
|
||||
movq %r12,64+32(%rsp)
|
||||
movq %r13,64+40(%rsp)
|
||||
movq %r14,64+48(%rsp)
|
||||
movq %r15,64+56(%rsp)
|
||||
jmp .Lround
|
||||
/*
 * Block finished.  %r8..%r15 hold the cipher output after ten rounds and
 * %rbx points at the frame slots.  The new hash state is
 *   cipher output XOR input block XOR previous hash state
 * (Miyaguchi-Preneel feed-forward), written back through the saved %rdi.
 */
.align 16
|
||||
.Lroundsdone:
|
||||
/* Reload the values that were clobbered inside the round loop. */
movq 0(%rbx),%rdi
|
||||
movq 8(%rbx),%rsi
|
||||
movq 16(%rbx),%rax
|
||||
/* XOR in the input block just processed. */
xorq 0(%rsi),%r8
|
||||
xorq 8(%rsi),%r9
|
||||
xorq 16(%rsi),%r10
|
||||
xorq 24(%rsi),%r11
|
||||
xorq 32(%rsi),%r12
|
||||
xorq 40(%rsi),%r13
|
||||
xorq 48(%rsi),%r14
|
||||
xorq 56(%rsi),%r15
|
||||
/* XOR in the previous hash state. */
xorq 0(%rdi),%r8
|
||||
xorq 8(%rdi),%r9
|
||||
xorq 16(%rdi),%r10
|
||||
xorq 24(%rdi),%r11
|
||||
xorq 32(%rdi),%r12
|
||||
xorq 40(%rdi),%r13
|
||||
xorq 48(%rdi),%r14
|
||||
xorq 56(%rdi),%r15
|
||||
/* Store the updated hash state. */
movq %r8,0(%rdi)
|
||||
movq %r9,8(%rdi)
|
||||
movq %r10,16(%rdi)
|
||||
movq %r11,24(%rdi)
|
||||
movq %r12,32(%rdi)
|
||||
movq %r13,40(%rdi)
|
||||
movq %r14,48(%rdi)
|
||||
movq %r15,56(%rdi)
|
||||
/*
 * Advance to the next 64-byte block and count down.  The count is
 * checked only here, after a block was processed, so an initial count
 * of 0 would wrap instead of returning immediately.
 */
leaq 64(%rsi),%rsi
|
||||
subq $1,%rax
|
||||
jz .Lalldone
|
||||
/* Persist pointer and count; %r8..%r15 carry the hash into .Louterloop. */
movq %rsi,8(%rbx)
|
||||
movq %rax,16(%rbx)
|
||||
jmp .Louterloop
|
||||
/*
 * Epilogue.  The entry %rsp was stored at 32(%rbx) by the prologue; the
 * six callee-saved registers pushed on entry sit just below it.
 */
.Lalldone:
|
||||
/* %rsi = %rsp as it was on entry; the CFA is tracked through it from here. */
movq 32(%rbx),%rsi
|
||||
.cfi_def_cfa %rsi,8
|
||||
/* Restore in reverse push order (%r15 was pushed last, so it is lowest). */
movq -48(%rsi),%r15
|
||||
.cfi_restore %r15
|
||||
movq -40(%rsi),%r14
|
||||
.cfi_restore %r14
|
||||
movq -32(%rsi),%r13
|
||||
.cfi_restore %r13
|
||||
movq -24(%rsi),%r12
|
||||
.cfi_restore %r12
|
||||
movq -16(%rsi),%rbp
|
||||
.cfi_restore %rbp
|
||||
movq -8(%rsi),%rbx
|
||||
.cfi_restore %rbx
|
||||
/* Drop the aligned frame and the pushes in one step. */
leaq (%rsi),%rsp
|
||||
.cfi_def_cfa_register %rsp
|
||||
.Lepilogue:
|
||||
/* 0xf3,0xc3 encodes "rep ret" (a two-byte return). */
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size whirlpool_block,.-whirlpool_block
|
||||
|
||||
/*
 * Lookup data for whirlpool_block, addressed relative to .Ltable:
 *   bytes 0..4095     256 entries of 16 bytes.  Each entry is one 8-byte
 *                     value stored twice back to back, so an 8-byte load
 *                     at entry+k (k = 0..7) yields that value rotated
 *                     right by 8*k bits.  The round code indexes it as
 *                     k(.Ltable + 16*byte).
 *   bytes 4096..4175  ten 8-byte round constants, read as
 *                     4096(.Ltable + 8*round).
 */
.align 64
|
||||
.type .Ltable,@object
|
||||
.Ltable:
|
||||
.byte 24,24,96,24,192,120,48,216,24,24,96,24,192,120,48,216
|
||||
.byte 35,35,140,35,5,175,70,38,35,35,140,35,5,175,70,38
|
||||
.byte 198,198,63,198,126,249,145,184,198,198,63,198,126,249,145,184
|
||||
.byte 232,232,135,232,19,111,205,251,232,232,135,232,19,111,205,251
|
||||
.byte 135,135,38,135,76,161,19,203,135,135,38,135,76,161,19,203
|
||||
.byte 184,184,218,184,169,98,109,17,184,184,218,184,169,98,109,17
|
||||
.byte 1,1,4,1,8,5,2,9,1,1,4,1,8,5,2,9
|
||||
.byte 79,79,33,79,66,110,158,13,79,79,33,79,66,110,158,13
|
||||
.byte 54,54,216,54,173,238,108,155,54,54,216,54,173,238,108,155
|
||||
.byte 166,166,162,166,89,4,81,255,166,166,162,166,89,4,81,255
|
||||
.byte 210,210,111,210,222,189,185,12,210,210,111,210,222,189,185,12
|
||||
.byte 245,245,243,245,251,6,247,14,245,245,243,245,251,6,247,14
|
||||
.byte 121,121,249,121,239,128,242,150,121,121,249,121,239,128,242,150
|
||||
.byte 111,111,161,111,95,206,222,48,111,111,161,111,95,206,222,48
|
||||
.byte 145,145,126,145,252,239,63,109,145,145,126,145,252,239,63,109
|
||||
.byte 82,82,85,82,170,7,164,248,82,82,85,82,170,7,164,248
|
||||
.byte 96,96,157,96,39,253,192,71,96,96,157,96,39,253,192,71
|
||||
.byte 188,188,202,188,137,118,101,53,188,188,202,188,137,118,101,53
|
||||
.byte 155,155,86,155,172,205,43,55,155,155,86,155,172,205,43,55
|
||||
.byte 142,142,2,142,4,140,1,138,142,142,2,142,4,140,1,138
|
||||
.byte 163,163,182,163,113,21,91,210,163,163,182,163,113,21,91,210
|
||||
.byte 12,12,48,12,96,60,24,108,12,12,48,12,96,60,24,108
|
||||
.byte 123,123,241,123,255,138,246,132,123,123,241,123,255,138,246,132
|
||||
.byte 53,53,212,53,181,225,106,128,53,53,212,53,181,225,106,128
|
||||
.byte 29,29,116,29,232,105,58,245,29,29,116,29,232,105,58,245
|
||||
.byte 224,224,167,224,83,71,221,179,224,224,167,224,83,71,221,179
|
||||
.byte 215,215,123,215,246,172,179,33,215,215,123,215,246,172,179,33
|
||||
.byte 194,194,47,194,94,237,153,156,194,194,47,194,94,237,153,156
|
||||
.byte 46,46,184,46,109,150,92,67,46,46,184,46,109,150,92,67
|
||||
.byte 75,75,49,75,98,122,150,41,75,75,49,75,98,122,150,41
|
||||
.byte 254,254,223,254,163,33,225,93,254,254,223,254,163,33,225,93
|
||||
.byte 87,87,65,87,130,22,174,213,87,87,65,87,130,22,174,213
|
||||
.byte 21,21,84,21,168,65,42,189,21,21,84,21,168,65,42,189
|
||||
.byte 119,119,193,119,159,182,238,232,119,119,193,119,159,182,238,232
|
||||
.byte 55,55,220,55,165,235,110,146,55,55,220,55,165,235,110,146
|
||||
.byte 229,229,179,229,123,86,215,158,229,229,179,229,123,86,215,158
|
||||
.byte 159,159,70,159,140,217,35,19,159,159,70,159,140,217,35,19
|
||||
.byte 240,240,231,240,211,23,253,35,240,240,231,240,211,23,253,35
|
||||
.byte 74,74,53,74,106,127,148,32,74,74,53,74,106,127,148,32
|
||||
.byte 218,218,79,218,158,149,169,68,218,218,79,218,158,149,169,68
|
||||
.byte 88,88,125,88,250,37,176,162,88,88,125,88,250,37,176,162
|
||||
.byte 201,201,3,201,6,202,143,207,201,201,3,201,6,202,143,207
|
||||
.byte 41,41,164,41,85,141,82,124,41,41,164,41,85,141,82,124
|
||||
.byte 10,10,40,10,80,34,20,90,10,10,40,10,80,34,20,90
|
||||
.byte 177,177,254,177,225,79,127,80,177,177,254,177,225,79,127,80
|
||||
.byte 160,160,186,160,105,26,93,201,160,160,186,160,105,26,93,201
|
||||
.byte 107,107,177,107,127,218,214,20,107,107,177,107,127,218,214,20
|
||||
.byte 133,133,46,133,92,171,23,217,133,133,46,133,92,171,23,217
|
||||
.byte 189,189,206,189,129,115,103,60,189,189,206,189,129,115,103,60
|
||||
.byte 93,93,105,93,210,52,186,143,93,93,105,93,210,52,186,143
|
||||
.byte 16,16,64,16,128,80,32,144,16,16,64,16,128,80,32,144
|
||||
.byte 244,244,247,244,243,3,245,7,244,244,247,244,243,3,245,7
|
||||
.byte 203,203,11,203,22,192,139,221,203,203,11,203,22,192,139,221
|
||||
.byte 62,62,248,62,237,198,124,211,62,62,248,62,237,198,124,211
|
||||
.byte 5,5,20,5,40,17,10,45,5,5,20,5,40,17,10,45
|
||||
.byte 103,103,129,103,31,230,206,120,103,103,129,103,31,230,206,120
|
||||
.byte 228,228,183,228,115,83,213,151,228,228,183,228,115,83,213,151
|
||||
.byte 39,39,156,39,37,187,78,2,39,39,156,39,37,187,78,2
|
||||
.byte 65,65,25,65,50,88,130,115,65,65,25,65,50,88,130,115
|
||||
.byte 139,139,22,139,44,157,11,167,139,139,22,139,44,157,11,167
|
||||
.byte 167,167,166,167,81,1,83,246,167,167,166,167,81,1,83,246
|
||||
.byte 125,125,233,125,207,148,250,178,125,125,233,125,207,148,250,178
|
||||
.byte 149,149,110,149,220,251,55,73,149,149,110,149,220,251,55,73
|
||||
.byte 216,216,71,216,142,159,173,86,216,216,71,216,142,159,173,86
|
||||
.byte 251,251,203,251,139,48,235,112,251,251,203,251,139,48,235,112
|
||||
.byte 238,238,159,238,35,113,193,205,238,238,159,238,35,113,193,205
|
||||
.byte 124,124,237,124,199,145,248,187,124,124,237,124,199,145,248,187
|
||||
.byte 102,102,133,102,23,227,204,113,102,102,133,102,23,227,204,113
|
||||
.byte 221,221,83,221,166,142,167,123,221,221,83,221,166,142,167,123
|
||||
.byte 23,23,92,23,184,75,46,175,23,23,92,23,184,75,46,175
|
||||
.byte 71,71,1,71,2,70,142,69,71,71,1,71,2,70,142,69
|
||||
.byte 158,158,66,158,132,220,33,26,158,158,66,158,132,220,33,26
|
||||
.byte 202,202,15,202,30,197,137,212,202,202,15,202,30,197,137,212
|
||||
.byte 45,45,180,45,117,153,90,88,45,45,180,45,117,153,90,88
|
||||
.byte 191,191,198,191,145,121,99,46,191,191,198,191,145,121,99,46
|
||||
.byte 7,7,28,7,56,27,14,63,7,7,28,7,56,27,14,63
|
||||
.byte 173,173,142,173,1,35,71,172,173,173,142,173,1,35,71,172
|
||||
.byte 90,90,117,90,234,47,180,176,90,90,117,90,234,47,180,176
|
||||
.byte 131,131,54,131,108,181,27,239,131,131,54,131,108,181,27,239
|
||||
.byte 51,51,204,51,133,255,102,182,51,51,204,51,133,255,102,182
|
||||
.byte 99,99,145,99,63,242,198,92,99,99,145,99,63,242,198,92
|
||||
.byte 2,2,8,2,16,10,4,18,2,2,8,2,16,10,4,18
|
||||
.byte 170,170,146,170,57,56,73,147,170,170,146,170,57,56,73,147
|
||||
.byte 113,113,217,113,175,168,226,222,113,113,217,113,175,168,226,222
|
||||
.byte 200,200,7,200,14,207,141,198,200,200,7,200,14,207,141,198
|
||||
.byte 25,25,100,25,200,125,50,209,25,25,100,25,200,125,50,209
|
||||
.byte 73,73,57,73,114,112,146,59,73,73,57,73,114,112,146,59
|
||||
.byte 217,217,67,217,134,154,175,95,217,217,67,217,134,154,175,95
|
||||
.byte 242,242,239,242,195,29,249,49,242,242,239,242,195,29,249,49
|
||||
.byte 227,227,171,227,75,72,219,168,227,227,171,227,75,72,219,168
|
||||
.byte 91,91,113,91,226,42,182,185,91,91,113,91,226,42,182,185
|
||||
.byte 136,136,26,136,52,146,13,188,136,136,26,136,52,146,13,188
|
||||
.byte 154,154,82,154,164,200,41,62,154,154,82,154,164,200,41,62
|
||||
.byte 38,38,152,38,45,190,76,11,38,38,152,38,45,190,76,11
|
||||
.byte 50,50,200,50,141,250,100,191,50,50,200,50,141,250,100,191
|
||||
.byte 176,176,250,176,233,74,125,89,176,176,250,176,233,74,125,89
|
||||
.byte 233,233,131,233,27,106,207,242,233,233,131,233,27,106,207,242
|
||||
.byte 15,15,60,15,120,51,30,119,15,15,60,15,120,51,30,119
|
||||
.byte 213,213,115,213,230,166,183,51,213,213,115,213,230,166,183,51
|
||||
.byte 128,128,58,128,116,186,29,244,128,128,58,128,116,186,29,244
|
||||
.byte 190,190,194,190,153,124,97,39,190,190,194,190,153,124,97,39
|
||||
.byte 205,205,19,205,38,222,135,235,205,205,19,205,38,222,135,235
|
||||
.byte 52,52,208,52,189,228,104,137,52,52,208,52,189,228,104,137
|
||||
.byte 72,72,61,72,122,117,144,50,72,72,61,72,122,117,144,50
|
||||
.byte 255,255,219,255,171,36,227,84,255,255,219,255,171,36,227,84
|
||||
.byte 122,122,245,122,247,143,244,141,122,122,245,122,247,143,244,141
|
||||
.byte 144,144,122,144,244,234,61,100,144,144,122,144,244,234,61,100
|
||||
.byte 95,95,97,95,194,62,190,157,95,95,97,95,194,62,190,157
|
||||
.byte 32,32,128,32,29,160,64,61,32,32,128,32,29,160,64,61
|
||||
.byte 104,104,189,104,103,213,208,15,104,104,189,104,103,213,208,15
|
||||
.byte 26,26,104,26,208,114,52,202,26,26,104,26,208,114,52,202
|
||||
.byte 174,174,130,174,25,44,65,183,174,174,130,174,25,44,65,183
|
||||
.byte 180,180,234,180,201,94,117,125,180,180,234,180,201,94,117,125
|
||||
.byte 84,84,77,84,154,25,168,206,84,84,77,84,154,25,168,206
|
||||
.byte 147,147,118,147,236,229,59,127,147,147,118,147,236,229,59,127
|
||||
.byte 34,34,136,34,13,170,68,47,34,34,136,34,13,170,68,47
|
||||
.byte 100,100,141,100,7,233,200,99,100,100,141,100,7,233,200,99
|
||||
.byte 241,241,227,241,219,18,255,42,241,241,227,241,219,18,255,42
|
||||
.byte 115,115,209,115,191,162,230,204,115,115,209,115,191,162,230,204
|
||||
.byte 18,18,72,18,144,90,36,130,18,18,72,18,144,90,36,130
|
||||
.byte 64,64,29,64,58,93,128,122,64,64,29,64,58,93,128,122
|
||||
.byte 8,8,32,8,64,40,16,72,8,8,32,8,64,40,16,72
|
||||
.byte 195,195,43,195,86,232,155,149,195,195,43,195,86,232,155,149
|
||||
.byte 236,236,151,236,51,123,197,223,236,236,151,236,51,123,197,223
|
||||
.byte 219,219,75,219,150,144,171,77,219,219,75,219,150,144,171,77
|
||||
.byte 161,161,190,161,97,31,95,192,161,161,190,161,97,31,95,192
|
||||
.byte 141,141,14,141,28,131,7,145,141,141,14,141,28,131,7,145
|
||||
.byte 61,61,244,61,245,201,122,200,61,61,244,61,245,201,122,200
|
||||
.byte 151,151,102,151,204,241,51,91,151,151,102,151,204,241,51,91
|
||||
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
|
||||
.byte 207,207,27,207,54,212,131,249,207,207,27,207,54,212,131,249
|
||||
.byte 43,43,172,43,69,135,86,110,43,43,172,43,69,135,86,110
|
||||
.byte 118,118,197,118,151,179,236,225,118,118,197,118,151,179,236,225
|
||||
.byte 130,130,50,130,100,176,25,230,130,130,50,130,100,176,25,230
|
||||
.byte 214,214,127,214,254,169,177,40,214,214,127,214,254,169,177,40
|
||||
.byte 27,27,108,27,216,119,54,195,27,27,108,27,216,119,54,195
|
||||
.byte 181,181,238,181,193,91,119,116,181,181,238,181,193,91,119,116
|
||||
.byte 175,175,134,175,17,41,67,190,175,175,134,175,17,41,67,190
|
||||
.byte 106,106,181,106,119,223,212,29,106,106,181,106,119,223,212,29
|
||||
.byte 80,80,93,80,186,13,160,234,80,80,93,80,186,13,160,234
|
||||
.byte 69,69,9,69,18,76,138,87,69,69,9,69,18,76,138,87
|
||||
.byte 243,243,235,243,203,24,251,56,243,243,235,243,203,24,251,56
|
||||
.byte 48,48,192,48,157,240,96,173,48,48,192,48,157,240,96,173
|
||||
.byte 239,239,155,239,43,116,195,196,239,239,155,239,43,116,195,196
|
||||
.byte 63,63,252,63,229,195,126,218,63,63,252,63,229,195,126,218
|
||||
.byte 85,85,73,85,146,28,170,199,85,85,73,85,146,28,170,199
|
||||
.byte 162,162,178,162,121,16,89,219,162,162,178,162,121,16,89,219
|
||||
.byte 234,234,143,234,3,101,201,233,234,234,143,234,3,101,201,233
|
||||
.byte 101,101,137,101,15,236,202,106,101,101,137,101,15,236,202,106
|
||||
.byte 186,186,210,186,185,104,105,3,186,186,210,186,185,104,105,3
|
||||
.byte 47,47,188,47,101,147,94,74,47,47,188,47,101,147,94,74
|
||||
.byte 192,192,39,192,78,231,157,142,192,192,39,192,78,231,157,142
|
||||
.byte 222,222,95,222,190,129,161,96,222,222,95,222,190,129,161,96
|
||||
.byte 28,28,112,28,224,108,56,252,28,28,112,28,224,108,56,252
|
||||
.byte 253,253,211,253,187,46,231,70,253,253,211,253,187,46,231,70
|
||||
.byte 77,77,41,77,82,100,154,31,77,77,41,77,82,100,154,31
|
||||
.byte 146,146,114,146,228,224,57,118,146,146,114,146,228,224,57,118
|
||||
.byte 117,117,201,117,143,188,234,250,117,117,201,117,143,188,234,250
|
||||
.byte 6,6,24,6,48,30,12,54,6,6,24,6,48,30,12,54
|
||||
.byte 138,138,18,138,36,152,9,174,138,138,18,138,36,152,9,174
|
||||
.byte 178,178,242,178,249,64,121,75,178,178,242,178,249,64,121,75
|
||||
.byte 230,230,191,230,99,89,209,133,230,230,191,230,99,89,209,133
|
||||
.byte 14,14,56,14,112,54,28,126,14,14,56,14,112,54,28,126
|
||||
.byte 31,31,124,31,248,99,62,231,31,31,124,31,248,99,62,231
|
||||
.byte 98,98,149,98,55,247,196,85,98,98,149,98,55,247,196,85
|
||||
.byte 212,212,119,212,238,163,181,58,212,212,119,212,238,163,181,58
|
||||
.byte 168,168,154,168,41,50,77,129,168,168,154,168,41,50,77,129
|
||||
.byte 150,150,98,150,196,244,49,82,150,150,98,150,196,244,49,82
|
||||
.byte 249,249,195,249,155,58,239,98,249,249,195,249,155,58,239,98
|
||||
.byte 197,197,51,197,102,246,151,163,197,197,51,197,102,246,151,163
|
||||
.byte 37,37,148,37,53,177,74,16,37,37,148,37,53,177,74,16
|
||||
.byte 89,89,121,89,242,32,178,171,89,89,121,89,242,32,178,171
|
||||
.byte 132,132,42,132,84,174,21,208,132,132,42,132,84,174,21,208
|
||||
.byte 114,114,213,114,183,167,228,197,114,114,213,114,183,167,228,197
|
||||
.byte 57,57,228,57,213,221,114,236,57,57,228,57,213,221,114,236
|
||||
.byte 76,76,45,76,90,97,152,22,76,76,45,76,90,97,152,22
|
||||
.byte 94,94,101,94,202,59,188,148,94,94,101,94,202,59,188,148
|
||||
.byte 120,120,253,120,231,133,240,159,120,120,253,120,231,133,240,159
|
||||
.byte 56,56,224,56,221,216,112,229,56,56,224,56,221,216,112,229
|
||||
.byte 140,140,10,140,20,134,5,152,140,140,10,140,20,134,5,152
|
||||
.byte 209,209,99,209,198,178,191,23,209,209,99,209,198,178,191,23
|
||||
.byte 165,165,174,165,65,11,87,228,165,165,174,165,65,11,87,228
|
||||
.byte 226,226,175,226,67,77,217,161,226,226,175,226,67,77,217,161
|
||||
.byte 97,97,153,97,47,248,194,78,97,97,153,97,47,248,194,78
|
||||
.byte 179,179,246,179,241,69,123,66,179,179,246,179,241,69,123,66
|
||||
.byte 33,33,132,33,21,165,66,52,33,33,132,33,21,165,66,52
|
||||
.byte 156,156,74,156,148,214,37,8,156,156,74,156,148,214,37,8
|
||||
.byte 30,30,120,30,240,102,60,238,30,30,120,30,240,102,60,238
|
||||
.byte 67,67,17,67,34,82,134,97,67,67,17,67,34,82,134,97
|
||||
.byte 199,199,59,199,118,252,147,177,199,199,59,199,118,252,147,177
|
||||
.byte 252,252,215,252,179,43,229,79,252,252,215,252,179,43,229,79
|
||||
.byte 4,4,16,4,32,20,8,36,4,4,16,4,32,20,8,36
|
||||
.byte 81,81,89,81,178,8,162,227,81,81,89,81,178,8,162,227
|
||||
.byte 153,153,94,153,188,199,47,37,153,153,94,153,188,199,47,37
|
||||
.byte 109,109,169,109,79,196,218,34,109,109,169,109,79,196,218,34
|
||||
.byte 13,13,52,13,104,57,26,101,13,13,52,13,104,57,26,101
|
||||
.byte 250,250,207,250,131,53,233,121,250,250,207,250,131,53,233,121
|
||||
.byte 223,223,91,223,182,132,163,105,223,223,91,223,182,132,163,105
|
||||
.byte 126,126,229,126,215,155,252,169,126,126,229,126,215,155,252,169
|
||||
.byte 36,36,144,36,61,180,72,25,36,36,144,36,61,180,72,25
|
||||
.byte 59,59,236,59,197,215,118,254,59,59,236,59,197,215,118,254
|
||||
.byte 171,171,150,171,49,61,75,154,171,171,150,171,49,61,75,154
|
||||
.byte 206,206,31,206,62,209,129,240,206,206,31,206,62,209,129,240
|
||||
.byte 17,17,68,17,136,85,34,153,17,17,68,17,136,85,34,153
|
||||
.byte 143,143,6,143,12,137,3,131,143,143,6,143,12,137,3,131
|
||||
.byte 78,78,37,78,74,107,156,4,78,78,37,78,74,107,156,4
|
||||
.byte 183,183,230,183,209,81,115,102,183,183,230,183,209,81,115,102
|
||||
.byte 235,235,139,235,11,96,203,224,235,235,139,235,11,96,203,224
|
||||
.byte 60,60,240,60,253,204,120,193,60,60,240,60,253,204,120,193
|
||||
.byte 129,129,62,129,124,191,31,253,129,129,62,129,124,191,31,253
|
||||
.byte 148,148,106,148,212,254,53,64,148,148,106,148,212,254,53,64
|
||||
.byte 247,247,251,247,235,12,243,28,247,247,251,247,235,12,243,28
|
||||
.byte 185,185,222,185,161,103,111,24,185,185,222,185,161,103,111,24
|
||||
.byte 19,19,76,19,152,95,38,139,19,19,76,19,152,95,38,139
|
||||
.byte 44,44,176,44,125,156,88,81,44,44,176,44,125,156,88,81
|
||||
.byte 211,211,107,211,214,184,187,5,211,211,107,211,214,184,187,5
|
||||
.byte 231,231,187,231,107,92,211,140,231,231,187,231,107,92,211,140
|
||||
.byte 110,110,165,110,87,203,220,57,110,110,165,110,87,203,220,57
|
||||
.byte 196,196,55,196,110,243,149,170,196,196,55,196,110,243,149,170
|
||||
.byte 3,3,12,3,24,15,6,27,3,3,12,3,24,15,6,27
|
||||
.byte 86,86,69,86,138,19,172,220,86,86,69,86,138,19,172,220
|
||||
.byte 68,68,13,68,26,73,136,94,68,68,13,68,26,73,136,94
|
||||
.byte 127,127,225,127,223,158,254,160,127,127,225,127,223,158,254,160
|
||||
.byte 169,169,158,169,33,55,79,136,169,169,158,169,33,55,79,136
|
||||
.byte 42,42,168,42,77,130,84,103,42,42,168,42,77,130,84,103
|
||||
.byte 187,187,214,187,177,109,107,10,187,187,214,187,177,109,107,10
|
||||
.byte 193,193,35,193,70,226,159,135,193,193,35,193,70,226,159,135
|
||||
.byte 83,83,81,83,162,2,166,241,83,83,81,83,162,2,166,241
|
||||
.byte 220,220,87,220,174,139,165,114,220,220,87,220,174,139,165,114
|
||||
.byte 11,11,44,11,88,39,22,83,11,11,44,11,88,39,22,83
|
||||
.byte 157,157,78,157,156,211,39,1,157,157,78,157,156,211,39,1
|
||||
.byte 108,108,173,108,71,193,216,43,108,108,173,108,71,193,216,43
|
||||
.byte 49,49,196,49,149,245,98,164,49,49,196,49,149,245,98,164
|
||||
.byte 116,116,205,116,135,185,232,243,116,116,205,116,135,185,232,243
|
||||
.byte 246,246,255,246,227,9,241,21,246,246,255,246,227,9,241,21
|
||||
.byte 70,70,5,70,10,67,140,76,70,70,5,70,10,67,140,76
|
||||
.byte 172,172,138,172,9,38,69,165,172,172,138,172,9,38,69,165
|
||||
.byte 137,137,30,137,60,151,15,181,137,137,30,137,60,151,15,181
|
||||
.byte 20,20,80,20,160,68,40,180,20,20,80,20,160,68,40,180
|
||||
.byte 225,225,163,225,91,66,223,186,225,225,163,225,91,66,223,186
|
||||
.byte 22,22,88,22,176,78,44,166,22,22,88,22,176,78,44,166
|
||||
.byte 58,58,232,58,205,210,116,247,58,58,232,58,205,210,116,247
|
||||
.byte 105,105,185,105,111,208,210,6,105,105,185,105,111,208,210,6
|
||||
.byte 9,9,36,9,72,45,18,65,9,9,36,9,72,45,18,65
|
||||
.byte 112,112,221,112,167,173,224,215,112,112,221,112,167,173,224,215
|
||||
.byte 182,182,226,182,217,84,113,111,182,182,226,182,217,84,113,111
|
||||
.byte 208,208,103,208,206,183,189,30,208,208,103,208,206,183,189,30
|
||||
.byte 237,237,147,237,59,126,199,214,237,237,147,237,59,126,199,214
|
||||
.byte 204,204,23,204,46,219,133,226,204,204,23,204,46,219,133,226
|
||||
.byte 66,66,21,66,42,87,132,104,66,66,21,66,42,87,132,104
|
||||
.byte 152,152,90,152,180,194,45,44,152,152,90,152,180,194,45,44
|
||||
.byte 164,164,170,164,73,14,85,237,164,164,170,164,73,14,85,237
|
||||
.byte 40,40,160,40,93,136,80,117,40,40,160,40,93,136,80,117
|
||||
.byte 92,92,109,92,218,49,184,134,92,92,109,92,218,49,184,134
|
||||
.byte 248,248,199,248,147,63,237,107,248,248,199,248,147,63,237,107
|
||||
.byte 134,134,34,134,68,164,17,194,134,134,34,134,68,164,17,194
|
||||
.byte 24,35,198,232,135,184,1,79
|
||||
.byte 54,166,210,245,121,111,145,82
|
||||
.byte 96,188,155,142,163,12,123,53
|
||||
.byte 29,224,215,194,46,75,254,87
|
||||
.byte 21,119,55,229,159,240,74,218
|
||||
.byte 88,201,41,10,177,160,107,133
|
||||
.byte 189,93,16,244,203,62,5,103
|
||||
.byte 228,39,65,139,167,125,149,216
|
||||
.byte 251,238,124,102,221,23,71,158
|
||||
.byte 202,45,191,7,173,90,131,51
|
||||
/*
 * ELF note with GNU program properties for this object file.
 * Layout emitted below:
 *   .long  name size   (1f - 0f, the four bytes "GNU\0")
 *   .long  desc size   (4f - 1f)
 *   .long  note type   (5)
 *   name, padded to 8 bytes
 *   descriptor: one property record = type 0xc0000002, data size
 *   (3f - 2f, one .long), value 3, padded to 8 bytes.
 * NOTE(review): per the x86-64 psABI, note type 5 should be
 * NT_GNU_PROPERTY_TYPE_0 and property 0xc0000002 with value 3 should be
 * GNU_PROPERTY_X86_FEATURE_1_AND = IBT | SHSTK -- confirm against the psABI.
 * NOTE(review): if that reading is right, this object claims to be
 * IBT-compatible, yet whirlpool_block has no endbr64 at its entry --
 * confirm it can never be reached through an indirect branch.
 */
.section ".note.gnu.property", "a"
|
||||
.p2align 3
|
||||
.long 1f - 0f
|
||||
.long 4f - 1f
|
||||
.long 5
|
||||
0:
|
||||
# "GNU" encoded with .byte, since .asciz isn't supported
|
||||
# on Solaris.
|
||||
.byte 0x47
|
||||
.byte 0x4e
|
||||
.byte 0x55
|
||||
.byte 0
|
||||
1:
|
||||
.p2align 3
|
||||
.long 0xc0000002
|
||||
.long 3f - 2f
|
||||
2:
|
||||
.long 3
|
||||
3:
|
||||
.p2align 3
|
||||
4:
|
||||
@@ -1,824 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from x25519-x86_64.pl. */
|
||||
.text
|
||||
|
||||
/*
 * x25519_fe51_mul
 *
 * Multiplies two field elements, each read as five 64-bit words, and leaves
 * the five 128-bit column sums in registers for .Lreduce51, which carries,
 * stores the result and returns to the caller.
 *
 *   %rdi  output pointer (parked at 32(%rsp), reloaded before the jump)
 *   %rsi  first operand,  f0..f4 at offsets 0..32
 *   %rdx  second operand, g0..g4 at offsets 0..32
 *
 * Column accumulators (high:low):
 *   h0 = %rcx:%rbx   h1 = %r9:%r8   h2 = %r11:%r10
 *   h3 = %r13:%r12   h4 = %r15:%r14
 *
 * Stack frame: 0(%rsp)=g0, 8(%rsp)=g1, 16(%rsp)=g2, 32(%rsp)=output pointer.
 * Products that would land above h4 are wrapped around by multiplying the
 * g word by 19 first.
 * NOTE(review): words are presumably 51-bit limbs (function name, and the
 * shift by 51 in .Lreduce51) - confirm the input range with the C caller.
 */
.globl	x25519_fe51_mul
.type	x25519_fe51_mul,@function
.p2align 5
x25519_fe51_mul:
.cfi_startproc
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	leaq	-40(%rsp),%rsp
.cfi_adjust_cfa_offset	40
.Lfe51_mul_body:

	/* Load f0 and all five words of g. */
	movq	(%rsi),%rax
	movq	(%rdx),%r11
	movq	8(%rdx),%r12
	movq	16(%rdx),%r13
	movq	24(%rdx),%rbp
	movq	32(%rdx),%r14

	/* f0 * g0..g4 initialises h0..h4; g0..g2 are spilled on the way. */
	movq	%rdi,32(%rsp)
	movq	%rax,%rdi
	mulq	%r11
	movq	%r11,(%rsp)
	movq	%rax,%rbx
	movq	%rdi,%rax
	movq	%rdx,%rcx
	mulq	%r12
	movq	%r12,8(%rsp)
	movq	%rax,%r8
	movq	%rdi,%rax
	leaq	(%r14,%r14,8),%r15		/* 9*g4 */
	movq	%rdx,%r9
	mulq	%r13
	movq	%r13,16(%rsp)
	movq	%rax,%r10
	movq	%rdi,%rax
	leaq	(%r14,%r15,2),%rdi		/* g4 + 2*9*g4 = 19*g4 */
	movq	%rdx,%r11
	mulq	%rbp
	movq	%rax,%r12
	movq	(%rsi),%rax
	movq	%rdx,%r13
	mulq	%r14
	movq	%rax,%r14
	movq	8(%rsi),%rax
	movq	%rdx,%r15

	/* f1..f4 * 19*g4 into h0..h3, then f1 * g3 into h4. */
	mulq	%rdi
	addq	%rax,%rbx
	movq	16(%rsi),%rax
	adcq	%rdx,%rcx
	mulq	%rdi
	addq	%rax,%r8
	movq	24(%rsi),%rax
	adcq	%rdx,%r9
	mulq	%rdi
	addq	%rax,%r10
	movq	32(%rsi),%rax
	adcq	%rdx,%r11
	mulq	%rdi
	imulq	$19,%rbp,%rdi			/* 19*g3 */
	addq	%rax,%r12
	movq	8(%rsi),%rax
	adcq	%rdx,%r13
	mulq	%rbp
	movq	16(%rsp),%rbp			/* g2 */
	addq	%rax,%r14
	movq	16(%rsi),%rax
	adcq	%rdx,%r15

	/* f2..f4 * 19*g3 into h0..h2, then f1,f2 * g2 into h3,h4. */
	mulq	%rdi
	addq	%rax,%rbx
	movq	24(%rsi),%rax
	adcq	%rdx,%rcx
	mulq	%rdi
	addq	%rax,%r8
	movq	32(%rsi),%rax
	adcq	%rdx,%r9
	mulq	%rdi
	imulq	$19,%rbp,%rdi			/* 19*g2 */
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	%rdx,%r11
	mulq	%rbp
	addq	%rax,%r12
	movq	16(%rsi),%rax
	adcq	%rdx,%r13
	mulq	%rbp
	movq	8(%rsp),%rbp			/* g1 */
	addq	%rax,%r14
	movq	24(%rsi),%rax
	adcq	%rdx,%r15

	/* f3,f4 * 19*g2 into h0,h1, then f1..f3 * g1 into h2..h4. */
	mulq	%rdi
	addq	%rax,%rbx
	movq	32(%rsi),%rax
	adcq	%rdx,%rcx
	mulq	%rdi
	addq	%rax,%r8
	movq	8(%rsi),%rax
	adcq	%rdx,%r9
	mulq	%rbp
	imulq	$19,%rbp,%rdi			/* 19*g1 */
	addq	%rax,%r10
	movq	16(%rsi),%rax
	adcq	%rdx,%r11
	mulq	%rbp
	addq	%rax,%r12
	movq	24(%rsi),%rax
	adcq	%rdx,%r13
	mulq	%rbp
	movq	(%rsp),%rbp			/* g0 */
	addq	%rax,%r14
	movq	32(%rsi),%rax
	adcq	%rdx,%r15

	/* f4 * 19*g1 into h0, then f1..f4 * g0 into h1..h4. */
	mulq	%rdi
	addq	%rax,%rbx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx
	mulq	%rbp
	addq	%rax,%r8
	movq	16(%rsi),%rax
	adcq	%rdx,%r9
	mulq	%rbp
	addq	%rax,%r10
	movq	24(%rsi),%rax
	adcq	%rdx,%r11
	mulq	%rbp
	addq	%rax,%r12
	movq	32(%rsi),%rax
	adcq	%rdx,%r13
	mulq	%rbp
	addq	%rax,%r14
	adcq	%rdx,%r15

	/* Recover the output pointer and fall into the shared reduction. */
	movq	32(%rsp),%rdi
	jmp	.Lreduce51
.Lfe51_mul_epilogue:
.cfi_endproc
.size	x25519_fe51_mul,.-x25519_fe51_mul
|
||||
|
||||
/*
 * x25519_fe51_sqr
 *
 * Squares a field element read as five 64-bit words f0..f4 from %rsi and
 * writes five words to %rdi.  The tail starting at .Lreduce51 is shared: the
 * sibling fe51 routines jump into it with the same stack frame (six pushed
 * registers plus 40 bytes) and the same accumulator layout.
 *
 * Column accumulators (high:low):
 *   h0 = %rcx:%rbx   h1 = %r9:%r8   h2 = %r11:%r10
 *   h3 = %r13:%r12   h4 = %r15:%r14
 *
 * Stack frame: 0(%rsp)=f2, 32(%rsp)=output pointer.
 *
 * Fix relative to the generated original: the CFA adjustment after the final
 * "leaq 88(%rsp),%rsp" was +88; the stack pointer moves up by 88 there, so
 * the CFA offset has to drop by 88, otherwise the unwind data is wrong at
 * the return instruction.
 */
.globl	x25519_fe51_sqr
.type	x25519_fe51_sqr,@function
.align	32
x25519_fe51_sqr:
.cfi_startproc
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	leaq	-40(%rsp),%rsp
.cfi_adjust_cfa_offset	40
.Lfe51_sqr_body:

	movq	0(%rsi),%rax			/* f0 */
	movq	16(%rsi),%r15			/* f2 */
	movq	32(%rsi),%rbp			/* f4 */

	/* f0*f0 and 2*f0 * f1..f4 initialise h0..h4. */
	movq	%rdi,32(%rsp)
	leaq	(%rax,%rax,1),%r14		/* 2*f0 */
	mulq	%rax
	movq	%rax,%rbx
	movq	8(%rsi),%rax
	movq	%rdx,%rcx
	mulq	%r14
	movq	%rax,%r8
	movq	%r15,%rax
	movq	%r15,0(%rsp)
	movq	%rdx,%r9
	mulq	%r14
	movq	%rax,%r10
	movq	24(%rsi),%rax
	movq	%rdx,%r11
	imulq	$19,%rbp,%rdi			/* 19*f4 */
	mulq	%r14
	movq	%rax,%r12
	movq	%rbp,%rax
	movq	%rdx,%r13
	mulq	%r14
	movq	%rax,%r14
	movq	%rbp,%rax
	movq	%rdx,%r15

	/* h3 += f4 * 19*f4 */
	mulq	%rdi
	addq	%rax,%r12
	movq	8(%rsi),%rax
	adcq	%rdx,%r13

	/* Terms in f1; %rsi is reused to hold f3 from here on. */
	movq	24(%rsi),%rsi
	leaq	(%rax,%rax,1),%rbp		/* 2*f1 */
	mulq	%rax
	addq	%rax,%r10
	movq	0(%rsp),%rax
	adcq	%rdx,%r11
	mulq	%rbp
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	%rdx,%r13
	mulq	%rsi
	addq	%rax,%r14
	movq	%rbp,%rax
	adcq	%rdx,%r15
	imulq	$19,%rsi,%rbp			/* 19*f3 */
	mulq	%rdi
	addq	%rax,%rbx
	leaq	(%rsi,%rsi,1),%rax		/* 2*f3 */
	adcq	%rdx,%rcx

	/* Terms in f3. */
	mulq	%rdi
	addq	%rax,%r10
	movq	%rsi,%rax
	adcq	%rdx,%r11
	mulq	%rbp
	addq	%rax,%r8
	movq	0(%rsp),%rax
	adcq	%rdx,%r9

	/* Terms in f2. */
	leaq	(%rax,%rax,1),%rsi		/* 2*f2 */
	mulq	%rax
	addq	%rax,%r14
	movq	%rbp,%rax
	adcq	%rdx,%r15
	mulq	%rsi
	addq	%rax,%rbx
	movq	%rsi,%rax
	adcq	%rdx,%rcx
	mulq	%rdi
	addq	%rax,%r8
	adcq	%rdx,%r9

	movq	32(%rsp),%rdi
	jmp	.Lreduce51

/*
 * Shared tail.  Splits every 128-bit accumulator at bit 51, pushes the upper
 * part into the next column, multiplies the part leaving h4 by 19 and adds
 * it to h0, then does one more short carry pass before storing.
 */
.align	32
.Lreduce51:
	movq	$0x7ffffffffffff,%rbp		/* 2^51 - 1 */

	/* h2 -> h3 */
	movq	%r10,%rdx
	shrq	$51,%r10
	shlq	$13,%r11
	andq	%rbp,%rdx
	orq	%r10,%r11
	addq	%r11,%r12
	adcq	$0,%r13

	/* h0 -> h1 */
	movq	%rbx,%rax
	shrq	$51,%rbx
	shlq	$13,%rcx
	andq	%rbp,%rax
	orq	%rbx,%rcx
	addq	%rcx,%r8
	adcq	$0,%r9

	/* h3 -> h4 */
	movq	%r12,%rbx
	shrq	$51,%r12
	shlq	$13,%r13
	andq	%rbp,%rbx
	orq	%r12,%r13
	addq	%r13,%r14
	adcq	$0,%r15

	/* h1 -> h2 */
	movq	%r8,%rcx
	shrq	$51,%r8
	shlq	$13,%r9
	andq	%rbp,%rcx
	orq	%r8,%r9
	addq	%r9,%rdx

	/* h4 -> h0, scaled by 19 */
	movq	%r14,%r10
	shrq	$51,%r14
	shlq	$13,%r15
	andq	%rbp,%r10
	orq	%r14,%r15

	leaq	(%r15,%r15,8),%r14
	leaq	(%r15,%r14,2),%r15
	addq	%r15,%rax

	/* Second pass: h2 -> h3 and h0 -> h1. */
	movq	%rdx,%r8
	andq	%rbp,%rdx
	shrq	$51,%r8
	addq	%r8,%rbx

	movq	%rax,%r9
	andq	%rbp,%rax
	shrq	$51,%r9
	addq	%r9,%rcx

	movq	%rax,0(%rdi)
	movq	%rcx,8(%rdi)
	movq	%rdx,16(%rdi)
	movq	%rbx,24(%rdi)
	movq	%r10,32(%rdi)

	/* Restore the callee-saved registers and drop the 88-byte frame. */
	movq	40(%rsp),%r15
.cfi_restore	%r15
	movq	48(%rsp),%r14
.cfi_restore	%r14
	movq	56(%rsp),%r13
.cfi_restore	%r13
	movq	64(%rsp),%r12
.cfi_restore	%r12
	movq	72(%rsp),%rbx
.cfi_restore	%rbx
	movq	80(%rsp),%rbp
.cfi_restore	%rbp
	leaq	88(%rsp),%rsp
.cfi_adjust_cfa_offset	-88
.Lfe51_sqr_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	x25519_fe51_sqr,.-x25519_fe51_sqr
|
||||
|
||||
/*
 * x25519_fe51_mul121666
 *
 * Multiplies each of the five 64-bit words at %rsi by the constant 121666
 * and hands the five 128-bit products to .Lreduce51, which stores the
 * result through %rdi and returns.  %rdi is never modified here, so it is
 * not spilled; the frame is still built because .Lreduce51 unwinds it.
 *
 * Products (high:low):
 *   h0 = %rcx:%rbx   h1 = %r9:%r8   h2 = %r11:%r10
 *   h3 = %r13:%r12   h4 = %r15:%r14
 */
.globl	x25519_fe51_mul121666
.type	x25519_fe51_mul121666,@function
.p2align 5
x25519_fe51_mul121666:
.cfi_startproc
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	leaq	-40(%rsp),%rsp
.cfi_adjust_cfa_offset	40
.Lfe51_mul121666_body:
	movl	$121666,%eax

	/* mulq overwrites %rax, so the constant is reloaded before each word. */
	mulq	(%rsi)
	movq	%rax,%rbx
	movl	$121666,%eax
	movq	%rdx,%rcx
	mulq	8(%rsi)
	movq	%rax,%r8
	movl	$121666,%eax
	movq	%rdx,%r9
	mulq	16(%rsi)
	movq	%rax,%r10
	movl	$121666,%eax
	movq	%rdx,%r11
	mulq	24(%rsi)
	movq	%rax,%r12
	movl	$121666,%eax
	movq	%rdx,%r13
	mulq	32(%rsi)
	movq	%rax,%r14
	movq	%rdx,%r15

	jmp	.Lreduce51
.Lfe51_mul121666_epilogue:
.cfi_endproc
.size	x25519_fe51_mul121666,.-x25519_fe51_mul121666
|
||||
|
||||
/*
 * x25519_fe64_eligible
 *
 * Returns 0x80100 in %eax when both bits of the mask 0x80100 are set in the
 * 32-bit word at OPENSSL_ia32cap_P+8, and 0 otherwise.  Takes no arguments;
 * clobbers %ecx and the flags.
 * NOTE(review): bits 8 and 19 of that word are presumably the BMI2 and ADX
 * feature flags required by the mulx/adcx/adox fe64 routines - confirm
 * against the OPENSSL_ia32cap layout.
 */
.globl	x25519_fe64_eligible
.type	x25519_fe64_eligible,@function
.p2align 5
x25519_fe64_eligible:
.cfi_startproc
	xorl	%eax,%eax			/* default answer: not eligible */
	movl	OPENSSL_ia32cap_P+8(%rip),%ecx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	cmovel	%ecx,%eax			/* both bits present */
	.byte	0xf3,0xc3
.cfi_endproc
.size	x25519_fe64_eligible,.-x25519_fe64_eligible
|
||||
|
||||
/*
 * x25519_fe64_mul
 *
 * Multiplies two four-word (4 x 64-bit) operands and leaves the eight-word
 * product in %r8..%r15 for .Lreduce64, which folds it to four words, stores
 * it and returns.
 *
 *   %rdi  output pointer (pushed; .Lreduce64 reloads it from 16(%rsp))
 *   %rsi  first operand  a0..a3
 *   %rdx  second operand b0..b3
 *
 * Inside the body %rbp=b0, %rcx=b1, %r14=b2, %r15=b3 and %rdx carries the
 * current a word for mulx.  %rdi is zeroed and used as the constant 0 that
 * terminates the two carry chains (adcx uses CF, adox uses OF).  b2 is also
 * kept at (%rsp) because %r14 is overwritten by the third row.
 * The instruction order is significant: both carry flags stay live across
 * each row.
 */
.globl	x25519_fe64_mul
.type	x25519_fe64_mul,@function
.p2align 5
x25519_fe64_mul:
.cfi_startproc
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	pushq	%rdi
.cfi_adjust_cfa_offset	8
.cfi_offset	%rdi,-64
	leaq	-16(%rsp),%rsp
.cfi_adjust_cfa_offset	16
.Lfe64_mul_body:

	movq	%rdx,%rax			/* free %rdx for mulx */
	movq	(%rdx),%rbp
	movq	(%rsi),%rdx
	movq	8(%rax),%rcx
	movq	16(%rax),%r14
	movq	24(%rax),%r15

	/* Row 0: a0 * b0..b3 */
	mulxq	%rbp,%r8,%rax
	xorl	%edi,%edi			/* zero, and clears CF and OF */
	mulxq	%rcx,%r9,%rbx
	adcxq	%rax,%r9
	mulxq	%r14,%r10,%rax
	adcxq	%rbx,%r10
	mulxq	%r15,%r11,%r12
	movq	8(%rsi),%rdx
	adcxq	%rax,%r11
	movq	%r14,(%rsp)
	adcxq	%rdi,%r12

	/* Row 1: a1 * b0..b3 */
	mulxq	%rbp,%rax,%rbx
	adoxq	%rax,%r9
	adcxq	%rbx,%r10
	mulxq	%rcx,%rax,%rbx
	adoxq	%rax,%r10
	adcxq	%rbx,%r11
	mulxq	%r14,%rax,%rbx
	adoxq	%rax,%r11
	adcxq	%rbx,%r12
	mulxq	%r15,%rax,%r13
	movq	16(%rsi),%rdx
	adoxq	%rax,%r12
	adcxq	%rdi,%r13
	adoxq	%rdi,%r13

	/* Row 2: a2 * b0..b3 */
	mulxq	%rbp,%rax,%rbx
	adcxq	%rax,%r10
	adoxq	%rbx,%r11
	mulxq	%rcx,%rax,%rbx
	adcxq	%rax,%r11
	adoxq	%rbx,%r12
	mulxq	%r14,%rax,%rbx
	adcxq	%rax,%r12
	adoxq	%rbx,%r13
	mulxq	%r15,%rax,%r14
	movq	24(%rsi),%rdx
	adcxq	%rax,%r13
	adoxq	%rdi,%r14
	adcxq	%rdi,%r14

	/* Row 3: a3 * b0..b3 (b2 comes from the stack copy) */
	mulxq	%rbp,%rax,%rbx
	adoxq	%rax,%r11
	adcxq	%rbx,%r12
	mulxq	%rcx,%rax,%rbx
	adoxq	%rax,%r12
	adcxq	%rbx,%r13
	mulxq	(%rsp),%rax,%rbx
	adoxq	%rax,%r13
	adcxq	%rbx,%r14
	mulxq	%r15,%rax,%r15
	movl	$38,%edx			/* multiplier used by .Lreduce64 */
	adoxq	%rax,%r14
	adcxq	%rdi,%r15
	adoxq	%rdi,%r15

	jmp	.Lreduce64
.Lfe64_mul_epilogue:
.cfi_endproc
.size	x25519_fe64_mul,.-x25519_fe64_mul
|
||||
|
||||
/*
 * x25519_fe64_sqr
 *
 * Squares the four-word (4 x 64-bit) operand at %rsi and stores four words
 * through %rdi.  The tail starting at .Lreduce64 is shared: x25519_fe64_mul
 * jumps into it with the same frame (seven pushed registers plus 16 bytes),
 * the eight-word product in %r8..%r15, %rdi = 0 and %edx = 38.
 *
 * Inside the body %rdx/%rcx/%rbp/%rsi hold a0..a3.  The off-diagonal
 * products are accumulated once, doubled with "adcx r,r", and the squares
 * of each word are added on the second (adox) carry chain.  The instruction
 * order is significant because both carry flags stay live.
 *
 * Fix relative to the generated original: the CFA adjustment after the final
 * "leaq 72(%rsp),%rsp" was +88.  The frame is 72 bytes (7 pushes + 16) and
 * the stack pointer moves up, so the CFA offset has to drop by 72; otherwise
 * the unwind data is wrong at the return instruction.
 */
.globl	x25519_fe64_sqr
.type	x25519_fe64_sqr,@function
.align	32
x25519_fe64_sqr:
.cfi_startproc
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	pushq	%rdi
.cfi_adjust_cfa_offset	8
.cfi_offset	%rdi,-64
	leaq	-16(%rsp),%rsp
.cfi_adjust_cfa_offset	16
.Lfe64_sqr_body:

	movq	0(%rsi),%rdx			/* a0 */
	movq	8(%rsi),%rcx			/* a1 */
	movq	16(%rsi),%rbp			/* a2 */
	movq	24(%rsi),%rsi			/* a3 */

	/* a0*a0 (high half parked in %r15) and a0 * a1..a3 */
	mulxq	%rdx,%r8,%r15
	mulxq	%rcx,%r9,%rax
	xorl	%edi,%edi			/* zero, and clears CF and OF */
	mulxq	%rbp,%r10,%rbx
	adcxq	%rax,%r10
	mulxq	%rsi,%r11,%r12
	movq	%rcx,%rdx
	adcxq	%rbx,%r11
	adcxq	%rdi,%r12

	/* a1 * a2, a1 * a3 */
	mulxq	%rbp,%rax,%rbx
	adoxq	%rax,%r11
	adcxq	%rbx,%r12
	mulxq	%rsi,%rax,%r13
	movq	%rbp,%rdx
	adoxq	%rax,%r12
	adcxq	%rdi,%r13

	/* a2 * a3 */
	mulxq	%rsi,%rax,%r14
	movq	%rcx,%rdx
	adoxq	%rax,%r13
	adcxq	%rdi,%r14
	adoxq	%rdi,%r14

	/* Double the cross terms (adcx) while adding the squares (adox). */
	adcxq	%r9,%r9
	adoxq	%r15,%r9
	adcxq	%r10,%r10
	mulxq	%rdx,%rax,%rbx			/* a1*a1 */
	movq	%rbp,%rdx
	adcxq	%r11,%r11
	adoxq	%rax,%r10
	adcxq	%r12,%r12
	adoxq	%rbx,%r11
	mulxq	%rdx,%rax,%rbx			/* a2*a2 */
	movq	%rsi,%rdx
	adcxq	%r13,%r13
	adoxq	%rax,%r12
	adcxq	%r14,%r14
	adoxq	%rbx,%r13
	mulxq	%rdx,%rax,%r15			/* a3*a3 */
	movl	$38,%edx			/* multiplier for the reduction */
	adoxq	%rax,%r14
	adcxq	%rdi,%r15
	adoxq	%rdi,%r15
	jmp	.Lreduce64

/*
 * Shared tail.  Adds 38 times the upper four words (%r12..%r15) to the lower
 * four (%r8..%r11), folds the word that spills out of that sum back in with
 * another multiplication by 38, and finally adds 38 once more if the last
 * addition carried.
 */
.align	32
.Lreduce64:
	mulxq	%r12,%rax,%rbx
	adcxq	%rax,%r8
	adoxq	%rbx,%r9
	mulxq	%r13,%rax,%rbx
	adcxq	%rax,%r9
	adoxq	%rbx,%r10
	mulxq	%r14,%rax,%rbx
	adcxq	%rax,%r10
	adoxq	%rbx,%r11
	mulxq	%r15,%rax,%r12
	adcxq	%rax,%r11
	adoxq	%rdi,%r12
	adcxq	%rdi,%r12

	movq	16(%rsp),%rdi			/* output pointer saved by the prologue */
	imulq	%rdx,%r12

	addq	%r12,%r8
	adcq	$0,%r9
	adcq	$0,%r10
	adcq	$0,%r11

	sbbq	%rax,%rax			/* all ones if the add carried */
	andq	$38,%rax

	addq	%rax,%r8
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r8,0(%rdi)

	/* Restore the callee-saved registers and drop the 72-byte frame. */
	movq	24(%rsp),%r15
.cfi_restore	%r15
	movq	32(%rsp),%r14
.cfi_restore	%r14
	movq	40(%rsp),%r13
.cfi_restore	%r13
	movq	48(%rsp),%r12
.cfi_restore	%r12
	movq	56(%rsp),%rbx
.cfi_restore	%rbx
	movq	64(%rsp),%rbp
.cfi_restore	%rbp
	leaq	72(%rsp),%rsp
.cfi_adjust_cfa_offset	-72
.Lfe64_sqr_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	x25519_fe64_sqr,.-x25519_fe64_sqr
|
||||
|
||||
/*
 * x25519_fe64_mul121666
 *
 * Multiplies the four-word (4 x 64-bit) operand at %rsi by the constant
 * 121666 and stores four words through %rdi.  The word that overflows the
 * four-word result is multiplied by 38 and added back; if that addition
 * carries, 38 is added once more.
 * Leaf routine: no stack frame; uses %rax, %rcx, %rdx, %r8-%r11 and flags.
 */
.globl	x25519_fe64_mul121666
.type	x25519_fe64_mul121666,@function
.p2align 5
x25519_fe64_mul121666:
.Lfe64_mul121666_body:
.cfi_startproc
	movl	$121666,%edx
	mulxq	(%rsi),%r8,%rcx
	mulxq	8(%rsi),%r9,%rax
	addq	%rcx,%r9
	mulxq	16(%rsi),%r10,%rcx
	adcq	%rax,%r10
	mulxq	24(%rsi),%r11,%rax
	adcq	%rcx,%r11
	adcq	$0,%rax				/* fifth (overflow) word */

	imulq	$38,%rax,%rax

	addq	%rax,%r8
	adcq	$0,%r9
	adcq	$0,%r10
	adcq	$0,%r11

	sbbq	%rax,%rax			/* all ones if the add carried */
	andq	$38,%rax

	addq	%rax,%r8
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r8,(%rdi)

.Lfe64_mul121666_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	x25519_fe64_mul121666,.-x25519_fe64_mul121666
|
||||
|
||||
/*
 * x25519_fe64_add
 *
 * Adds the four-word (4 x 64-bit) operands at %rsi and %rdx and stores four
 * words through %rdi.  A carry out of the top word is compensated by adding
 * 38 to the bottom word; this is done twice because the first correction
 * can itself carry.
 * Leaf routine: no stack frame; uses %rax, %r8-%r11 and flags.  The stores
 * are interleaved with the carry chain; mov leaves the flags untouched.
 */
.globl	x25519_fe64_add
.type	x25519_fe64_add,@function
.p2align 5
x25519_fe64_add:
.Lfe64_add_body:
.cfi_startproc
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11

	addq	(%rdx),%r8
	adcq	8(%rdx),%r9
	adcq	16(%rdx),%r10
	adcq	24(%rdx),%r11

	sbbq	%rax,%rax			/* all ones if the sum carried */
	andq	$38,%rax

	addq	%rax,%r8
	adcq	$0,%r9
	adcq	$0,%r10
	movq	%r9,8(%rdi)
	adcq	$0,%r11
	movq	%r10,16(%rdi)
	sbbq	%rax,%rax			/* carry out of the correction */
	movq	%r11,24(%rdi)
	andq	$38,%rax

	addq	%rax,%r8
	movq	%r8,(%rdi)

.Lfe64_add_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	x25519_fe64_add,.-x25519_fe64_add
|
||||
|
||||
/*
 * x25519_fe64_sub
 *
 * Subtracts the four-word (4 x 64-bit) operand at %rdx from the one at %rsi
 * and stores four words through %rdi.  A borrow out of the top word is
 * compensated by subtracting 38 from the bottom word; this is done twice
 * because the first correction can itself borrow.
 * Leaf routine: no stack frame; uses %rax, %r8-%r11 and flags.  The stores
 * are interleaved with the borrow chain; mov leaves the flags untouched.
 */
.globl	x25519_fe64_sub
.type	x25519_fe64_sub,@function
.p2align 5
x25519_fe64_sub:
.Lfe64_sub_body:
.cfi_startproc
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11

	subq	(%rdx),%r8
	sbbq	8(%rdx),%r9
	sbbq	16(%rdx),%r10
	sbbq	24(%rdx),%r11

	sbbq	%rax,%rax			/* all ones if the difference borrowed */
	andq	$38,%rax

	subq	%rax,%r8
	sbbq	$0,%r9
	sbbq	$0,%r10
	movq	%r9,8(%rdi)
	sbbq	$0,%r11
	movq	%r10,16(%rdi)
	sbbq	%rax,%rax			/* borrow out of the correction */
	movq	%r11,24(%rdi)
	andq	$38,%rax

	subq	%rax,%r8
	movq	%r8,(%rdi)

.Lfe64_sub_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	x25519_fe64_sub,.-x25519_fe64_sub
|
||||
|
||||
/*
 * x25519_fe64_tobytes
 *
 * Reads four 64-bit words from %rsi, brings the value into a 255-bit range
 * and stores four words through %rdi.
 *
 * Step 1: strip bit 255 and add 19 (38 if bit 255 was set) to the value.
 * Step 2: strip bit 255 of that sum and subtract 19 again unless the sum
 *         had bit 255 set.
 * NOTE(review): this looks like the final reduction modulo 2^255-19 that
 * yields the canonical encoding - confirm against the C reference.
 * Leaf routine: no stack frame; uses %rax, %r8-%r11 and flags.
 */
.globl	x25519_fe64_tobytes
.type	x25519_fe64_tobytes,@function
.p2align 5
x25519_fe64_tobytes:
.Lfe64_to_body:
.cfi_startproc
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11

	leaq	(%r11,%r11,1),%rax		/* shift bit 63 of the top word out ... */
	sarq	$63,%r11			/* ... and turn it into a mask */
	shrq	$1,%rax				/* top word without bit 63 */
	andq	$19,%r11
	addq	$19,%r11			/* 19, or 38 when bit 63 was set */

	addq	%r11,%r8
	adcq	$0,%r9
	adcq	$0,%r10
	adcq	$0,%rax

	leaq	(%rax,%rax,1),%r11
	sarq	$63,%rax
	shrq	$1,%r11				/* top word of the sum without bit 63 */
	notq	%rax				/* all ones when bit 63 of the sum was clear */
	andq	$19,%rax

	subq	%rax,%r8
	sbbq	$0,%r9
	sbbq	$0,%r10
	sbbq	$0,%r11

	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)

.Lfe64_to_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	x25519_fe64_tobytes,.-x25519_fe64_tobytes
|
||||
/*
 * NUL-terminated ASCII identification string:
 * "X25519 primitives for x86_64, CRYPTOGAMS by <appro@openssl.org>"
 */
.byte	88,50,53,53,49,57,32,112,114,105,109,105,116,105,118,101,115,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
/*
 * ELF note placed in .note.gnu.property: a 12-byte header (name size,
 * descriptor size, note type 5), the name "GNU\0", then a single property
 * record (type 0xc0000002, 4-byte payload, value 3).  The sizes are computed
 * from the numeric local labels so they always match the emitted bytes.
 * NOTE(review): type 0xc0000002 with value 3 looks like
 * GNU_PROPERTY_X86_FEATURE_1_AND with the IBT and SHSTK bits - confirm
 * against the x86-64 psABI.
 */
	.section ".note.gnu.property", "a"
	.p2align 3
	.long	1f - 0f			/* size of the name field */
	.long	4f - 1f			/* size of the descriptor */
	.long	5			/* note type */
0:
	/* "GNU" spelled with .byte, since .asciz isn't supported on Solaris. */
	.byte	0x47,0x4e,0x55,0
1:
	.p2align 3
	.long	0xc0000002		/* property type */
	.long	3f - 2f			/* size of the property payload */
2:
	.long	3			/* property payload */
3:
	.p2align 3
4:
|
||||
@@ -1,333 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from x86_64-gf2m.pl. */
|
||||
.text
|
||||
|
||||
/*
 * _mul_1x1 - file-local helper with a private register convention.
 *
 *   in:   %rax = a, %rbp = b, %r8 = nibble mask (the caller passes 0xf)
 *   out:  %rdx:%rax = 128-bit result
 *   uses: %rbx, %rcx, %rsi, %rdi, %r9-%r14, %xmm0, %xmm1, flags, and
 *         destroys %rbp; 136 bytes of stack hold a 16-entry table.
 *
 * With a1 = a with its top three bits cleared, the table at (%rsp) holds
 * every XOR combination of a1, a1<<1, a1<<2 and a<<3, indexed by a 4-bit
 * value.  b is consumed one nibble at a time; each table entry is shifted to
 * its nibble position and XORed into the result, alternating between the
 * integer pair %rdx:%rax and the %xmm0 accumulator.  The three top bits of
 * a, which the table cannot represent, are handled up front by masking b.
 * NOTE(review): i.e. a carry-less (GF(2)[x]) 64x64 multiplication, as the
 * generator name x86_64-gf2m.pl suggests - confirm.
 */
.type	_mul_1x1,@function
.p2align 4
_mul_1x1:
.cfi_startproc
	subq	$128+8,%rsp
.cfi_adjust_cfa_offset	128+8
	/* Split off the top three bits of a and start the result with them. */
	movq	$-1,%r9
	leaq	(%rax,%rax,1),%rsi
	shrq	$3,%r9
	leaq	(,%rax,4),%rdi
	andq	%rax,%r9			/* a1 */
	leaq	(,%rax,8),%r12			/* a8 = a << 3 */
	sarq	$63,%rax
	leaq	(%r9,%r9,1),%r10		/* a2 = a1 << 1 */
	sarq	$63,%rsi
	leaq	(,%r9,4),%r11			/* a4 = a1 << 2 */
	andq	%rbp,%rax
	sarq	$63,%rdi
	movq	%rax,%rdx
	shlq	$63,%rax
	andq	%rbp,%rsi
	shrq	$1,%rdx
	movq	%rsi,%rcx
	shlq	$62,%rsi
	andq	%rbp,%rdi
	shrq	$2,%rcx
	xorq	%rsi,%rax
	movq	%rdi,%rbx
	shlq	$61,%rdi
	xorq	%rcx,%rdx
	shrq	$3,%rbx
	xorq	%rdi,%rax
	xorq	%rbx,%rdx

	/* Table entries 0..3 */
	movq	%r9,%r13
	movq	$0,(%rsp)
	xorq	%r10,%r13
	movq	%r9,8(%rsp)
	movq	%r11,%r14
	movq	%r10,16(%rsp)
	xorq	%r12,%r14
	movq	%r13,24(%rsp)

	/* Table entries 4..7 */
	xorq	%r11,%r9
	movq	%r11,32(%rsp)
	xorq	%r11,%r10
	movq	%r9,40(%rsp)
	xorq	%r11,%r13
	movq	%r10,48(%rsp)
	xorq	%r14,%r9
	movq	%r13,56(%rsp)
	xorq	%r14,%r10

	/* Table entries 8..11 */
	movq	%r12,64(%rsp)
	xorq	%r14,%r13
	movq	%r9,72(%rsp)
	xorq	%r11,%r9
	movq	%r10,80(%rsp)
	xorq	%r11,%r10
	movq	%r13,88(%rsp)

	/* Table entries 12..15, and the first two nibble indices of b. */
	xorq	%r11,%r13
	movq	%r14,96(%rsp)
	movq	%r8,%rsi
	movq	%r9,104(%rsp)
	andq	%rbp,%rsi
	movq	%r10,112(%rsp)
	shrq	$4,%rbp
	movq	%r13,120(%rsp)
	movq	%r8,%rdi
	andq	%rbp,%rdi
	shrq	$4,%rbp

	/* Nibble 0 starts the %xmm0 accumulator. */
	movq	(%rsp,%rsi,8),%xmm0
	movq	%r8,%rsi
	andq	%rbp,%rsi
	shrq	$4,%rbp
	/* Nibbles 1 (integer, shift 4) and 2 (xmm, 1 byte) */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$4,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$60,%rbx
	xorq	%rcx,%rax
	pslldq	$1,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	/* Nibbles 3 (shift 12) and 4 (2 bytes) */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$12,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$52,%rbx
	xorq	%rcx,%rax
	pslldq	$2,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	/* Nibbles 5 (shift 20) and 6 (3 bytes) */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$20,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$44,%rbx
	xorq	%rcx,%rax
	pslldq	$3,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	/* Nibbles 7 (shift 28) and 8 (4 bytes) */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$28,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$36,%rbx
	xorq	%rcx,%rax
	pslldq	$4,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	/* Nibbles 9 (shift 36) and 10 (5 bytes) */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$36,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$28,%rbx
	xorq	%rcx,%rax
	pslldq	$5,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	/* Nibbles 11 (shift 44) and 12 (6 bytes) */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$44,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$20,%rbx
	xorq	%rcx,%rax
	pslldq	$6,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	/* Nibbles 13 (shift 52) and 14 (7 bytes) */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$52,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$12,%rbx
	xorq	%rcx,%rax
	pslldq	$7,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	/* Nibble 15 (shift 60), then merge the xmm accumulator. */
	movq	(%rsp,%rdi,8),%rcx
	movq	%rcx,%rbx
	shlq	$60,%rcx
	.byte	102,72,15,126,198		/* movq %xmm0,%rsi */
	shrq	$4,%rbx
	xorq	%rcx,%rax
	psrldq	$8,%xmm0
	xorq	%rbx,%rdx
	.byte	102,72,15,126,199		/* movq %xmm0,%rdi */
	xorq	%rsi,%rax
	xorq	%rdi,%rdx

	addq	$128+8,%rsp
.cfi_adjust_cfa_offset	-128-8
	.byte	0xf3,0xc3
.Lend_mul_1x1:
.cfi_endproc
.size	_mul_1x1,.-_mul_1x1
|
||||
|
||||
/*
 * bn_GF2m_mul_2x2
 *
 *   %rdi = pointer to four 64-bit result words
 *   %rsi, %rdx = first operand pair; %rcx, %r8 = second operand pair
 *
 * Both paths form three 64x64 products - (%rsi,%rcx), (%rdx,%r8) and
 * (%rsi^%rdx, %rcx^%r8) - and XOR-combine them into the four result words.
 * Bit 33 of the 64-bit word at OPENSSL_ia32cap_P selects the path: when set,
 * the hand-encoded SSE sequence is used, otherwise _mul_1x1 is called three
 * times.
 * NOTE(review): bit 33 is presumably the PCLMULQDQ feature flag and the
 * scheme is Karatsuba over GF(2)[x] - confirm.
 *
 * Fallback frame (136 bytes): 0..24 partial products, 32 result pointer,
 * 40..64 the four operand words, 80..112 saved %r14,%r13,%r12,%rbp,%rbx.
 */
.globl	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,@function
.p2align 4
bn_GF2m_mul_2x2:
.cfi_startproc
	movq	%rsp,%rax
	movq	OPENSSL_ia32cap_P(%rip),%r10
	btq	$33,%r10
	jnc	.Lvanilla_mul_2x2

	.byte	102,72,15,110,198		/* movq %rsi,%xmm0 */
	.byte	102,72,15,110,201		/* movq %rcx,%xmm1 */
	.byte	102,72,15,110,210		/* movq %rdx,%xmm2 */
	.byte	102,73,15,110,216		/* movq %r8,%xmm3 */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm1,%xmm5
	.byte	102,15,58,68,193,0		/* pclmulqdq $0,%xmm1,%xmm0 */
	pxor	%xmm2,%xmm4
	pxor	%xmm3,%xmm5
	.byte	102,15,58,68,211,0		/* pclmulqdq $0,%xmm3,%xmm2 */
	.byte	102,15,58,68,229,0		/* pclmulqdq $0,%xmm5,%xmm4 */
	xorps	%xmm0,%xmm4
	xorps	%xmm2,%xmm4
	movdqa	%xmm4,%xmm5
	pslldq	$8,%xmm4
	psrldq	$8,%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm0
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm0,16(%rdi)
	.byte	0xf3,0xc3

.p2align 4
.Lvanilla_mul_2x2:
	leaq	-136(%rsp),%rsp
.cfi_adjust_cfa_offset	8*17
	movq	%r14,80(%rsp)
.cfi_rel_offset	%r14,8*10
	movq	%r13,88(%rsp)
.cfi_rel_offset	%r13,8*11
	movq	%r12,96(%rsp)
.cfi_rel_offset	%r12,8*12
	movq	%rbp,104(%rsp)
.cfi_rel_offset	%rbp,8*13
	movq	%rbx,112(%rsp)
.cfi_rel_offset	%rbx,8*14
.Lbody_mul_2x2:
	/* Park the arguments; _mul_1x1 overwrites the argument registers. */
	movq	%rdi,32(%rsp)
	movq	%rsi,40(%rsp)
	movq	%rdx,48(%rsp)
	movq	%rcx,56(%rsp)
	movq	%r8,64(%rsp)

	/* First product: %rsi x %rcx -> 16(%rsp), 24(%rsp) */
	movq	$0xf,%r8
	movq	%rsi,%rax
	movq	%rcx,%rbp
	call	_mul_1x1
	movq	%rax,16(%rsp)
	movq	%rdx,24(%rsp)

	/* Second product: %rdx x %r8 (original values) -> 0(%rsp), 8(%rsp) */
	movq	48(%rsp),%rax
	movq	64(%rsp),%rbp
	call	_mul_1x1
	movq	%rax,0(%rsp)
	movq	%rdx,8(%rsp)

	/* Third product: XOR of each operand pair */
	movq	40(%rsp),%rax
	movq	56(%rsp),%rbp
	xorq	48(%rsp),%rax
	xorq	64(%rsp),%rbp
	call	_mul_1x1
	movq	0(%rsp),%rbx
	movq	8(%rsp),%rcx
	movq	16(%rsp),%rdi
	movq	24(%rsp),%rsi
	movq	32(%rsp),%rbp			/* result pointer */

	/* Combine the three products into the four result words. */
	xorq	%rdx,%rax
	xorq	%rcx,%rdx
	xorq	%rbx,%rax
	movq	%rbx,0(%rbp)
	xorq	%rdi,%rdx
	movq	%rsi,24(%rbp)
	xorq	%rsi,%rax
	xorq	%rsi,%rdx
	xorq	%rdx,%rax
	movq	%rdx,16(%rbp)
	movq	%rax,8(%rbp)

	movq	80(%rsp),%r14
.cfi_restore	%r14
	movq	88(%rsp),%r13
.cfi_restore	%r13
	movq	96(%rsp),%r12
.cfi_restore	%r12
	movq	104(%rsp),%rbp
.cfi_restore	%rbp
	movq	112(%rsp),%rbx
.cfi_restore	%rbx
	leaq	136(%rsp),%rsp
.cfi_adjust_cfa_offset	-8*17
.Lepilogue_mul_2x2:
	.byte	0xf3,0xc3
.Lend_mul_2x2:
.cfi_endproc
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
|
||||
/*
 * NUL-terminated ASCII identification string:
 * "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
 */
.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	16
/*
 * ELF note placed in .note.gnu.property: a 12-byte header (name size,
 * descriptor size, note type 5), the name "GNU\0", then a single property
 * record (type 0xc0000002, 4-byte payload, value 3).  The sizes are computed
 * from the numeric local labels so they always match the emitted bytes.
 * NOTE(review): type 0xc0000002 with value 3 looks like
 * GNU_PROPERTY_X86_FEATURE_1_AND with the IBT and SHSTK bits - confirm
 * against the x86-64 psABI.
 */
	.section ".note.gnu.property", "a"
	.p2align 3
	.long	1f - 0f			/* size of the name field */
	.long	4f - 1f			/* size of the descriptor */
	.long	5			/* note type */
0:
	/* "GNU" spelled with .byte, since .asciz isn't supported on Solaris. */
	.byte	0x47,0x4e,0x55,0
1:
	.p2align 3
	.long	0xc0000002		/* property type */
	.long	3f - 2f			/* size of the property payload */
2:
	.long	3			/* property payload */
3:
	.p2align 3
4:
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,513 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from x86_64cpuid.pl. */
|
||||
|
||||
# Module preamble: place a call to OPENSSL_cpuid_setup in the .init section
# and reserve OPENSSL_ia32cap_P as a 16-byte common symbol (third .comm
# operand 4 is the requested alignment). Both symbols are marked hidden.
.hidden OPENSSL_cpuid_setup
|
||||
.section .init
|
||||
call OPENSSL_cpuid_setup
|
||||
|
||||
.hidden OPENSSL_ia32cap_P
|
||||
.comm OPENSSL_ia32cap_P,16,4
|
||||
|
||||
.text
|
||||
|
||||
# OPENSSL_atomic_add: atomically add the low 32 bits of %rsi to the 32-bit
# word at (%rdi) using a lock-cmpxchg retry loop.
# In:  %rdi = address of the 32-bit word, %rsi = addend.
# Out: %rax = the new value, sign-extended from 32 to 64 bits.
# Scratch: %r8, flags.
.globl OPENSSL_atomic_add
|
||||
.type OPENSSL_atomic_add,@function
|
||||
.align 16
|
||||
OPENSSL_atomic_add:
|
||||
.cfi_startproc
|
||||
.byte 243,15,30,250 # endbr64 (f3 0f 1e fa)
|
||||
movl (%rdi),%eax # eax = current value, the expected operand of cmpxchg
|
||||
.Lspin: leaq (%rsi,%rax,1),%r8 # r8 = addend + expected value
|
||||
.byte 0xf0 # lock prefix for the cmpxchgl that follows
|
||||
cmpxchgl %r8d,(%rdi) # store r8d when memory still equals eax, otherwise eax = memory
|
||||
jne .Lspin # value changed underneath us: retry with the refreshed eax
|
||||
movl %r8d,%eax
|
||||
.byte 0x48,0x98 # cdqe: sign-extend eax into rax
|
||||
.byte 0xf3,0xc3 # rep ret
|
||||
.cfi_endproc
|
||||
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
|
||||
|
||||
# OPENSSL_rdtsc: return the 64-bit time-stamp counter in %rax.
# Takes no arguments; overwrites %rdx and flags.
.globl OPENSSL_rdtsc
|
||||
.type OPENSSL_rdtsc,@function
|
||||
.align 16
|
||||
OPENSSL_rdtsc:
|
||||
.cfi_startproc
|
||||
.byte 243,15,30,250 # endbr64 (f3 0f 1e fa)
|
||||
rdtsc # counter arrives split as edx:eax
|
||||
shlq $32,%rdx # move the high half into bits 63..32
|
||||
orq %rdx,%rax # rax = (high << 32) | low
|
||||
.byte 0xf3,0xc3 # rep ret
|
||||
.cfi_endproc
|
||||
.size OPENSSL_rdtsc,.-OPENSSL_rdtsc
|
||||
|
||||
# OPENSSL_ia32_cpuid: query CPUID and build a filtered capability vector.
# In:  %rdi = output buffer; the dwords at offsets 8 and 12 are written here.
# Out: %rax = (filtered leaf-1 ecx << 32) | filtered leaf-1 edx.
#      8(%rdi)/12(%rdi) = filtered leaf-7 ebx / leaf-7 ecx, or zero when the
#      maximum basic leaf is below 7.
# Register roles: %r8 = saved %rbx, %r9d = vendor flag then ecx accumulator,
# %r10d = scratch then edx result, %r11d = maximum basic leaf,
# %xmm0 = leaf-1 eax (processor signature).
# The meaning of the individual masked bits is not spelled out in this file.
.globl OPENSSL_ia32_cpuid
|
||||
.type OPENSSL_ia32_cpuid,@function
|
||||
.align 16
|
||||
OPENSSL_ia32_cpuid:
|
||||
.cfi_startproc
|
||||
.byte 243,15,30,250 # endbr64 (f3 0f 1e fa)
|
||||
movq %rbx,%r8 # cpuid overwrites rbx (callee-saved): park it in r8
|
||||
.cfi_register %rbx,%r8
|
||||
|
||||
xorl %eax,%eax
|
||||
movq %rax,8(%rdi) # zero the qword at offset 8 of the output buffer
|
||||
cpuid # leaf 0: eax = max basic leaf, ebx:edx:ecx = vendor string
|
||||
movl %eax,%r11d # r11d = max basic leaf
|
||||
|
||||
xorl %eax,%eax
|
||||
cmpl $0x756e6547,%ebx # "Genu"
|
||||
setne %al
|
||||
movl %eax,%r9d
|
||||
cmpl $0x49656e69,%edx # "ineI"
|
||||
setne %al
|
||||
orl %eax,%r9d
|
||||
cmpl $0x6c65746e,%ecx # "ntel"
|
||||
setne %al
|
||||
orl %eax,%r9d # r9d == 0 exactly when the vendor is GenuineIntel
|
||||
jz .Lintel
|
||||
|
||||
cmpl $0x68747541,%ebx # "Auth"
|
||||
setne %al
|
||||
movl %eax,%r10d
|
||||
cmpl $0x69746E65,%edx # "enti"
|
||||
setne %al
|
||||
orl %eax,%r10d
|
||||
cmpl $0x444D4163,%ecx # "cAMD"
|
||||
setne %al
|
||||
orl %eax,%r10d
|
||||
jnz .Lintel # not AuthenticAMD: use the common path
|
||||
|
||||
|
||||
movl $0x80000000,%eax
|
||||
cpuid # eax = max extended leaf
|
||||
cmpl $0x80000001,%eax
|
||||
jb .Lintel
|
||||
movl %eax,%r10d
|
||||
movl $0x80000001,%eax
|
||||
cpuid
|
||||
orl %ecx,%r9d
|
||||
andl $0x00000801,%r9d # keep only bits 0 and 11 of (vendor flag | extended ecx)
|
||||
|
||||
cmpl $0x80000008,%r10d
|
||||
jb .Lintel
|
||||
|
||||
movl $0x80000008,%eax
|
||||
cpuid
|
||||
movzbq %cl,%r10
|
||||
incq %r10 # r10 = low byte of ecx + 1 (presumably a core count; TODO confirm)
|
||||
|
||||
movl $1,%eax
|
||||
cpuid
|
||||
btl $28,%edx
|
||||
jnc .Lgeneric
|
||||
shrl $16,%ebx
|
||||
cmpb %r10b,%bl
|
||||
ja .Lgeneric # bits 23..16 of ebx exceed r10b: leave bit 28 set
|
||||
andl $0xefffffff,%edx # otherwise clear bit 28 of edx
|
||||
jmp .Lgeneric
|
||||
|
||||
.Lintel:
|
||||
cmpl $4,%r11d
|
||||
movl $-1,%r10d # default when leaf 4 is unavailable (mov leaves flags intact)
|
||||
jb .Lnocacheinfo
|
||||
|
||||
movl $4,%eax
|
||||
movl $0,%ecx
|
||||
cpuid # leaf 4, subleaf 0
|
||||
movl %eax,%r10d
|
||||
shrl $14,%r10d
|
||||
andl $0xfff,%r10d # r10d = bits 25..14 of leaf-4 eax
|
||||
|
||||
.Lnocacheinfo:
|
||||
movl $1,%eax
|
||||
cpuid
|
||||
movd %eax,%xmm0 # keep the processor signature for the leaf-7 check below
|
||||
andl $0xbfefffff,%edx # clear bits 30 and 20 of edx
|
||||
cmpl $0,%r9d
|
||||
jne .Lnotintel
|
||||
orl $0x40000000,%edx # GenuineIntel only: set bit 30
|
||||
andb $15,%ah # ah = bits 11..8 of the signature (family field)
|
||||
cmpb $15,%ah
|
||||
jne .LnotP4
|
||||
orl $0x00100000,%edx # family 15: set bit 20
|
||||
.LnotP4:
|
||||
cmpb $6,%ah
|
||||
jne .Lnotintel
|
||||
andl $0x0fff0ff0,%eax # drop the stepping and type fields before comparing
|
||||
cmpl $0x00050670,%eax
|
||||
je .Lknights
|
||||
cmpl $0x00080650,%eax
|
||||
jne .Lnotintel
|
||||
.Lknights:
|
||||
andl $0xfbffffff,%ecx # clear bit 26 of ecx for these two signatures
|
||||
|
||||
.Lnotintel:
|
||||
btl $28,%edx
|
||||
jnc .Lgeneric
|
||||
andl $0xefffffff,%edx # bit 28 was set: clear it...
|
||||
cmpl $0,%r10d
|
||||
je .Lgeneric # ...and keep it clear when r10d is zero
|
||||
|
||||
orl $0x10000000,%edx # otherwise set it again
|
||||
shrl $16,%ebx
|
||||
cmpb $1,%bl
|
||||
ja .Lgeneric # bits 23..16 of ebx above 1: bit 28 stays set
|
||||
andl $0xefffffff,%edx
|
||||
.Lgeneric:
|
||||
andl $0x00000800,%r9d
|
||||
andl $0xfffff7ff,%ecx
|
||||
orl %ecx,%r9d # r9d = leaf-1 ecx with bit 11 taken from the r9d flag
|
||||
|
||||
movl %edx,%r10d # r10d = filtered edx, the low half of the return value
|
||||
|
||||
cmpl $7,%r11d
|
||||
jb .Lno_extended_info
|
||||
movl $7,%eax
|
||||
xorl %ecx,%ecx
|
||||
cpuid # leaf 7, subleaf 0
|
||||
btl $26,%r9d
|
||||
jc .Lnotknights
|
||||
andl $0xfff7ffff,%ebx # bit 26 of r9d clear: clear bit 19 of leaf-7 ebx
|
||||
.Lnotknights:
|
||||
movd %xmm0,%eax # reload the processor signature
|
||||
andl $0x0fff0ff0,%eax
|
||||
cmpl $0x00050650,%eax
|
||||
jne .Lnotskylakex
|
||||
andl $0xfffeffff,%ebx # signature 0x50650: clear bit 16 of leaf-7 ebx
|
||||
|
||||
.Lnotskylakex:
|
||||
movl %ebx,8(%rdi)
|
||||
movl %ecx,12(%rdi)
|
||||
.Lno_extended_info:
|
||||
|
||||
btl $27,%r9d
|
||||
jnc .Lclear_avx
|
||||
xorl %ecx,%ecx
|
||||
.byte 0x0f,0x01,0xd0 # xgetbv with ecx = 0: edx:eax = XCR0
|
||||
andl $0xe6,%eax
|
||||
cmpl $0xe6,%eax
|
||||
je .Ldone # all of XCR0 bits 1,2,5,6,7 set: nothing more to mask
|
||||
andl $0x3fdeffff,8(%rdi) # otherwise clear bits 31, 30, 21 and 16 of the stored ebx
|
||||
|
||||
|
||||
|
||||
|
||||
andl $6,%eax
|
||||
cmpl $6,%eax
|
||||
je .Ldone # XCR0 bits 1 and 2 both set: keep the remaining bits
|
||||
.Lclear_avx:
|
||||
movl $0xefffe7ff,%eax
|
||||
andl %eax,%r9d # clear bits 28, 12 and 11 of the ecx result
|
||||
movl $0x3fdeffdf,%eax
|
||||
andl %eax,8(%rdi) # clear bits 31, 30, 21, 16 and 5 of the stored ebx
|
||||
.Ldone:
|
||||
shlq $32,%r9
|
||||
movl %r10d,%eax
|
||||
movq %r8,%rbx # restore the callee-saved rbx
|
||||
.cfi_restore %rbx
|
||||
orq %r9,%rax # rax = (ecx result << 32) | edx result
|
||||
.byte 0xf3,0xc3 # rep ret
|
||||
.cfi_endproc
|
||||
.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
|
||||
|
||||
# OPENSSL_cleanse: overwrite a buffer with zero bytes.
# In:  %rdi = buffer, %rsi = length in bytes.
# Out: %rax = 0. Modifies %rdi, %rsi and flags.
# Lengths below 15 are cleared byte by byte; longer buffers are cleared
# bytewise up to an 8-byte boundary, then by qwords, then the tail bytewise.
.globl OPENSSL_cleanse
|
||||
.type OPENSSL_cleanse,@function
|
||||
.align 16
|
||||
OPENSSL_cleanse:
|
||||
.cfi_startproc
|
||||
.byte 243,15,30,250 # endbr64 (f3 0f 1e fa)
|
||||
xorq %rax,%rax # rax = 0, the fill value
|
||||
cmpq $15,%rsi
|
||||
jae .Lot # 15 bytes or more: take the aligned path
|
||||
cmpq $0,%rsi
|
||||
je .Lret
|
||||
.Little:
|
||||
movb %al,(%rdi)
|
||||
subq $1,%rsi # sets ZF for the jnz below
|
||||
leaq 1(%rdi),%rdi # lea leaves the flags from subq untouched
|
||||
jnz .Little
|
||||
.Lret:
|
||||
.byte 0xf3,0xc3 # rep ret
|
||||
.align 16
|
||||
.Lot:
|
||||
testq $7,%rdi
|
||||
jz .Laligned # pointer is 8-byte aligned
|
||||
movb %al,(%rdi)
|
||||
leaq -1(%rsi),%rsi
|
||||
leaq 1(%rdi),%rdi
|
||||
jmp .Lot
|
||||
.Laligned:
|
||||
movq %rax,(%rdi)
|
||||
leaq -8(%rsi),%rsi
|
||||
testq $-8,%rsi # ZF clear while at least 8 bytes remain
|
||||
leaq 8(%rdi),%rdi
|
||||
jnz .Laligned
|
||||
cmpq $0,%rsi
|
||||
jne .Little # 1..7 bytes left: finish bytewise
|
||||
.byte 0xf3,0xc3 # rep ret
|
||||
.cfi_endproc
|
||||
.size OPENSSL_cleanse,.-OPENSSL_cleanse
|
||||
|
||||
# CRYPTO_memcmp: equality test of two buffers with no branch that depends on
# the buffer contents.
# In:  %rdi, %rsi = buffers, %rdx = length in bytes.
# Out: %rax = 0 when all bytes match (or length is 0), 1 otherwise.
# Scratch: %r10, %r11, %rdx, %rdi, %rsi, flags.
.globl CRYPTO_memcmp
|
||||
.type CRYPTO_memcmp,@function
|
||||
.align 16
|
||||
CRYPTO_memcmp:
|
||||
.cfi_startproc
|
||||
.byte 243,15,30,250 # endbr64 (f3 0f 1e fa)
|
||||
xorq %rax,%rax
|
||||
xorq %r10,%r10
|
||||
cmpq $0,%rdx
|
||||
je .Lno_data # zero length: return 0
|
||||
cmpq $16,%rdx
|
||||
jne .Loop_cmp # only length 16 takes the two-qword path below
|
||||
movq (%rdi),%r10
|
||||
movq 8(%rdi),%r11
|
||||
movq $1,%rdx
|
||||
xorq (%rsi),%r10
|
||||
xorq 8(%rsi),%r11
|
||||
orq %r11,%r10 # non-zero when any of the 16 bytes differ
|
||||
cmovnzq %rdx,%rax # rax = 1 on difference, stays 0 otherwise
|
||||
.byte 0xf3,0xc3 # rep ret
|
||||
|
||||
.align 16
|
||||
.Loop_cmp:
|
||||
movb (%rdi),%r10b
|
||||
leaq 1(%rdi),%rdi
|
||||
xorb (%rsi),%r10b
|
||||
leaq 1(%rsi),%rsi
|
||||
orb %r10b,%al # accumulate every differing bit in al
|
||||
decq %rdx
|
||||
jnz .Loop_cmp
|
||||
negq %rax # a non-zero accumulator becomes negative...
|
||||
shrq $63,%rax # ...so the sign bit yields 0 or 1
|
||||
.Lno_data:
|
||||
.byte 0xf3,0xc3 # rep ret
|
||||
.cfi_endproc
|
||||
.size CRYPTO_memcmp,.-CRYPTO_memcmp
|
||||
# OPENSSL_wipe_cpu: zero xmm0-xmm15 and the general registers rcx, rdx, rsi,
# rdi, r8-r11.
# Out: %rax = %rsp + 8 as seen on entry (the address just above the return
# address).
.globl OPENSSL_wipe_cpu
|
||||
.type OPENSSL_wipe_cpu,@function
|
||||
.align 16
|
||||
OPENSSL_wipe_cpu:
|
||||
.cfi_startproc
|
||||
.byte 243,15,30,250 # endbr64 (f3 0f 1e fa)
|
||||
pxor %xmm0,%xmm0
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
pxor %xmm4,%xmm4
|
||||
pxor %xmm5,%xmm5
|
||||
pxor %xmm6,%xmm6
|
||||
pxor %xmm7,%xmm7
|
||||
pxor %xmm8,%xmm8
|
||||
pxor %xmm9,%xmm9
|
||||
pxor %xmm10,%xmm10
|
||||
pxor %xmm11,%xmm11
|
||||
pxor %xmm12,%xmm12
|
||||
pxor %xmm13,%xmm13
|
||||
pxor %xmm14,%xmm14
|
||||
pxor %xmm15,%xmm15
|
||||
xorq %rcx,%rcx
|
||||
xorq %rdx,%rdx
|
||||
xorq %rsi,%rsi
|
||||
xorq %rdi,%rdi
|
||||
xorq %r8,%r8
|
||||
xorq %r9,%r9
|
||||
xorq %r10,%r10
|
||||
xorq %r11,%r11
|
||||
leaq 8(%rsp),%rax # return value: address above the return address
|
||||
.byte 0xf3,0xc3 # rep ret
|
||||
.cfi_endproc
|
||||
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
|
||||
# OPENSSL_instrument_bus: for each of %rsi consecutive 32-bit slots starting
# at (%rdi), flush the slot from cache and lock-add to it the low 32 bits of
# the time-stamp-counter delta since the previous sample.
# In:  %rdi = array of 32-bit slots, %rsi = slot count.
# Out: %rax = the count passed in %rsi.
# NOTE(review): a count of zero would wrap %rcx in the loop below; presumably
# callers never pass zero - confirm.
.globl OPENSSL_instrument_bus
|
||||
.type OPENSSL_instrument_bus,@function
|
||||
.align 16
|
||||
OPENSSL_instrument_bus:
|
||||
.cfi_startproc
|
||||
.byte 243,15,30,250 # endbr64 (f3 0f 1e fa)
|
||||
movq %rdi,%r10 # r10 = current slot
|
||||
movq %rsi,%rcx # rcx = slots remaining
|
||||
movq %rsi,%r11 # r11 = count, kept for the return value
|
||||
|
||||
rdtsc
|
||||
movl %eax,%r8d # r8d = previous low 32 bits of the counter
|
||||
movl $0,%r9d
|
||||
clflush (%r10)
|
||||
.byte 0xf0 # lock prefix for the addl that follows
|
||||
addl %r9d,(%r10) # adds zero: a locked access to the first slot
|
||||
jmp .Loop
|
||||
.align 16
|
||||
.Loop: rdtsc
|
||||
movl %eax,%edx
|
||||
subl %r8d,%eax # eax = delta since the previous sample
|
||||
movl %edx,%r8d
|
||||
movl %eax,%r9d
|
||||
clflush (%r10)
|
||||
.byte 0xf0 # lock prefix for the addl that follows
|
||||
addl %eax,(%r10) # slot += delta
|
||||
leaq 4(%r10),%r10 # advance to the next slot
|
||||
subq $1,%rcx
|
||||
jnz .Loop
|
||||
|
||||
movq %r11,%rax
|
||||
.byte 0xf3,0xc3 # rep ret
|
||||
.cfi_endproc
|
||||
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
|
||||
|
||||
# OPENSSL_instrument_bus2: like OPENSSL_instrument_bus, but the slot pointer
# only advances when the measured delta differs from the previous delta, and
# the number of probes is bounded by %rdx.
# In:  %rdi = array of 32-bit slots, %rsi = slot count, %rdx = probe limit.
# Out: %rax = slot count minus the slots still remaining at exit.
.globl OPENSSL_instrument_bus2
|
||||
.type OPENSSL_instrument_bus2,@function
|
||||
.align 16
|
||||
OPENSSL_instrument_bus2:
|
||||
.cfi_startproc
|
||||
.byte 243,15,30,250 # endbr64 (f3 0f 1e fa)
|
||||
movq %rdi,%r10 # r10 = current slot
|
||||
movq %rsi,%rcx # rcx = slots remaining
|
||||
movq %rdx,%r11 # r11 = probes remaining
|
||||
movq %rcx,8(%rsp) # NOTE(review): this slot is above the return address, outside this frame - confirm callers tolerate it
|
||||
|
||||
rdtsc
|
||||
movl %eax,%r8d # r8d = previous low 32 bits of the counter
|
||||
movl $0,%r9d
|
||||
|
||||
clflush (%r10)
|
||||
.byte 0xf0 # lock prefix for the addl that follows
|
||||
addl %r9d,(%r10) # adds zero: a locked access to the first slot
|
||||
|
||||
rdtsc
|
||||
movl %eax,%edx
|
||||
subl %r8d,%eax # eax = first delta
|
||||
movl %edx,%r8d
|
||||
movl %eax,%r9d # r9d = previous delta
|
||||
.Loop2:
|
||||
clflush (%r10)
|
||||
.byte 0xf0 # lock prefix for the addl that follows
|
||||
addl %eax,(%r10) # slot += delta
|
||||
|
||||
subq $1,%r11
|
||||
jz .Ldone2 # probe limit reached
|
||||
|
||||
rdtsc
|
||||
movl %eax,%edx
|
||||
subl %r8d,%eax
|
||||
movl %edx,%r8d
|
||||
cmpl %r9d,%eax # did the delta change?
|
||||
movl %eax,%r9d
|
||||
movl $0,%edx # mov keeps the flags from cmpl for setne
|
||||
setne %dl # rdx = 1 when the delta changed, else 0
|
||||
subq %rdx,%rcx # consume a slot only on change; sets ZF for jnz
|
||||
leaq (%r10,%rdx,4),%r10 # advance the slot pointer only on change
|
||||
jnz .Loop2
|
||||
|
||||
.Ldone2:
|
||||
movq 8(%rsp),%rax # reload the slot count saved at entry
|
||||
subq %rcx,%rax
|
||||
.byte 0xf3,0xc3 # rep ret
|
||||
.cfi_endproc
|
||||
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
|
||||
# OPENSSL_ia32_rdrand_bytes: fill a buffer with output of the RDRAND
# instruction.
# In:  %rdi = buffer, %rsi = number of bytes wanted.
# Out: %rax = number of bytes written; this is short of the request when
#      RDRAND reports failure (CF clear) 8 times in a row.
# Scratch: %r10 (zeroed before return), %r11, %rdi, %rsi, flags.
.globl OPENSSL_ia32_rdrand_bytes
|
||||
.type OPENSSL_ia32_rdrand_bytes,@function
|
||||
.align 16
|
||||
OPENSSL_ia32_rdrand_bytes:
|
||||
.cfi_startproc
|
||||
.byte 243,15,30,250 # endbr64 (f3 0f 1e fa)
|
||||
xorq %rax,%rax # rax = bytes written so far
|
||||
cmpq $0,%rsi
|
||||
je .Ldone_rdrand_bytes
|
||||
|
||||
movq $8,%r11 # r11 = retry budget for one draw
|
||||
.Loop_rdrand_bytes:
|
||||
.byte 73,15,199,242 # rdrand %r10 (49 0f c7 f2)
|
||||
jc .Lbreak_rdrand_bytes # CF set: r10 holds a valid value
|
||||
decq %r11
|
||||
jnz .Loop_rdrand_bytes
|
||||
jmp .Ldone_rdrand_bytes # retries exhausted: return what was written
|
||||
|
||||
.align 16
|
||||
.Lbreak_rdrand_bytes:
|
||||
cmpq $8,%rsi
|
||||
jb .Ltail_rdrand_bytes # fewer than 8 bytes wanted: store bytewise
|
||||
movq %r10,(%rdi)
|
||||
leaq 8(%rdi),%rdi
|
||||
addq $8,%rax
|
||||
subq $8,%rsi
|
||||
jz .Ldone_rdrand_bytes
|
||||
movq $8,%r11 # fresh retry budget for the next draw
|
||||
jmp .Loop_rdrand_bytes
|
||||
|
||||
.align 16
|
||||
.Ltail_rdrand_bytes:
|
||||
movb %r10b,(%rdi)
|
||||
leaq 1(%rdi),%rdi
|
||||
incq %rax
|
||||
shrq $8,%r10 # expose the next byte of the draw
|
||||
decq %rsi
|
||||
jnz .Ltail_rdrand_bytes
|
||||
|
||||
.Ldone_rdrand_bytes:
|
||||
xorq %r10,%r10 # do not leave random data behind in r10
|
||||
.byte 0xf3,0xc3 # rep ret
|
||||
.cfi_endproc
|
||||
.size OPENSSL_ia32_rdrand_bytes,.-OPENSSL_ia32_rdrand_bytes
|
||||
# OPENSSL_ia32_rdseed_bytes: fill a buffer with output of the RDSEED
# instruction.
# In:  %rdi = buffer, %rsi = number of bytes wanted.
# Out: %rax = number of bytes written; this is short of the request when
#      RDSEED reports failure (CF clear) 8 times in a row.
# Scratch: %r10 (zeroed before return), %r11, %rdi, %rsi, flags.
.globl OPENSSL_ia32_rdseed_bytes
|
||||
.type OPENSSL_ia32_rdseed_bytes,@function
|
||||
.align 16
|
||||
OPENSSL_ia32_rdseed_bytes:
|
||||
.cfi_startproc
|
||||
.byte 243,15,30,250 # endbr64 (f3 0f 1e fa)
|
||||
xorq %rax,%rax # rax = bytes written so far
|
||||
cmpq $0,%rsi
|
||||
je .Ldone_rdseed_bytes
|
||||
|
||||
movq $8,%r11 # r11 = retry budget for one draw
|
||||
.Loop_rdseed_bytes:
|
||||
.byte 73,15,199,250 # rdseed %r10 (49 0f c7 fa)
|
||||
jc .Lbreak_rdseed_bytes # CF set: r10 holds a valid value
|
||||
decq %r11
|
||||
jnz .Loop_rdseed_bytes
|
||||
jmp .Ldone_rdseed_bytes # retries exhausted: return what was written
|
||||
|
||||
.align 16
|
||||
.Lbreak_rdseed_bytes:
|
||||
cmpq $8,%rsi
|
||||
jb .Ltail_rdseed_bytes # fewer than 8 bytes wanted: store bytewise
|
||||
movq %r10,(%rdi)
|
||||
leaq 8(%rdi),%rdi
|
||||
addq $8,%rax
|
||||
subq $8,%rsi
|
||||
jz .Ldone_rdseed_bytes
|
||||
movq $8,%r11 # fresh retry budget for the next draw
|
||||
jmp .Loop_rdseed_bytes
|
||||
|
||||
.align 16
|
||||
.Ltail_rdseed_bytes:
|
||||
movb %r10b,(%rdi)
|
||||
leaq 1(%rdi),%rdi
|
||||
incq %rax
|
||||
shrq $8,%r10 # expose the next byte of the draw
|
||||
decq %rsi
|
||||
jnz .Ltail_rdseed_bytes
|
||||
|
||||
.Ldone_rdseed_bytes:
|
||||
xorq %r10,%r10 # do not leave seed data behind in r10
|
||||
.byte 0xf3,0xc3 # rep ret
|
||||
.cfi_endproc
|
||||
.size OPENSSL_ia32_rdseed_bytes,.-OPENSSL_ia32_rdseed_bytes
|
||||
# ELF note in .note.gnu.property: header (name size, descriptor size, type),
# the "GNU" owner name, then one property record.
.section ".note.gnu.property", "a"
|
||||
.p2align 3
|
||||
.long 1f - 0f # name size: bytes between labels 0 and 1
|
||||
.long 4f - 1f # descriptor size: bytes between labels 1 and 4
|
||||
.long 5 # note type 5 (NT_GNU_PROPERTY_TYPE_0)
|
||||
0:
|
||||
# "GNU" encoded with .byte, since .asciz isn't supported
|
||||
# on Solaris.
|
||||
.byte 0x47
|
||||
.byte 0x4e
|
||||
.byte 0x55
|
||||
.byte 0
|
||||
1:
|
||||
.p2align 3
|
||||
.long 0xc0000002 # property type (GNU_PROPERTY_X86_FEATURE_1_AND)
|
||||
.long 3f - 2f # property data size: bytes between labels 2 and 3
|
||||
2:
|
||||
.long 3 # property value: bits 0 and 1 set (IBT and SHSTK)
|
||||
3:
|
||||
.p2align 3
|
||||
4:
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,236 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from armv4-gf2m.pl. */
|
||||
#include "arm_arch.h"
|
||||
|
||||
#if defined(__thumb2__)
|
||||
.syntax unified
|
||||
.thumb
|
||||
#else
|
||||
.code 32
|
||||
#endif
|
||||
|
||||
.text
|
||||
@ mul_1x1_ialu: integer-only 32x32-bit carry-less (GF(2)[x]) multiply helper.
@ In:  r1 = a, r0 = b, r12 = table index mask (the caller in this file loads
@      7<<2), sp = base of an 8-word scratch table that is overwritten here.
@ Out: r5 = low word and r4 = high word of the product (the caller in this
@      file stores them in that order).
@ Also writes r6-r9 and the flags; r0-r3, r10 and r12 are left unchanged.
@ The table is built from the low 30 bits of a; bits 30 and 31 of a are
@ folded in separately at the end with conditional eor instructions.
.type mul_1x1_ialu,%function
|
||||
.align 5
|
||||
mul_1x1_ialu:
|
||||
mov r4,#0
|
||||
bic r5,r1,#3<<30 @ a1=a&0x3fffffff
|
||||
str r4,[sp,#0] @ tab[0]=0
|
||||
add r6,r5,r5 @ a2=a1<<1
|
||||
str r5,[sp,#4] @ tab[1]=a1
|
||||
eor r7,r5,r6 @ a1^a2
|
||||
str r6,[sp,#8] @ tab[2]=a2
|
||||
mov r8,r5,lsl#2 @ a4=a1<<2
|
||||
str r7,[sp,#12] @ tab[3]=a1^a2
|
||||
eor r9,r5,r8 @ a1^a4
|
||||
str r8,[sp,#16] @ tab[4]=a4
|
||||
eor r4,r6,r8 @ a2^a4
|
||||
str r9,[sp,#20] @ tab[5]=a1^a4
|
||||
eor r7,r7,r8 @ a1^a2^a4
|
||||
str r4,[sp,#24] @ tab[6]=a2^a4
|
||||
and r8,r12,r0,lsl#2 @ byte offset of tab[b & 0x7]
|
||||
str r7,[sp,#28] @ tab[7]=a1^a2^a4
|
||||
|
||||
and r9,r12,r0,lsr#1 @ byte offset of tab[b >> 3 & 0x7]
|
||||
ldr r5,[sp,r8] @ tab[b & 0x7]
|
||||
and r8,r12,r0,lsr#4
|
||||
ldr r7,[sp,r9] @ tab[b >> 3 & 0x7]
|
||||
and r9,r12,r0,lsr#7
|
||||
ldr r6,[sp,r8] @ tab[b >> 6 & 0x7]
|
||||
eor r5,r5,r7,lsl#3 @ stall
|
||||
mov r4,r7,lsr#29 @ bits shifted out of the low word start the high word
|
||||
ldr r7,[sp,r9] @ tab[b >> 9 & 0x7]
|
||||
|
||||
and r8,r12,r0,lsr#10
|
||||
eor r5,r5,r6,lsl#6
|
||||
eor r4,r4,r6,lsr#26
|
||||
ldr r6,[sp,r8] @ tab[b >> 12 & 0x7]
|
||||
|
||||
and r9,r12,r0,lsr#13
|
||||
eor r5,r5,r7,lsl#9
|
||||
eor r4,r4,r7,lsr#23
|
||||
ldr r7,[sp,r9] @ tab[b >> 15 & 0x7]
|
||||
|
||||
and r8,r12,r0,lsr#16
|
||||
eor r5,r5,r6,lsl#12
|
||||
eor r4,r4,r6,lsr#20
|
||||
ldr r6,[sp,r8] @ tab[b >> 18 & 0x7]
|
||||
|
||||
and r9,r12,r0,lsr#19
|
||||
eor r5,r5,r7,lsl#15
|
||||
eor r4,r4,r7,lsr#17
|
||||
ldr r7,[sp,r9] @ tab[b >> 21 & 0x7]
|
||||
|
||||
and r8,r12,r0,lsr#22
|
||||
eor r5,r5,r6,lsl#18
|
||||
eor r4,r4,r6,lsr#14
|
||||
ldr r6,[sp,r8] @ tab[b >> 24 & 0x7]
|
||||
|
||||
and r9,r12,r0,lsr#25
|
||||
eor r5,r5,r7,lsl#21
|
||||
eor r4,r4,r7,lsr#11
|
||||
ldr r7,[sp,r9] @ tab[b >> 27 & 0x7]
|
||||
|
||||
tst r1,#1<<30 @ was bit 30 of a set?
|
||||
and r8,r12,r0,lsr#28
|
||||
eor r5,r5,r6,lsl#24
|
||||
eor r4,r4,r6,lsr#8
|
||||
ldr r6,[sp,r8] @ tab[b >> 30 ]
|
||||
|
||||
#ifdef __thumb2__
|
||||
itt ne
|
||||
#endif
|
||||
eorne r5,r5,r0,lsl#30 @ bit 30 of a set: fold in b<<30
|
||||
eorne r4,r4,r0,lsr#2
|
||||
tst r1,#1<<31 @ was bit 31 of a set?
|
||||
eor r5,r5,r7,lsl#27
|
||||
eor r4,r4,r7,lsr#5
|
||||
#ifdef __thumb2__
|
||||
itt ne
|
||||
#endif
|
||||
eorne r5,r5,r0,lsl#31 @ bit 31 of a set: fold in b<<31
|
||||
eorne r4,r4,r0,lsr#1
|
||||
eor r5,r5,r6,lsl#30
|
||||
eor r4,r4,r6,lsr#2
|
||||
|
||||
mov pc,lr
|
||||
.size mul_1x1_ialu,.-mul_1x1_ialu
|
||||
@ bn_GF2m_mul_2x2: 64x64-bit polynomial multiply over GF(2); four 32-bit
@ result words are written to the buffer passed in r0.
@ In:  r0 = result pointer, r1 = a1, r2 = a0, r3 = b1, first stack word = b0.
@ When built with __ARM_MAX_ARCH__>=7 the OPENSSL_armcap_P word is tested for
@ ARMV7_NEON and the vmull.p8 path at .LNEON is taken if it is set; otherwise
@ three mul_1x1_ialu products are combined Karatsuba-style.
.globl bn_GF2m_mul_2x2
|
||||
.type bn_GF2m_mul_2x2,%function
|
||||
.align 5
|
||||
bn_GF2m_mul_2x2:
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
stmdb sp!,{r10,lr} @ r10 is used as scratch for the capability lookup
|
||||
ldr r12,.LOPENSSL_armcap @ literal: address, or offset relative to the literal
|
||||
# if !defined(_WIN32)
|
||||
adr r10,.LOPENSSL_armcap
|
||||
ldr r12,[r12,r10] @ literal address + stored offset
|
||||
# endif
|
||||
# if defined(__APPLE__) || defined(_WIN32)
|
||||
ldr r12,[r12]
|
||||
# endif
|
||||
tst r12,#ARMV7_NEON
|
||||
itt ne
|
||||
ldrne r10,[sp],#8 @ NEON path: restore r10 and drop both saved words
|
||||
bne .LNEON
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9} @ with {r10,lr} above: 8 saved words in total
|
||||
#else
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
|
||||
#endif
|
||||
mov r10,r0 @ reassign 1st argument
|
||||
mov r0,r3 @ r0=b1
|
||||
sub r7,sp,#36
|
||||
mov r8,sp @ remember the current sp
|
||||
and r7,r7,#-32 @ 32-byte aligned scratch area below sp
|
||||
ldr r3,[sp,#32] @ load b0
|
||||
mov r12,#7<<2 @ table index mask used by mul_1x1_ialu
|
||||
mov sp,r7 @ allocate tab[8]
|
||||
str r8,[r7,#32] @ keep the previous sp just above tab[8]
|
||||
|
||||
bl mul_1x1_ialu @ a1·b1
|
||||
str r5,[r10,#8]
|
||||
str r4,[r10,#12]
|
||||
|
||||
eor r0,r0,r3 @ flip b0 and b1
|
||||
eor r1,r1,r2 @ flip a0 and a1
|
||||
eor r3,r3,r0
|
||||
eor r2,r2,r1
|
||||
eor r0,r0,r3
|
||||
eor r1,r1,r2
|
||||
bl mul_1x1_ialu @ a0·b0
|
||||
str r5,[r10]
|
||||
str r4,[r10,#4]
|
||||
|
||||
eor r1,r1,r2
|
||||
eor r0,r0,r3
|
||||
bl mul_1x1_ialu @ (a1+a0)·(b1+b0)
|
||||
ldmia r10,{r6,r7,r8,r9} @ reload a0·b0 (r6,r7) and a1·b1 (r8,r9)
|
||||
eor r5,r5,r4
|
||||
ldr sp,[sp,#32] @ destroy tab[8]
|
||||
eor r4,r4,r7
|
||||
eor r5,r5,r6
|
||||
eor r4,r4,r8
|
||||
eor r5,r5,r9
|
||||
eor r4,r4,r9
|
||||
str r4,[r10,#8]
|
||||
eor r5,r5,r4
|
||||
str r5,[r10,#4]
|
||||
|
||||
#if __ARM_ARCH__>=5
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
|
||||
#else
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
|
||||
.align 5
|
||||
.LNEON:
|
||||
ldr r12, [sp] @ 5th argument
|
||||
vmov d26, r2, r1 @ d26 = A: r2 (a0) in the low word, r1 (a1) in the high word
|
||||
vmov d27, r12, r3 @ d27 = B: r12 (b0) in the low word, r3 (b1) in the high word
|
||||
vmov.i64 d28, #0x0000ffffffffffff
|
||||
vmov.i64 d29, #0x00000000ffffffff
|
||||
vmov.i64 d30, #0x000000000000ffff
|
||||
|
||||
vext.8 d2, d26, d26, #1 @ A1
|
||||
vmull.p8 q1, d2, d27 @ F = A1*B
|
||||
vext.8 d0, d27, d27, #1 @ B1
|
||||
vmull.p8 q0, d26, d0 @ E = A*B1
|
||||
vext.8 d4, d26, d26, #2 @ A2
|
||||
vmull.p8 q2, d4, d27 @ H = A2*B
|
||||
vext.8 d16, d27, d27, #2 @ B2
|
||||
vmull.p8 q8, d26, d16 @ G = A*B2
|
||||
vext.8 d6, d26, d26, #3 @ A3
|
||||
veor q1, q1, q0 @ L = E + F
|
||||
vmull.p8 q3, d6, d27 @ J = A3*B
|
||||
vext.8 d0, d27, d27, #3 @ B3
|
||||
veor q2, q2, q8 @ M = G + H
|
||||
vmull.p8 q0, d26, d0 @ I = A*B3
|
||||
veor d2, d2, d3 @ t0 = (L) (P0 + P1) << 8
|
||||
vand d3, d3, d28
|
||||
vext.8 d16, d27, d27, #4 @ B4
|
||||
veor d4, d4, d5 @ t1 = (M) (P2 + P3) << 16
|
||||
vand d5, d5, d29
|
||||
vmull.p8 q8, d26, d16 @ K = A*B4
|
||||
veor q3, q3, q0 @ N = I + J
|
||||
veor d2, d2, d3
|
||||
veor d4, d4, d5
|
||||
veor d6, d6, d7 @ t2 = (N) (P4 + P5) << 24
|
||||
vand d7, d7, d30
|
||||
vext.8 q1, q1, q1, #15
|
||||
veor d16, d16, d17 @ t3 = (K) (P6 + P7) << 32
|
||||
vmov.i64 d17, #0
|
||||
vext.8 q2, q2, q2, #14
|
||||
veor d6, d6, d7
|
||||
vmull.p8 q0, d26, d27 @ D = A*B
|
||||
vext.8 q8, q8, q8, #12
|
||||
vext.8 q3, q3, q3, #13
|
||||
veor q1, q1, q2
|
||||
veor q3, q3, q8
|
||||
veor q0, q0, q1
|
||||
veor q0, q0, q3
|
||||
|
||||
vst1.32 {q0}, [r0] @ store all four result words through the 1st argument
|
||||
bx lr @ bx lr
|
||||
#endif
|
||||
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
|
||||
@ Literal used by bn_GF2m_mul_2x2 to reach OPENSSL_armcap_P: an absolute
@ address on _WIN32, otherwise the offset from this literal itself, which the
@ function adds to the literal address obtained with adr.
#if __ARM_MAX_ARCH__>=7
|
||||
.align 5
|
||||
.LOPENSSL_armcap:
|
||||
# ifdef _WIN32
|
||||
.word OPENSSL_armcap_P
|
||||
# else
|
||||
.word OPENSSL_armcap_P-.
|
||||
# endif
|
||||
#endif
|
||||
@ NUL-terminated ASCII: "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 5
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.comm OPENSSL_armcap_P,4,4 @ 4-byte common symbol, alignment 4
|
||||
#endif
|
||||
@@ -1,961 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from armv4-mont.pl. */
|
||||
#include "arm_arch.h"
|
||||
|
||||
#if defined(__thumb2__)
|
||||
.syntax unified
|
||||
.thumb
|
||||
#else
|
||||
.code 32
|
||||
#endif
|
||||
|
||||
.text
|
||||
|
||||
@ Literal used by bn_mul_mont to reach OPENSSL_armcap_P: an absolute address
@ on _WIN32, otherwise the offset from .Lbn_mul_mont, which the function adds
@ to the address of .Lbn_mul_mont obtained with adr.
#if __ARM_MAX_ARCH__>=7
|
||||
.align 5
|
||||
.LOPENSSL_armcap:
|
||||
# ifdef _WIN32
|
||||
.word OPENSSL_armcap_P
|
||||
# else
|
||||
.word OPENSSL_armcap_P-.Lbn_mul_mont
|
||||
# endif
|
||||
#endif
|
||||
|
||||
@ bn_mul_mont: Montgomery multiplication, integer (umull/umlal) path, with an
@ optional hand-off to bn_mul8x_mont_neon.
@ In:  r0 = rp, r1 = ap, r2 = bp, r3 = np, [sp] = &n0, [sp,#4] = num (words).
@ Out: r0 = 1 on completion, 0 when num < 2 (nothing is computed then).
@ The NEON routine is entered only when built with __ARM_MAX_ARCH__>=7, num is
@ a multiple of 8 and the ARMV7_NEON bit of OPENSSL_armcap_P is set.
@ Stack layout used below, relative to r0 = &tp[num-1]: slot 12 = saved rp,
@ slot 13 = saved bp pointer, slot 14 = &n0 then the n0 value, slot 15 = num
@ then &bp[num-1] (the outer-loop end marker).
.globl bn_mul_mont
|
||||
.type bn_mul_mont,%function
|
||||
|
||||
.align 5
|
||||
bn_mul_mont:
|
||||
.Lbn_mul_mont:
|
||||
ldr ip,[sp,#4] @ load num
|
||||
stmdb sp!,{r0,r2} @ sp points at argument block
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
tst ip,#7
|
||||
bne .Lialu @ num is not a multiple of 8: integer path
|
||||
ldr r0,.LOPENSSL_armcap
|
||||
#if !defined(_WIN32)
|
||||
adr r2,.Lbn_mul_mont
|
||||
ldr r0,[r0,r2] @ load the word at .Lbn_mul_mont + stored offset
|
||||
# endif
|
||||
# if defined(__APPLE__) || defined(_WIN32)
|
||||
ldr r0,[r0]
|
||||
# endif
|
||||
tst r0,#ARMV7_NEON @ NEON available?
|
||||
ldmia sp, {r0,r2} @ reload the two clobbered arguments (flags untouched)
|
||||
beq .Lialu
|
||||
add sp,sp,#8 @ drop the {r0,r2} block before handing off
|
||||
b bn_mul8x_mont_neon
|
||||
.align 4
|
||||
.Lialu:
|
||||
#endif
|
||||
cmp ip,#2
|
||||
mov r0,ip @ load num
|
||||
#ifdef __thumb2__
|
||||
ittt lt
|
||||
#endif
|
||||
movlt r0,#0 @ num < 2: return 0
|
||||
addlt sp,sp,#2*4
|
||||
blt .Labrt
|
||||
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
|
||||
|
||||
mov r0,r0,lsl#2 @ rescale r0 for byte count
|
||||
sub sp,sp,r0 @ alloca(4*num)
|
||||
sub sp,sp,#4 @ +extra dword
|
||||
sub r0,r0,#4 @ "num=num-1"
|
||||
add r4,r2,r0 @ &bp[num-1]
|
||||
|
||||
add r0,sp,r0 @ r0 to point at &tp[num-1]
|
||||
ldr r8,[r0,#14*4] @ &n0
|
||||
ldr r2,[r2] @ bp[0]
|
||||
ldr r5,[r1],#4 @ ap[0],ap++
|
||||
ldr r6,[r3],#4 @ np[0],np++
|
||||
ldr r8,[r8] @ *n0
|
||||
str r4,[r0,#15*4] @ save &bp[num]
|
||||
|
||||
umull r10,r11,r5,r2 @ ap[0]*bp[0]
|
||||
str r8,[r0,#14*4] @ save n0 value
|
||||
mul r8,r10,r8 @ "tp[0]"*n0
|
||||
mov r12,#0
|
||||
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
|
||||
mov r4,sp
|
||||
|
||||
.L1st:
|
||||
ldr r5,[r1],#4 @ ap[j],ap++
|
||||
mov r10,r11
|
||||
ldr r6,[r3],#4 @ np[j],np++
|
||||
mov r11,#0
|
||||
umlal r10,r11,r5,r2 @ ap[j]*bp[0]
|
||||
mov r14,#0
|
||||
umlal r12,r14,r6,r8 @ np[j]*n0
|
||||
adds r12,r12,r10
|
||||
str r12,[r4],#4 @ tp[j-1]=,tp++
|
||||
adc r12,r14,#0
|
||||
cmp r4,r0
|
||||
bne .L1st
|
||||
|
||||
adds r12,r12,r11
|
||||
ldr r4,[r0,#13*4] @ restore bp
|
||||
mov r14,#0
|
||||
ldr r8,[r0,#14*4] @ restore n0
|
||||
adc r14,r14,#0
|
||||
str r12,[r0] @ tp[num-1]=
|
||||
mov r7,sp
|
||||
str r14,[r0,#4] @ tp[num]=
|
||||
|
||||
.Louter:
|
||||
sub r7,r0,r7 @ "original" r0-1 value
|
||||
sub r1,r1,r7 @ "rewind" ap to &ap[1]
|
||||
ldr r2,[r4,#4]! @ *(++bp)
|
||||
sub r3,r3,r7 @ "rewind" np to &np[1]
|
||||
ldr r5,[r1,#-4] @ ap[0]
|
||||
ldr r10,[sp] @ tp[0]
|
||||
ldr r6,[r3,#-4] @ np[0]
|
||||
ldr r7,[sp,#4] @ tp[1]
|
||||
|
||||
mov r11,#0
|
||||
umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0]
|
||||
str r4,[r0,#13*4] @ save bp
|
||||
mul r8,r10,r8
|
||||
mov r12,#0
|
||||
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
|
||||
mov r4,sp
|
||||
|
||||
.Linner:
|
||||
ldr r5,[r1],#4 @ ap[j],ap++
|
||||
adds r10,r11,r7 @ +=tp[j]
|
||||
ldr r6,[r3],#4 @ np[j],np++
|
||||
mov r11,#0
|
||||
umlal r10,r11,r5,r2 @ ap[j]*bp[i]
|
||||
mov r14,#0
|
||||
umlal r12,r14,r6,r8 @ np[j]*n0
|
||||
adc r11,r11,#0
|
||||
ldr r7,[r4,#8] @ tp[j+1]
|
||||
adds r12,r12,r10
|
||||
str r12,[r4],#4 @ tp[j-1]=,tp++
|
||||
adc r12,r14,#0
|
||||
cmp r4,r0
|
||||
bne .Linner
|
||||
|
||||
adds r12,r12,r11
|
||||
mov r14,#0
|
||||
ldr r4,[r0,#13*4] @ restore bp
|
||||
adc r14,r14,#0
|
||||
ldr r8,[r0,#14*4] @ restore n0
|
||||
adds r12,r12,r7
|
||||
ldr r7,[r0,#15*4] @ restore &bp[num]
|
||||
adc r14,r14,#0
|
||||
str r12,[r0] @ tp[num-1]=
|
||||
str r14,[r0,#4] @ tp[num]=
|
||||
|
||||
cmp r4,r7 @ reached the saved end-of-bp marker?
|
||||
#ifdef __thumb2__
|
||||
itt ne
|
||||
#endif
|
||||
movne r7,sp
|
||||
bne .Louter
|
||||
|
||||
ldr r2,[r0,#12*4] @ pull rp
|
||||
mov r5,sp
|
||||
add r0,r0,#4 @ r0 to point at &tp[num]
|
||||
sub r5,r0,r5 @ "original" num value
|
||||
mov r4,sp @ "rewind" r4
|
||||
mov r1,r4 @ "borrow" r1
|
||||
sub r3,r3,r5 @ "rewind" r3 to &np[0]
|
||||
|
||||
subs r7,r7,r7 @ "clear" carry flag
|
||||
.Lsub: ldr r7,[r4],#4
|
||||
ldr r6,[r3],#4
|
||||
sbcs r7,r7,r6 @ tp[j]-np[j]
|
||||
str r7,[r2],#4 @ rp[j]=
|
||||
teq r4,r0 @ preserve carry
|
||||
bne .Lsub
|
||||
sbcs r14,r14,#0 @ upmost carry
|
||||
mov r4,sp @ "rewind" r4
|
||||
sub r2,r2,r5 @ "rewind" r2
|
||||
|
||||
.Lcopy: ldr r7,[r4] @ conditional copy
|
||||
ldr r5,[r2]
|
||||
str sp,[r4],#4 @ zap tp
|
||||
#ifdef __thumb2__
|
||||
it cc
|
||||
#endif
|
||||
movcc r5,r7 @ carry clear (the subtraction borrowed): keep tp[j] instead
|
||||
str r5,[r2],#4
|
||||
teq r4,r0 @ preserve carry
|
||||
bne .Lcopy
|
||||
|
||||
mov sp,r0
|
||||
add sp,sp,#4 @ skip over tp[num+1]
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
|
||||
add sp,sp,#2*4 @ skip over {r0,r2}
|
||||
mov r0,#1 @ return 1
|
||||
.Labrt:
|
||||
#if __ARM_ARCH__>=5
|
||||
bx lr @ bx lr
|
||||
#else
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
.size bn_mul_mont,.-bn_mul_mont
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
|
||||
.type bn_mul8x_mont_neon,%function
|
||||
.align 5
|
||||
bn_mul8x_mont_neon:
|
||||
mov ip,sp
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
|
||||
ldmia ip,{r4,r5} @ load rest of parameter block
|
||||
mov ip,sp
|
||||
|
||||
cmp r5,#8
|
||||
bhi .LNEON_8n
|
||||
|
||||
@ special case for r5==8, everything is in register bank...
|
||||
|
||||
vld1.32 {d28[0]}, [r2,:32]!
|
||||
veor d8,d8,d8
|
||||
sub r7,sp,r5,lsl#4
|
||||
vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-(
|
||||
and r7,r7,#-64
|
||||
vld1.32 {d30[0]}, [r4,:32]
|
||||
mov sp,r7 @ alloca
|
||||
vzip.16 d28,d8
|
||||
|
||||
vmull.u32 q6,d28,d0[0]
|
||||
vmull.u32 q7,d28,d0[1]
|
||||
vmull.u32 q8,d28,d1[0]
|
||||
vshl.i64 d29,d13,#16
|
||||
vmull.u32 q9,d28,d1[1]
|
||||
|
||||
vadd.u64 d29,d29,d12
|
||||
veor d8,d8,d8
|
||||
vmul.u32 d29,d29,d30
|
||||
|
||||
vmull.u32 q10,d28,d2[0]
|
||||
vld1.32 {d4,d5,d6,d7}, [r3]!
|
||||
vmull.u32 q11,d28,d2[1]
|
||||
vmull.u32 q12,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmull.u32 q13,d28,d3[1]
|
||||
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
sub r9,r5,#1
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vmov q5,q6
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vmov q6,q7
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmov q7,q8
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vmov q8,q9
|
||||
vmov q9,q10
|
||||
vshr.u64 d10,d10,#16
|
||||
vmov q10,q11
|
||||
vmov q11,q12
|
||||
vadd.u64 d10,d10,d11
|
||||
vmov q12,q13
|
||||
veor q13,q13
|
||||
vshr.u64 d10,d10,#16
|
||||
|
||||
b .LNEON_outer8
|
||||
|
||||
.align 4
|
||||
.LNEON_outer8:
|
||||
vld1.32 {d28[0]}, [r2,:32]!
|
||||
veor d8,d8,d8
|
||||
vzip.16 d28,d8
|
||||
vadd.u64 d12,d12,d10
|
||||
|
||||
vmlal.u32 q6,d28,d0[0]
|
||||
vmlal.u32 q7,d28,d0[1]
|
||||
vmlal.u32 q8,d28,d1[0]
|
||||
vshl.i64 d29,d13,#16
|
||||
vmlal.u32 q9,d28,d1[1]
|
||||
|
||||
vadd.u64 d29,d29,d12
|
||||
veor d8,d8,d8
|
||||
subs r9,r9,#1
|
||||
vmul.u32 d29,d29,d30
|
||||
|
||||
vmlal.u32 q10,d28,d2[0]
|
||||
vmlal.u32 q11,d28,d2[1]
|
||||
vmlal.u32 q12,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q13,d28,d3[1]
|
||||
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vmov q5,q6
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vmov q6,q7
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmov q7,q8
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vmov q8,q9
|
||||
vmov q9,q10
|
||||
vshr.u64 d10,d10,#16
|
||||
vmov q10,q11
|
||||
vmov q11,q12
|
||||
vadd.u64 d10,d10,d11
|
||||
vmov q12,q13
|
||||
veor q13,q13
|
||||
vshr.u64 d10,d10,#16
|
||||
|
||||
bne .LNEON_outer8
|
||||
|
||||
vadd.u64 d12,d12,d10
|
||||
mov r7,sp
|
||||
vshr.u64 d10,d12,#16
|
||||
mov r8,r5
|
||||
vadd.u64 d13,d13,d10
|
||||
add r6,sp,#96
|
||||
vshr.u64 d10,d13,#16
|
||||
vzip.16 d12,d13
|
||||
|
||||
b .LNEON_tail_entry
|
||||
|
||||
.align 4
|
||||
.LNEON_8n:
|
||||
veor q6,q6,q6
|
||||
sub r7,sp,#128
|
||||
veor q7,q7,q7
|
||||
sub r7,r7,r5,lsl#4
|
||||
veor q8,q8,q8
|
||||
and r7,r7,#-64
|
||||
veor q9,q9,q9
|
||||
mov sp,r7 @ alloca
|
||||
veor q10,q10,q10
|
||||
add r7,r7,#256
|
||||
veor q11,q11,q11
|
||||
sub r8,r5,#8
|
||||
veor q12,q12,q12
|
||||
veor q13,q13,q13
|
||||
|
||||
.LNEON_8n_init:
|
||||
vst1.64 {q6,q7},[r7,:256]!
|
||||
subs r8,r8,#8
|
||||
vst1.64 {q8,q9},[r7,:256]!
|
||||
vst1.64 {q10,q11},[r7,:256]!
|
||||
vst1.64 {q12,q13},[r7,:256]!
|
||||
bne .LNEON_8n_init
|
||||
|
||||
add r6,sp,#256
|
||||
vld1.32 {d0,d1,d2,d3},[r1]!
|
||||
add r10,sp,#8
|
||||
vld1.32 {d30[0]},[r4,:32]
|
||||
mov r9,r5
|
||||
b .LNEON_8n_outer
|
||||
|
||||
.align 4
|
||||
.LNEON_8n_outer:
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
veor d8,d8,d8
|
||||
vzip.16 d28,d8
|
||||
add r7,sp,#128
|
||||
vld1.32 {d4,d5,d6,d7},[r3]!
|
||||
|
||||
vmlal.u32 q6,d28,d0[0]
|
||||
vmlal.u32 q7,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q8,d28,d1[0]
|
||||
vshl.i64 d29,d13,#16
|
||||
vmlal.u32 q9,d28,d1[1]
|
||||
vadd.u64 d29,d29,d12
|
||||
vmlal.u32 q10,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q11,d28,d2[1]
|
||||
vst1.32 {d28},[sp,:64] @ put aside smashed b[8*i+0]
|
||||
vmlal.u32 q12,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q13,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vshr.u64 d12,d12,#16
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vadd.u64 d12,d12,d13
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vshr.u64 d12,d12,#16
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vadd.u64 d14,d14,d12
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+0]
|
||||
vmlal.u32 q7,d28,d0[0]
|
||||
vld1.64 {q6},[r6,:128]!
|
||||
vmlal.u32 q8,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q9,d28,d1[0]
|
||||
vshl.i64 d29,d15,#16
|
||||
vmlal.u32 q10,d28,d1[1]
|
||||
vadd.u64 d29,d29,d14
|
||||
vmlal.u32 q11,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q12,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+1]
|
||||
vmlal.u32 q13,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q6,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q7,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q8,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q9,d29,d5[0]
|
||||
vshr.u64 d14,d14,#16
|
||||
vmlal.u32 q10,d29,d5[1]
|
||||
vmlal.u32 q11,d29,d6[0]
|
||||
vadd.u64 d14,d14,d15
|
||||
vmlal.u32 q12,d29,d6[1]
|
||||
vshr.u64 d14,d14,#16
|
||||
vmlal.u32 q13,d29,d7[0]
|
||||
vmlal.u32 q6,d29,d7[1]
|
||||
vadd.u64 d16,d16,d14
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+1]
|
||||
vmlal.u32 q8,d28,d0[0]
|
||||
vld1.64 {q7},[r6,:128]!
|
||||
vmlal.u32 q9,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q10,d28,d1[0]
|
||||
vshl.i64 d29,d17,#16
|
||||
vmlal.u32 q11,d28,d1[1]
|
||||
vadd.u64 d29,d29,d16
|
||||
vmlal.u32 q12,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q13,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+2]
|
||||
vmlal.u32 q6,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q7,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q8,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q9,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q10,d29,d5[0]
|
||||
vshr.u64 d16,d16,#16
|
||||
vmlal.u32 q11,d29,d5[1]
|
||||
vmlal.u32 q12,d29,d6[0]
|
||||
vadd.u64 d16,d16,d17
|
||||
vmlal.u32 q13,d29,d6[1]
|
||||
vshr.u64 d16,d16,#16
|
||||
vmlal.u32 q6,d29,d7[0]
|
||||
vmlal.u32 q7,d29,d7[1]
|
||||
vadd.u64 d18,d18,d16
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+2]
|
||||
vmlal.u32 q9,d28,d0[0]
|
||||
vld1.64 {q8},[r6,:128]!
|
||||
vmlal.u32 q10,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q11,d28,d1[0]
|
||||
vshl.i64 d29,d19,#16
|
||||
vmlal.u32 q12,d28,d1[1]
|
||||
vadd.u64 d29,d29,d18
|
||||
vmlal.u32 q13,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q6,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+3]
|
||||
vmlal.u32 q7,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q8,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q9,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q10,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q11,d29,d5[0]
|
||||
vshr.u64 d18,d18,#16
|
||||
vmlal.u32 q12,d29,d5[1]
|
||||
vmlal.u32 q13,d29,d6[0]
|
||||
vadd.u64 d18,d18,d19
|
||||
vmlal.u32 q6,d29,d6[1]
|
||||
vshr.u64 d18,d18,#16
|
||||
vmlal.u32 q7,d29,d7[0]
|
||||
vmlal.u32 q8,d29,d7[1]
|
||||
vadd.u64 d20,d20,d18
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+3]
|
||||
vmlal.u32 q10,d28,d0[0]
|
||||
vld1.64 {q9},[r6,:128]!
|
||||
vmlal.u32 q11,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q12,d28,d1[0]
|
||||
vshl.i64 d29,d21,#16
|
||||
vmlal.u32 q13,d28,d1[1]
|
||||
vadd.u64 d29,d29,d20
|
||||
vmlal.u32 q6,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q7,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+4]
|
||||
vmlal.u32 q8,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q9,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q10,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q11,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q12,d29,d5[0]
|
||||
vshr.u64 d20,d20,#16
|
||||
vmlal.u32 q13,d29,d5[1]
|
||||
vmlal.u32 q6,d29,d6[0]
|
||||
vadd.u64 d20,d20,d21
|
||||
vmlal.u32 q7,d29,d6[1]
|
||||
vshr.u64 d20,d20,#16
|
||||
vmlal.u32 q8,d29,d7[0]
|
||||
vmlal.u32 q9,d29,d7[1]
|
||||
vadd.u64 d22,d22,d20
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+4]
|
||||
vmlal.u32 q11,d28,d0[0]
|
||||
vld1.64 {q10},[r6,:128]!
|
||||
vmlal.u32 q12,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q13,d28,d1[0]
|
||||
vshl.i64 d29,d23,#16
|
||||
vmlal.u32 q6,d28,d1[1]
|
||||
vadd.u64 d29,d29,d22
|
||||
vmlal.u32 q7,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q8,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+5]
|
||||
vmlal.u32 q9,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q10,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q11,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q12,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q13,d29,d5[0]
|
||||
vshr.u64 d22,d22,#16
|
||||
vmlal.u32 q6,d29,d5[1]
|
||||
vmlal.u32 q7,d29,d6[0]
|
||||
vadd.u64 d22,d22,d23
|
||||
vmlal.u32 q8,d29,d6[1]
|
||||
vshr.u64 d22,d22,#16
|
||||
vmlal.u32 q9,d29,d7[0]
|
||||
vmlal.u32 q10,d29,d7[1]
|
||||
vadd.u64 d24,d24,d22
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+5]
|
||||
vmlal.u32 q12,d28,d0[0]
|
||||
vld1.64 {q11},[r6,:128]!
|
||||
vmlal.u32 q13,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q6,d28,d1[0]
|
||||
vshl.i64 d29,d25,#16
|
||||
vmlal.u32 q7,d28,d1[1]
|
||||
vadd.u64 d29,d29,d24
|
||||
vmlal.u32 q8,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q9,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+6]
|
||||
vmlal.u32 q10,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q11,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q12,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q13,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q6,d29,d5[0]
|
||||
vshr.u64 d24,d24,#16
|
||||
vmlal.u32 q7,d29,d5[1]
|
||||
vmlal.u32 q8,d29,d6[0]
|
||||
vadd.u64 d24,d24,d25
|
||||
vmlal.u32 q9,d29,d6[1]
|
||||
vshr.u64 d24,d24,#16
|
||||
vmlal.u32 q10,d29,d7[0]
|
||||
vmlal.u32 q11,d29,d7[1]
|
||||
vadd.u64 d26,d26,d24
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+6]
|
||||
vmlal.u32 q13,d28,d0[0]
|
||||
vld1.64 {q12},[r6,:128]!
|
||||
vmlal.u32 q6,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q7,d28,d1[0]
|
||||
vshl.i64 d29,d27,#16
|
||||
vmlal.u32 q8,d28,d1[1]
|
||||
vadd.u64 d29,d29,d26
|
||||
vmlal.u32 q9,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q10,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+7]
|
||||
vmlal.u32 q11,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q12,d28,d3[1]
|
||||
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
|
||||
vmlal.u32 q13,d29,d4[0]
|
||||
vld1.32 {d0,d1,d2,d3},[r1]!
|
||||
vmlal.u32 q6,d29,d4[1]
|
||||
vmlal.u32 q7,d29,d5[0]
|
||||
vshr.u64 d26,d26,#16
|
||||
vmlal.u32 q8,d29,d5[1]
|
||||
vmlal.u32 q9,d29,d6[0]
|
||||
vadd.u64 d26,d26,d27
|
||||
vmlal.u32 q10,d29,d6[1]
|
||||
vshr.u64 d26,d26,#16
|
||||
vmlal.u32 q11,d29,d7[0]
|
||||
vmlal.u32 q12,d29,d7[1]
|
||||
vadd.u64 d12,d12,d26
|
||||
vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7]
|
||||
add r10,sp,#8 @ rewind
|
||||
sub r8,r5,#8
|
||||
b .LNEON_8n_inner
|
||||
|
||||
.align 4
|
||||
.LNEON_8n_inner:
|
||||
subs r8,r8,#8
|
||||
vmlal.u32 q6,d28,d0[0]
|
||||
vld1.64 {q13},[r6,:128]
|
||||
vmlal.u32 q7,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+0]
|
||||
vmlal.u32 q8,d28,d1[0]
|
||||
vld1.32 {d4,d5,d6,d7},[r3]!
|
||||
vmlal.u32 q9,d28,d1[1]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q10,d28,d2[0]
|
||||
vmlal.u32 q11,d28,d2[1]
|
||||
vmlal.u32 q12,d28,d3[0]
|
||||
vmlal.u32 q13,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+1]
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vst1.64 {q6},[r7,:128]!
|
||||
vmlal.u32 q7,d28,d0[0]
|
||||
vld1.64 {q6},[r6,:128]
|
||||
vmlal.u32 q8,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+1]
|
||||
vmlal.u32 q9,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q10,d28,d1[1]
|
||||
vmlal.u32 q11,d28,d2[0]
|
||||
vmlal.u32 q12,d28,d2[1]
|
||||
vmlal.u32 q13,d28,d3[0]
|
||||
vmlal.u32 q6,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+2]
|
||||
vmlal.u32 q7,d29,d4[0]
|
||||
vmlal.u32 q8,d29,d4[1]
|
||||
vmlal.u32 q9,d29,d5[0]
|
||||
vmlal.u32 q10,d29,d5[1]
|
||||
vmlal.u32 q11,d29,d6[0]
|
||||
vmlal.u32 q12,d29,d6[1]
|
||||
vmlal.u32 q13,d29,d7[0]
|
||||
vmlal.u32 q6,d29,d7[1]
|
||||
vst1.64 {q7},[r7,:128]!
|
||||
vmlal.u32 q8,d28,d0[0]
|
||||
vld1.64 {q7},[r6,:128]
|
||||
vmlal.u32 q9,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+2]
|
||||
vmlal.u32 q10,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q11,d28,d1[1]
|
||||
vmlal.u32 q12,d28,d2[0]
|
||||
vmlal.u32 q13,d28,d2[1]
|
||||
vmlal.u32 q6,d28,d3[0]
|
||||
vmlal.u32 q7,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+3]
|
||||
vmlal.u32 q8,d29,d4[0]
|
||||
vmlal.u32 q9,d29,d4[1]
|
||||
vmlal.u32 q10,d29,d5[0]
|
||||
vmlal.u32 q11,d29,d5[1]
|
||||
vmlal.u32 q12,d29,d6[0]
|
||||
vmlal.u32 q13,d29,d6[1]
|
||||
vmlal.u32 q6,d29,d7[0]
|
||||
vmlal.u32 q7,d29,d7[1]
|
||||
vst1.64 {q8},[r7,:128]!
|
||||
vmlal.u32 q9,d28,d0[0]
|
||||
vld1.64 {q8},[r6,:128]
|
||||
vmlal.u32 q10,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+3]
|
||||
vmlal.u32 q11,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q12,d28,d1[1]
|
||||
vmlal.u32 q13,d28,d2[0]
|
||||
vmlal.u32 q6,d28,d2[1]
|
||||
vmlal.u32 q7,d28,d3[0]
|
||||
vmlal.u32 q8,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+4]
|
||||
vmlal.u32 q9,d29,d4[0]
|
||||
vmlal.u32 q10,d29,d4[1]
|
||||
vmlal.u32 q11,d29,d5[0]
|
||||
vmlal.u32 q12,d29,d5[1]
|
||||
vmlal.u32 q13,d29,d6[0]
|
||||
vmlal.u32 q6,d29,d6[1]
|
||||
vmlal.u32 q7,d29,d7[0]
|
||||
vmlal.u32 q8,d29,d7[1]
|
||||
vst1.64 {q9},[r7,:128]!
|
||||
vmlal.u32 q10,d28,d0[0]
|
||||
vld1.64 {q9},[r6,:128]
|
||||
vmlal.u32 q11,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+4]
|
||||
vmlal.u32 q12,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q13,d28,d1[1]
|
||||
vmlal.u32 q6,d28,d2[0]
|
||||
vmlal.u32 q7,d28,d2[1]
|
||||
vmlal.u32 q8,d28,d3[0]
|
||||
vmlal.u32 q9,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+5]
|
||||
vmlal.u32 q10,d29,d4[0]
|
||||
vmlal.u32 q11,d29,d4[1]
|
||||
vmlal.u32 q12,d29,d5[0]
|
||||
vmlal.u32 q13,d29,d5[1]
|
||||
vmlal.u32 q6,d29,d6[0]
|
||||
vmlal.u32 q7,d29,d6[1]
|
||||
vmlal.u32 q8,d29,d7[0]
|
||||
vmlal.u32 q9,d29,d7[1]
|
||||
vst1.64 {q10},[r7,:128]!
|
||||
vmlal.u32 q11,d28,d0[0]
|
||||
vld1.64 {q10},[r6,:128]
|
||||
vmlal.u32 q12,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+5]
|
||||
vmlal.u32 q13,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q6,d28,d1[1]
|
||||
vmlal.u32 q7,d28,d2[0]
|
||||
vmlal.u32 q8,d28,d2[1]
|
||||
vmlal.u32 q9,d28,d3[0]
|
||||
vmlal.u32 q10,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+6]
|
||||
vmlal.u32 q11,d29,d4[0]
|
||||
vmlal.u32 q12,d29,d4[1]
|
||||
vmlal.u32 q13,d29,d5[0]
|
||||
vmlal.u32 q6,d29,d5[1]
|
||||
vmlal.u32 q7,d29,d6[0]
|
||||
vmlal.u32 q8,d29,d6[1]
|
||||
vmlal.u32 q9,d29,d7[0]
|
||||
vmlal.u32 q10,d29,d7[1]
|
||||
vst1.64 {q11},[r7,:128]!
|
||||
vmlal.u32 q12,d28,d0[0]
|
||||
vld1.64 {q11},[r6,:128]
|
||||
vmlal.u32 q13,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+6]
|
||||
vmlal.u32 q6,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q7,d28,d1[1]
|
||||
vmlal.u32 q8,d28,d2[0]
|
||||
vmlal.u32 q9,d28,d2[1]
|
||||
vmlal.u32 q10,d28,d3[0]
|
||||
vmlal.u32 q11,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+7]
|
||||
vmlal.u32 q12,d29,d4[0]
|
||||
vmlal.u32 q13,d29,d4[1]
|
||||
vmlal.u32 q6,d29,d5[0]
|
||||
vmlal.u32 q7,d29,d5[1]
|
||||
vmlal.u32 q8,d29,d6[0]
|
||||
vmlal.u32 q9,d29,d6[1]
|
||||
vmlal.u32 q10,d29,d7[0]
|
||||
vmlal.u32 q11,d29,d7[1]
|
||||
vst1.64 {q12},[r7,:128]!
|
||||
vmlal.u32 q13,d28,d0[0]
|
||||
vld1.64 {q12},[r6,:128]
|
||||
vmlal.u32 q6,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+7]
|
||||
vmlal.u32 q7,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q8,d28,d1[1]
|
||||
vmlal.u32 q9,d28,d2[0]
|
||||
vmlal.u32 q10,d28,d2[1]
|
||||
vmlal.u32 q11,d28,d3[0]
|
||||
vmlal.u32 q12,d28,d3[1]
|
||||
it eq
|
||||
subeq r1,r1,r5,lsl#2 @ rewind
|
||||
vmlal.u32 q13,d29,d4[0]
|
||||
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
|
||||
vmlal.u32 q6,d29,d4[1]
|
||||
vld1.32 {d0,d1,d2,d3},[r1]!
|
||||
vmlal.u32 q7,d29,d5[0]
|
||||
add r10,sp,#8 @ rewind
|
||||
vmlal.u32 q8,d29,d5[1]
|
||||
vmlal.u32 q9,d29,d6[0]
|
||||
vmlal.u32 q10,d29,d6[1]
|
||||
vmlal.u32 q11,d29,d7[0]
|
||||
vst1.64 {q13},[r7,:128]!
|
||||
vmlal.u32 q12,d29,d7[1]
|
||||
|
||||
bne .LNEON_8n_inner
|
||||
add r6,sp,#128
|
||||
vst1.64 {q6,q7},[r7,:256]!
|
||||
veor q2,q2,q2 @ d4-d5
|
||||
vst1.64 {q8,q9},[r7,:256]!
|
||||
veor q3,q3,q3 @ d6-d7
|
||||
vst1.64 {q10,q11},[r7,:256]!
|
||||
vst1.64 {q12},[r7,:128]
|
||||
|
||||
subs r9,r9,#8
|
||||
vld1.64 {q6,q7},[r6,:256]!
|
||||
vld1.64 {q8,q9},[r6,:256]!
|
||||
vld1.64 {q10,q11},[r6,:256]!
|
||||
vld1.64 {q12,q13},[r6,:256]!
|
||||
|
||||
itt ne
|
||||
subne r3,r3,r5,lsl#2 @ rewind
|
||||
bne .LNEON_8n_outer
|
||||
|
||||
add r7,sp,#128
|
||||
vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame
|
||||
vshr.u64 d10,d12,#16
|
||||
vst1.64 {q2,q3},[sp,:256]!
|
||||
vadd.u64 d13,d13,d10
|
||||
vst1.64 {q2,q3}, [sp,:256]!
|
||||
vshr.u64 d10,d13,#16
|
||||
vst1.64 {q2,q3}, [sp,:256]!
|
||||
vzip.16 d12,d13
|
||||
|
||||
mov r8,r5
|
||||
b .LNEON_tail_entry
|
||||
|
||||
.align 4
|
||||
.LNEON_tail:
|
||||
vadd.u64 d12,d12,d10
|
||||
vshr.u64 d10,d12,#16
|
||||
vld1.64 {q8,q9}, [r6, :256]!
|
||||
vadd.u64 d13,d13,d10
|
||||
vld1.64 {q10,q11}, [r6, :256]!
|
||||
vshr.u64 d10,d13,#16
|
||||
vld1.64 {q12,q13}, [r6, :256]!
|
||||
vzip.16 d12,d13
|
||||
|
||||
.LNEON_tail_entry:
|
||||
vadd.u64 d14,d14,d10
|
||||
vst1.32 {d12[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d14,#16
|
||||
vadd.u64 d15,d15,d10
|
||||
vshr.u64 d10,d15,#16
|
||||
vzip.16 d14,d15
|
||||
vadd.u64 d16,d16,d10
|
||||
vst1.32 {d14[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d16,#16
|
||||
vadd.u64 d17,d17,d10
|
||||
vshr.u64 d10,d17,#16
|
||||
vzip.16 d16,d17
|
||||
vadd.u64 d18,d18,d10
|
||||
vst1.32 {d16[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d18,#16
|
||||
vadd.u64 d19,d19,d10
|
||||
vshr.u64 d10,d19,#16
|
||||
vzip.16 d18,d19
|
||||
vadd.u64 d20,d20,d10
|
||||
vst1.32 {d18[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d20,#16
|
||||
vadd.u64 d21,d21,d10
|
||||
vshr.u64 d10,d21,#16
|
||||
vzip.16 d20,d21
|
||||
vadd.u64 d22,d22,d10
|
||||
vst1.32 {d20[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d22,#16
|
||||
vadd.u64 d23,d23,d10
|
||||
vshr.u64 d10,d23,#16
|
||||
vzip.16 d22,d23
|
||||
vadd.u64 d24,d24,d10
|
||||
vst1.32 {d22[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d24,#16
|
||||
vadd.u64 d25,d25,d10
|
||||
vshr.u64 d10,d25,#16
|
||||
vzip.16 d24,d25
|
||||
vadd.u64 d26,d26,d10
|
||||
vst1.32 {d24[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d26,#16
|
||||
vadd.u64 d27,d27,d10
|
||||
vshr.u64 d10,d27,#16
|
||||
vzip.16 d26,d27
|
||||
vld1.64 {q6,q7}, [r6, :256]!
|
||||
subs r8,r8,#8
|
||||
vst1.32 {d26[0]}, [r7, :32]!
|
||||
bne .LNEON_tail
|
||||
|
||||
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
|
||||
sub r3,r3,r5,lsl#2 @ rewind r3
|
||||
subs r1,sp,#0 @ clear carry flag
|
||||
add r2,sp,r5,lsl#2
|
||||
|
||||
.LNEON_sub:
|
||||
ldmia r1!, {r4,r5,r6,r7}
|
||||
ldmia r3!, {r8,r9,r10,r11}
|
||||
sbcs r8, r4,r8
|
||||
sbcs r9, r5,r9
|
||||
sbcs r10,r6,r10
|
||||
sbcs r11,r7,r11
|
||||
teq r1,r2 @ preserves carry
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
bne .LNEON_sub
|
||||
|
||||
ldr r10, [r1] @ load top-most bit
|
||||
mov r11,sp
|
||||
veor q0,q0,q0
|
||||
sub r11,r2,r11 @ this is num*4
|
||||
veor q1,q1,q1
|
||||
mov r1,sp
|
||||
sub r0,r0,r11 @ rewind r0
|
||||
mov r3,r2 @ second 3/4th of frame
|
||||
sbcs r10,r10,#0 @ result is carry flag
|
||||
|
||||
.LNEON_copy_n_zap:
|
||||
ldmia r1!, {r4,r5,r6,r7}
|
||||
ldmia r0, {r8,r9,r10,r11}
|
||||
it cc
|
||||
movcc r8, r4
|
||||
vst1.64 {q0,q1}, [r3,:256]! @ wipe
|
||||
itt cc
|
||||
movcc r9, r5
|
||||
movcc r10,r6
|
||||
vst1.64 {q0,q1}, [r3,:256]! @ wipe
|
||||
it cc
|
||||
movcc r11,r7
|
||||
ldmia r1, {r4,r5,r6,r7}
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
sub r1,r1,#16
|
||||
ldmia r0, {r8,r9,r10,r11}
|
||||
it cc
|
||||
movcc r8, r4
|
||||
vst1.64 {q0,q1}, [r1,:256]! @ wipe
|
||||
itt cc
|
||||
movcc r9, r5
|
||||
movcc r10,r6
|
||||
vst1.64 {q0,q1}, [r3,:256]! @ wipe
|
||||
it cc
|
||||
movcc r11,r7
|
||||
teq r1,r2 @ preserves carry
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
bne .LNEON_copy_n_zap
|
||||
|
||||
mov sp,ip
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
bx lr @ bx lr
|
||||
.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
|
||||
#endif
|
||||
@ NUL-terminated ASCII identification string; the bytes decode to
@ "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>".
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
@ The alignment directive is emitted twice; the second one is redundant.
.align 2
|
||||
.align 2
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
@ Declares OPENSSL_armcap_P as a common symbol (size 4, alignment 4);
@ only present when the file is built for __ARM_MAX_ARCH__ >= 7.
.comm OPENSSL_armcap_P,4,4
|
||||
#endif
|
||||
@@ -1,273 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from armv4cpuid.pl. */
|
||||
#include "arm_arch.h"
|
||||
|
||||
#if defined(__thumb2__) && !defined(__APPLE__)
|
||||
.syntax unified
|
||||
.thumb
|
||||
#else
|
||||
.code 32
|
||||
#undef __thumb2__
|
||||
#endif
|
||||
|
||||
.text
|
||||
|
||||
.align 5
|
||||
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,%function
@-----------------------------------------------------------------------
@ OPENSSL_atomic_add
@ In:    r0 = address of a 32-bit word, r1 = value to add
@ Out:   r0 = value of the word after the addition (both paths)
@ NOTE(review): C prototype is presumably
@        int OPENSSL_atomic_add(int *val, int amount) -- confirm.
@
@ __ARM_ARCH__ >= 6: lock-free ldrex/strex retry loop.
@        Clobbers r2, r3, flags.
@ older cores:       serialises through atomic_add_spinlock using swp,
@        calling sched_yield while the lock is held by someone else.
@        r4-r6 and lr are saved and restored; r2, r3, flags and
@        whatever sched_yield clobbers are not preserved.
@
@ Fix relative to the generated original: the spinlock path used to
@ return r0 == 0 (the value swp read from the free lock) instead of the
@ updated word, disagreeing with the ldrex/strex path.
@-----------------------------------------------------------------------
OPENSSL_atomic_add:
#if __ARM_ARCH__>=6
.Ladd:	ldrex	r2,[r0]			@ r2 = *val (exclusive)
	add	r3,r2,r1		@ r3 = *val + amount
	strex	r2,r3,[r0]		@ try to store; r2 = 0 on success
	cmp	r2,#0
	bne	.Ladd			@ lost exclusivity, retry
	mov	r0,r3			@ return the updated value
	bx	lr
#else
	stmdb	sp!,{r4,r5,r6,lr}
	ldr	r2,.Lspinlock		@ r2 = atomic_add_spinlock-.Lspinlock
	adr	r3,.Lspinlock
	mov	r4,r0			@ r4 = val    (kept across sched_yield)
	mov	r5,r1			@ r5 = amount (kept across sched_yield)
	add	r6,r3,r2	@ &spinlock
	b	.+8			@ first attempt: skip the sched_yield call
.Lspin:	bl	sched_yield
	mov	r0,#-1
	swp	r0,r0,[r6]		@ r0 = old lock value, lock = -1
	cmp	r0,#0
	bne	.Lspin			@ lock was already taken, yield and retry

	ldr	r2,[r4]
	add	r2,r2,r5
	str	r2,[r4]			@ *val += amount; r2 = updated value
	str	r0,[r6]		@ release spinlock
	mov	r0,r2			@ return the updated value, as the v6+ path does
	ldmia	sp!,{r4,r5,r6,lr}
	tst	lr,#1
	moveq	pc,lr			@ return to ARM-state caller (bit 0 of lr clear)
.word	0xe12fff1e	@ bx	lr
#endif
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
|
||||
|
||||
@ OPENSSL_cleanse: store zero bytes over a buffer.
@ In:    r0 = start address (used as the store pointer), r1 = byte count
@ Clobb: r0, r1, ip, flags
@ Counts below 7 are cleared byte by byte. Larger counts first store bytes
@ until r0 is 4-byte aligned, then whole words, then any leftover bytes.
.globl OPENSSL_cleanse
|
||||
.type OPENSSL_cleanse,%function
|
||||
OPENSSL_cleanse:
|
||||
@ ip = 0, the value written everywhere below
eor ip,ip,ip
|
||||
cmp r1,#7
|
||||
#ifdef __thumb2__
|
||||
itt hs
|
||||
#endif
|
||||
@ count >= 7: bias the count by -4 (the word loop stores first, tests after)
subhs r1,r1,#4
|
||||
bhs .Lot
|
||||
cmp r1,#0
|
||||
beq .Lcleanse_done
|
||||
@ Byte loop: runs while the decremented count is still non-zero.
.Little:
|
||||
strb ip,[r0],#1
|
||||
subs r1,r1,#1
|
||||
bhi .Little
|
||||
b .Lcleanse_done
|
||||
|
||||
@ Alignment loop: store single bytes until the low two bits of r0 are clear.
.Lot: tst r0,#3
|
||||
beq .Laligned
|
||||
strb ip,[r0],#1
|
||||
sub r1,r1,#1
|
||||
b .Lot
|
||||
@ Word loop: store 4 bytes, then continue while the biased count did not borrow.
.Laligned:
|
||||
str ip,[r0],#4
|
||||
subs r1,r1,#4
|
||||
bhs .Laligned
|
||||
@ undo the -4 bias; a non-zero result is the number of tail bytes left
adds r1,r1,#4
|
||||
bne .Little
|
||||
.Lcleanse_done:
|
||||
#if __ARM_ARCH__>=5
|
||||
bx lr
|
||||
#else
|
||||
@ pre-v5 return: plain "mov pc,lr" when bit 0 of lr is clear, otherwise the
@ hand-encoded bx lr word below is executed
tst lr,#1
|
||||
moveq pc,lr
|
||||
.word 0xe12fff1e @ bx lr
|
||||
#endif
|
||||
.size OPENSSL_cleanse,.-OPENSSL_cleanse
|
||||
|
||||
@ CRYPTO_memcmp: compare two byte buffers without an early exit.
@ In:    r0, r1 = the two buffers, r2 = number of bytes to compare
@ Out:   r0 = 0 if every byte pair was equal (or r2 was 0), 1 otherwise
@ Clobb: r1, r2, ip, flags (r4/r5 are saved and restored)
@ The loop always runs r2 times; differences are only accumulated in ip.
.globl CRYPTO_memcmp
|
||||
.type CRYPTO_memcmp,%function
|
||||
.align 4
|
||||
CRYPTO_memcmp:
|
||||
@ ip = 0: running OR of all byte differences
eor ip,ip,ip
|
||||
cmp r2,#0
|
||||
beq .Lno_data
|
||||
stmdb sp!,{r4,r5}
|
||||
|
||||
.Loop_cmp:
|
||||
ldrb r4,[r0],#1
|
||||
ldrb r5,[r1],#1
|
||||
@ r4 = a[i] ^ b[i]; non-zero bits mark a mismatch
eor r4,r4,r5
|
||||
orr ip,ip,r4
|
||||
subs r2,r2,#1
|
||||
bne .Loop_cmp
|
||||
|
||||
ldmia sp!,{r4,r5}
|
||||
.Lno_data:
|
||||
@ ip is at most 0xff, so 0-ip has bit 31 set exactly when ip != 0;
@ shifting that bit down yields the 0/1 result
rsb r0,ip,#0
|
||||
mov r0,r0,lsr#31
|
||||
#if __ARM_ARCH__>=5
|
||||
bx lr
|
||||
#else
|
||||
tst lr,#1
|
||||
moveq pc,lr
|
||||
.word 0xe12fff1e @ bx lr
|
||||
#endif
|
||||
.size CRYPTO_memcmp,.-CRYPTO_memcmp
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
|
||||
.align 5
|
||||
@ _armv7_neon_probe: executes a single NEON instruction (q0 |= q0, which
@ leaves q0 unchanged) and returns.
@ NOTE(review): presumably called with an illegal-instruction handler armed so
@ that a fault here reports missing NEON support -- confirm against the caller.
.globl _armv7_neon_probe
|
||||
.type _armv7_neon_probe,%function
|
||||
_armv7_neon_probe:
|
||||
vorr q0,q0,q0
|
||||
bx lr
|
||||
.size _armv7_neon_probe,.-_armv7_neon_probe
|
||||
|
||||
@ _armv7_tick: reads a 64-bit coprocessor-15 counter into the r0/r1 pair and
@ returns. Which counter is read depends on the platform (see inline comments).
.globl _armv7_tick
|
||||
.type _armv7_tick,%function
|
||||
_armv7_tick:
|
||||
#ifdef __APPLE__
|
||||
mrrc p15,0,r0,r1,c14 @ CNTPCT
|
||||
#else
|
||||
mrrc p15,1,r0,r1,c14 @ CNTVCT
|
||||
#endif
|
||||
bx lr
|
||||
.size _armv7_tick,.-_armv7_tick
|
||||
|
||||
@ _armv8_aes_probe: executes one hand-encoded "aese.8 q0,q0" and returns.
@ The instruction is emitted as raw bytes; the byte order differs between the
@ Thumb-2 and ARM builds.
@ NOTE(review): presumably run under an illegal-instruction handler to detect
@ the AES extension -- confirm against the caller.
.globl _armv8_aes_probe
|
||||
.type _armv8_aes_probe,%function
|
||||
_armv8_aes_probe:
|
||||
#if defined(__thumb2__) && !defined(__APPLE__)
|
||||
.byte 0xb0,0xff,0x00,0x03 @ aese.8 q0,q0
|
||||
#else
|
||||
.byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
|
||||
#endif
|
||||
bx lr
|
||||
.size _armv8_aes_probe,.-_armv8_aes_probe
|
||||
|
||||
@ _armv8_sha1_probe: executes one hand-encoded "sha1c.32 q0,q0,q0" and returns.
@ The instruction is emitted as raw bytes; the byte order differs between the
@ Thumb-2 and ARM builds.
@ NOTE(review): presumably run under an illegal-instruction handler to detect
@ the SHA-1 extension -- confirm against the caller.
.globl _armv8_sha1_probe
|
||||
.type _armv8_sha1_probe,%function
|
||||
_armv8_sha1_probe:
|
||||
#if defined(__thumb2__) && !defined(__APPLE__)
|
||||
.byte 0x00,0xef,0x40,0x0c @ sha1c.32 q0,q0,q0
|
||||
#else
|
||||
.byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
|
||||
#endif
|
||||
bx lr
|
||||
.size _armv8_sha1_probe,.-_armv8_sha1_probe
|
||||
|
||||
@ _armv8_sha256_probe: executes one hand-encoded "sha256h.32 q0,q0,q0" and
@ returns. The instruction is emitted as raw bytes; the byte order differs
@ between the Thumb-2 and ARM builds.
@ NOTE(review): presumably run under an illegal-instruction handler to detect
@ the SHA-256 extension -- confirm against the caller.
.globl _armv8_sha256_probe
|
||||
.type _armv8_sha256_probe,%function
|
||||
_armv8_sha256_probe:
|
||||
#if defined(__thumb2__) && !defined(__APPLE__)
|
||||
.byte 0x00,0xff,0x40,0x0c @ sha256h.32 q0,q0,q0
|
||||
#else
|
||||
.byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
|
||||
#endif
|
||||
bx lr
|
||||
.size _armv8_sha256_probe,.-_armv8_sha256_probe
|
||||
@ _armv8_pmull_probe: executes one hand-encoded "vmull.p64 q0,d0,d0" and
@ returns. The instruction is emitted as raw bytes; the byte order differs
@ between the Thumb-2 and ARM builds.
@ NOTE(review): presumably run under an illegal-instruction handler to detect
@ 64-bit polynomial multiply support -- confirm against the caller.
.globl _armv8_pmull_probe
|
||||
.type _armv8_pmull_probe,%function
|
||||
_armv8_pmull_probe:
|
||||
#if defined(__thumb2__) && !defined(__APPLE__)
|
||||
.byte 0xa0,0xef,0x00,0x0e @ vmull.p64 q0,d0,d0
|
||||
#else
|
||||
.byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
|
||||
#endif
|
||||
bx lr
|
||||
.size _armv8_pmull_probe,.-_armv8_pmull_probe
|
||||
#endif
|
||||
|
||||
@ OPENSSL_wipe_cpu: zeroes scratch registers and returns the stack pointer.
@ Out:   r0 = current sp
@ Zeroed: r2, r3, ip always; q0-q3 and q8-q15 only when built for
@         __ARM_MAX_ARCH__ >= 7 and bit 0 of the capability word is set.
@ r1 is overwritten with an address in the >=7 build and is not zeroed.
@ NOTE(review): bit 0 of OPENSSL_armcap_P presumably means "NEON available",
@ and q4-q7 are presumably skipped because they are callee-saved -- confirm.
.globl OPENSSL_wipe_cpu
|
||||
.type OPENSSL_wipe_cpu,%function
|
||||
OPENSSL_wipe_cpu:
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
@ position-independent load: the literal holds (OPENSSL_armcap_P - literal),
@ so literal address + literal value addresses the capability symbol
ldr r0,.LOPENSSL_armcap
|
||||
adr r1,.LOPENSSL_armcap
|
||||
ldr r0,[r1,r0]
|
||||
#ifdef __APPLE__
|
||||
@ one extra level of indirection on Apple targets
ldr r0,[r0]
|
||||
#endif
|
||||
#endif
|
||||
eor r2,r2,r2
|
||||
eor r3,r3,r3
|
||||
eor ip,ip,ip
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
@ skip the NEON wipe when capability bit 0 is clear
tst r0,#1
|
||||
beq .Lwipe_done
|
||||
veor q0, q0, q0
|
||||
veor q1, q1, q1
|
||||
veor q2, q2, q2
|
||||
veor q3, q3, q3
|
||||
veor q8, q8, q8
|
||||
veor q9, q9, q9
|
||||
veor q10, q10, q10
|
||||
veor q11, q11, q11
|
||||
veor q12, q12, q12
|
||||
veor q13, q13, q13
|
||||
veor q14, q14, q14
|
||||
veor q15, q15, q15
|
||||
.Lwipe_done:
|
||||
#endif
|
||||
@ return value: the caller's stack pointer
mov r0,sp
|
||||
#if __ARM_ARCH__>=5
|
||||
bx lr
|
||||
#else
|
||||
tst lr,#1
|
||||
moveq pc,lr
|
||||
.word 0xe12fff1e @ bx lr
|
||||
#endif
|
||||
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
|
||||
|
||||
@ OPENSSL_instrument_bus: stub on this target; ignores its arguments and
@ returns 0 in r0.
.globl OPENSSL_instrument_bus
|
||||
.type OPENSSL_instrument_bus,%function
|
||||
OPENSSL_instrument_bus:
|
||||
eor r0,r0,r0
|
||||
#if __ARM_ARCH__>=5
|
||||
bx lr
|
||||
#else
|
||||
tst lr,#1
|
||||
moveq pc,lr
|
||||
.word 0xe12fff1e @ bx lr
|
||||
#endif
|
||||
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
|
||||
|
||||
@ OPENSSL_instrument_bus2: stub on this target; ignores its arguments and
@ returns 0 in r0.
.globl OPENSSL_instrument_bus2
|
||||
.type OPENSSL_instrument_bus2,%function
|
||||
OPENSSL_instrument_bus2:
|
||||
eor r0,r0,r0
|
||||
#if __ARM_ARCH__>=5
|
||||
bx lr
|
||||
#else
|
||||
tst lr,#1
|
||||
moveq pc,lr
|
||||
.word 0xe12fff1e @ bx lr
|
||||
#endif
|
||||
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
|
||||
|
||||
@ Literal pool and data used by the routines in this file.
.align 5
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
@ PC-relative offset from this word to OPENSSL_armcap_P (read by OPENSSL_wipe_cpu)
.LOPENSSL_armcap:
|
||||
.word OPENSSL_armcap_P-.
|
||||
#endif
|
||||
#if __ARM_ARCH__>=6
|
||||
.align 5
|
||||
#else
|
||||
@ Pre-v6 only: offset from .Lspinlock to the lock word, plus the lock word
@ itself (initially 0) placed in .data
.Lspinlock:
|
||||
.word atomic_add_spinlock-.Lspinlock
|
||||
.align 5
|
||||
|
||||
.data
|
||||
.align 2
|
||||
atomic_add_spinlock:
|
||||
.word 0
|
||||
#endif
|
||||
|
||||
@ Capability word: common symbol of size 4, alignment 4, hidden visibility
.comm OPENSSL_armcap_P,4,4
|
||||
.hidden OPENSSL_armcap_P
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,565 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from ghash-armv4.pl. */
|
||||
#include "arm_arch.h"
|
||||
|
||||
#if defined(__thumb2__) || defined(__clang__)
|
||||
.syntax unified
|
||||
#define ldrplb ldrbpl
|
||||
#define ldrneb ldrbne
|
||||
#endif
|
||||
#if defined(__thumb2__)
|
||||
.thumb
|
||||
#else
|
||||
.code 32
|
||||
#endif
|
||||
|
||||
.text
|
||||
|
||||
@ rem_4bit: table of 16 halfwords (32 bytes), 32-byte aligned, kept in .text.
@ The 4-bit GHASH routines index it with twice the low nibble of the running
@ value and XOR the entry, shifted left by 16, into the top word.
@ Its address is derived from pc by fixed offsets in the code that follows,
@ so nothing may be inserted between this table and those routines.
.type rem_4bit,%object
|
||||
.align 5
|
||||
rem_4bit:
|
||||
.short 0x0000,0x1C20,0x3840,0x2460
|
||||
.short 0x7080,0x6CA0,0x48C0,0x54E0
|
||||
.short 0xE100,0xFD20,0xD940,0xC560
|
||||
.short 0x9180,0x8DA0,0xA9C0,0xB5E0
|
||||
.size rem_4bit,.-rem_4bit
|
||||
|
||||
@ rem_4bit_get: local helper; sets r2 = &rem_4bit and branches to
@ .Lrem_4bit_got (a label outside this block). It is entered with a plain
@ branch and never returns by itself.
.type rem_4bit_get,%function
|
||||
rem_4bit_get:
|
||||
#if defined(__thumb2__)
|
||||
adr r2,rem_4bit
|
||||
#else
|
||||
@ ARM state: pc reads as this instruction + 8 and the 32-byte table sits
@ immediately before it, hence the 8+32 displacement
sub r2,pc,#8+32 @ &rem_4bit
|
||||
#endif
|
||||
b .Lrem_4bit_got
|
||||
@ NOTE(review): the two nops presumably pad this helper to 16 bytes so the
@ fixed pc-relative offset used by the following routine stays valid -- confirm.
nop
|
||||
nop
|
||||
.size rem_4bit_get,.-rem_4bit_get
|
||||
|
||||
@ gcm_ghash_4bit: table-driven GHASH over a run of 16-byte blocks, 4 bits at
@ a time. Register roles as established by the code below:
@   r0 = 16-byte accumulator Xi (read byte-wise, result written back per block)
@   r1 = base of Htbl, an array of 16-byte entries indexed by nibble
@   r2 = input pointer, advanced by 16 per block
@   r3 = byte count on entry; turned into an end pointer (r2 + r3)
@   r4-r7   = running 128-bit value, r7 being the most significant word
@   r8-r11  = table entry being folded in
@   r12/r14 = low nibble / high nibble (pre-shifted by 4) of the current byte
@ Stack: a 32-byte copy of rem_4bit at [sp,#0], the saved end pointer at
@ [sp,#32], then the saved r4-r11 and lr.
@ NOTE(review): the block loop tests its exit condition after processing, so a
@ zero length would still consume one block; presumably callers always pass a
@ non-zero multiple of 16 -- confirm.
.globl gcm_ghash_4bit
|
||||
.type gcm_ghash_4bit,%function
|
||||
.align 4
|
||||
gcm_ghash_4bit:
|
||||
#if defined(__thumb2__)
|
||||
adr r12,rem_4bit
|
||||
#else
|
||||
@ ARM state: relies on rem_4bit starting exactly 48 bytes before this label
sub r12,pc,#8+48 @ &rem_4bit
|
||||
#endif
|
||||
add r3,r2,r3 @ r3 to point at the end
|
||||
stmdb sp!,{r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} @ save r3/end too
|
||||
|
||||
ldmia r12,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy rem_4bit ...
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ ... to stack
|
||||
|
||||
@ prime the loop with the last byte of the first input block and of Xi
ldrb r12,[r2,#15]
|
||||
ldrb r14,[r0,#15]
|
||||
@ ---- one iteration per 16-byte input block ----
.Louter:
|
||||
@ current byte = inp[15] ^ Xi[15], split into high and low nibble
eor r12,r12,r14
|
||||
and r14,r12,#0xf0
|
||||
and r12,r12,#0x0f
|
||||
@ r3 = index of the next byte to fetch; counts 14 down to -1
mov r3,#14
|
||||
|
||||
add r7,r1,r12,lsl#4
|
||||
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
|
||||
add r11,r1,r14
|
||||
ldrb r12,[r2,#14]
|
||||
|
||||
and r14,r4,#0xf @ rem
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
@ r14 = 2*rem: byte offset of a halfword in the stack copy of rem_4bit
add r14,r14,r14
|
||||
@ shift the 128-bit value r7:r6:r5:r4 right by 4 while XORing in Htbl[nhi]
eor r4,r8,r4,lsr#4
|
||||
ldrh r8,[sp,r14] @ rem_4bit[rem]
|
||||
eor r4,r4,r5,lsl#28
|
||||
ldrb r14,[r0,#14]
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
@ next byte = inp[14] ^ Xi[14], split into nibbles for the inner loop
eor r12,r12,r14
|
||||
and r14,r12,#0xf0
|
||||
and r12,r12,#0x0f
|
||||
eor r7,r7,r8,lsl#16
|
||||
|
||||
@ ---- one iteration per remaining byte (indices 14 down to 0) ----
.Linner:
|
||||
add r11,r1,r12,lsl#4
|
||||
and r12,r4,#0xf @ rem
|
||||
@ the flags set here drive every "pl" instruction and the bpl below; none of
@ the instructions in between updates the flags
subs r3,r3,#1
|
||||
add r12,r12,r12
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
|
||||
eor r4,r8,r4,lsr#4
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
ldrh r8,[sp,r12] @ rem_4bit[rem]
|
||||
eor r6,r10,r6,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
@ fetch the next input byte while the index is still >= 0
ldrplb r12,[r2,r3]
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
|
||||
add r11,r1,r14
|
||||
and r14,r4,#0xf @ rem
|
||||
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
|
||||
add r14,r14,r14
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
eor r4,r8,r4,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
@ matching Xi byte; r8 is free here because it has already been consumed above
ldrplb r8,[r0,r3]
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
@ rem_4bit[rem] for the high-nibble step, read from the stack copy
ldrh r9,[sp,r14]
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
eorpl r12,r12,r8
|
||||
eor r7,r11,r7,lsr#4
|
||||
#ifdef __thumb2__
|
||||
itt pl
|
||||
#endif
|
||||
andpl r14,r12,#0xf0
|
||||
andpl r12,r12,#0x0f
|
||||
eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
|
||||
bpl .Linner
|
||||
|
||||
ldr r3,[sp,#32] @ re-load r3/end
|
||||
add r2,r2,#16
|
||||
@ the low byte of r4 is the value stored to Xi[15] below; keep it in r14 as
@ the Xi byte for the next block (the upper bits are masked off at .Louter)
mov r14,r4
|
||||
@ Write the result back to Xi most-significant byte first: r7 -> Xi[0..3],
@ r6 -> Xi[4..7], r5 -> Xi[8..11], r4 -> Xi[12..15]. Three variants: byte
@ reverse plus word store (v7+ little-endian), plain word store (big-endian),
@ or four byte stores.
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r4,r4
|
||||
str r4,[r0,#12]
|
||||
#elif defined(__ARMEB__)
|
||||
str r4,[r0,#12]
|
||||
#else
|
||||
mov r9,r4,lsr#8
|
||||
strb r4,[r0,#12+3]
|
||||
mov r10,r4,lsr#16
|
||||
strb r9,[r0,#12+2]
|
||||
mov r11,r4,lsr#24
|
||||
strb r10,[r0,#12+1]
|
||||
strb r11,[r0,#12]
|
||||
#endif
|
||||
@ end-of-input test; the flags stay live until the ldrneb and "bne .Louter"
@ below because the stores and moves in between do not update them
cmp r2,r3
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r5,r5
|
||||
str r5,[r0,#8]
|
||||
#elif defined(__ARMEB__)
|
||||
str r5,[r0,#8]
|
||||
#else
|
||||
mov r9,r5,lsr#8
|
||||
strb r5,[r0,#8+3]
|
||||
mov r10,r5,lsr#16
|
||||
strb r9,[r0,#8+2]
|
||||
mov r11,r5,lsr#24
|
||||
strb r10,[r0,#8+1]
|
||||
strb r11,[r0,#8]
|
||||
#endif
|
||||
|
||||
#ifdef __thumb2__
|
||||
it ne
|
||||
#endif
|
||||
@ more input left: preload the last byte of the next block
ldrneb r12,[r2,#15]
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r6,r6
|
||||
str r6,[r0,#4]
|
||||
#elif defined(__ARMEB__)
|
||||
str r6,[r0,#4]
|
||||
#else
|
||||
mov r9,r6,lsr#8
|
||||
strb r6,[r0,#4+3]
|
||||
mov r10,r6,lsr#16
|
||||
strb r9,[r0,#4+2]
|
||||
mov r11,r6,lsr#24
|
||||
strb r10,[r0,#4+1]
|
||||
strb r11,[r0,#4]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r7,r7
|
||||
str r7,[r0,#0]
|
||||
#elif defined(__ARMEB__)
|
||||
str r7,[r0,#0]
|
||||
#else
|
||||
mov r9,r7,lsr#8
|
||||
strb r7,[r0,#0+3]
|
||||
mov r10,r7,lsr#16
|
||||
strb r9,[r0,#0+2]
|
||||
mov r11,r7,lsr#24
|
||||
strb r10,[r0,#0+1]
|
||||
strb r11,[r0,#0]
|
||||
#endif
|
||||
|
||||
bne .Louter
|
||||
|
||||
@ drop the 32-byte rem_4bit copy plus the saved end pointer (4 bytes)
add sp,sp,#36
|
||||
#if __ARM_ARCH__>=5
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
||||
#else
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
.size gcm_ghash_4bit,.-gcm_ghash_4bit
|
||||
|
||||
@ gcm_gmult_4bit: one table-driven GHASH multiplication of the 16-byte value
@ at r0, 4 bits at a time; the result overwrites the same 16 bytes.
@ Register roles as established by the code below:
@   r0 = 16-byte value Xi (read byte-wise from index 15 down to 0, then rewritten)
@   r1 = base of Htbl, an array of 16-byte entries indexed by nibble
@   r2 = &rem_4bit, set by the rem_4bit_get helper (rem_4bit is read in place)
@   r3 = index of the next byte to fetch; counts 14 down to -1
@   r4-r7   = running 128-bit value, r7 being the most significant word
@   r8-r11  = table entry being folded in
@   r12/r14 = low nibble / high nibble (pre-shifted by 4) of the current byte
.globl gcm_gmult_4bit
|
||||
.type gcm_gmult_4bit,%function
|
||||
gcm_gmult_4bit:
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
ldrb r12,[r0,#15]
|
||||
@ detour through the helper that loads r2 with the table address; it branches
@ straight back to .Lrem_4bit_got
b rem_4bit_get
|
||||
.Lrem_4bit_got:
|
||||
and r14,r12,#0xf0
|
||||
and r12,r12,#0x0f
|
||||
mov r3,#14
|
||||
|
||||
add r7,r1,r12,lsl#4
|
||||
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
|
||||
ldrb r12,[r0,#14]
|
||||
|
||||
add r11,r1,r14
|
||||
and r14,r4,#0xf @ rem
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
@ r14 = 2*rem: byte offset of a halfword in rem_4bit
add r14,r14,r14
|
||||
@ shift the 128-bit value r7:r6:r5:r4 right by 4 while XORing in Htbl[nhi]
eor r4,r8,r4,lsr#4
|
||||
ldrh r8,[r2,r14] @ rem_4bit[rem]
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
and r14,r12,#0xf0
|
||||
eor r7,r7,r8,lsl#16
|
||||
and r12,r12,#0x0f
|
||||
|
||||
@ ---- one iteration per remaining byte (indices 14 down to 0) ----
.Loop:
|
||||
add r11,r1,r12,lsl#4
|
||||
and r12,r4,#0xf @ rem
|
||||
@ the flags set here drive the "pl" instructions and the bpl below; none of
@ the instructions in between updates the flags
subs r3,r3,#1
|
||||
add r12,r12,r12
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
|
||||
eor r4,r8,r4,lsr#4
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
ldrh r8,[r2,r12] @ rem_4bit[rem]
|
||||
eor r6,r10,r6,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
@ fetch the next byte of Xi while the index is still >= 0
ldrplb r12,[r0,r3]
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
|
||||
add r11,r1,r14
|
||||
and r14,r4,#0xf @ rem
|
||||
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
|
||||
add r14,r14,r14
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
eor r4,r8,r4,lsr#4
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
ldrh r8,[r2,r14] @ rem_4bit[rem]
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
#ifdef __thumb2__
|
||||
itt pl
|
||||
#endif
|
||||
andpl r14,r12,#0xf0
|
||||
andpl r12,r12,#0x0f
|
||||
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
|
||||
bpl .Loop
|
||||
@ Write the result back most-significant byte first: r7 -> [r0,#0..3],
@ r6 -> [r0,#4..7], r5 -> [r0,#8..11], r4 -> [r0,#12..15]. Three variants:
@ byte reverse plus word store (v7+ little-endian), plain word store
@ (big-endian), or four byte stores.
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r4,r4
|
||||
str r4,[r0,#12]
|
||||
#elif defined(__ARMEB__)
|
||||
str r4,[r0,#12]
|
||||
#else
|
||||
mov r9,r4,lsr#8
|
||||
strb r4,[r0,#12+3]
|
||||
mov r10,r4,lsr#16
|
||||
strb r9,[r0,#12+2]
|
||||
mov r11,r4,lsr#24
|
||||
strb r10,[r0,#12+1]
|
||||
strb r11,[r0,#12]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r5,r5
|
||||
str r5,[r0,#8]
|
||||
#elif defined(__ARMEB__)
|
||||
str r5,[r0,#8]
|
||||
#else
|
||||
mov r9,r5,lsr#8
|
||||
strb r5,[r0,#8+3]
|
||||
mov r10,r5,lsr#16
|
||||
strb r9,[r0,#8+2]
|
||||
mov r11,r5,lsr#24
|
||||
strb r10,[r0,#8+1]
|
||||
strb r11,[r0,#8]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r6,r6
|
||||
str r6,[r0,#4]
|
||||
#elif defined(__ARMEB__)
|
||||
str r6,[r0,#4]
|
||||
#else
|
||||
mov r9,r6,lsr#8
|
||||
strb r6,[r0,#4+3]
|
||||
mov r10,r6,lsr#16
|
||||
strb r9,[r0,#4+2]
|
||||
mov r11,r6,lsr#24
|
||||
strb r10,[r0,#4+1]
|
||||
strb r11,[r0,#4]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r7,r7
|
||||
str r7,[r0,#0]
|
||||
#elif defined(__ARMEB__)
|
||||
str r7,[r0,#0]
|
||||
#else
|
||||
mov r9,r7,lsr#8
|
||||
strb r7,[r0,#0+3]
|
||||
mov r10,r7,lsr#16
|
||||
strb r9,[r0,#0+2]
|
||||
mov r11,r7,lsr#24
|
||||
strb r10,[r0,#0+1]
|
||||
strb r11,[r0,#0]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=5
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
||||
#else
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
.size gcm_gmult_4bit,.-gcm_gmult_4bit
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
|
||||
@ gcm_init_neon: derives the "twisted H" value from the 16-byte H at [r1] and
@ stores the 16-byte result at [r0].
@ In:    r0 = output buffer, r1 = H (r1 is advanced by 8)
@ Clobb: r1, q3 (d6/d7), q8, q9, d26
@ Steps: shift the 128-bit H (d7:d6) left by one bit with the carry moved from
@ d6 into d7, then XOR in the constant 0xc2....01 if the top bit of the
@ original d7 was set.
.globl gcm_init_neon
|
||||
.type gcm_init_neon,%function
|
||||
.align 4
|
||||
gcm_init_neon:
|
||||
vld1.64 d7,[r1]! @ load H
|
||||
@ q8 starts as 0xe1 in every byte; the two shifts below turn it into the constant
vmov.i8 q8,#0xe1
|
||||
vld1.64 d6,[r1]
|
||||
vshl.i64 d17,#57
|
||||
vshr.u64 d16,#63 @ t0=0xc2....01
|
||||
@ replicate the top byte of d7; the arithmetic shift by 7 below turns each
@ copy into 0x00 or 0xff according to its top bit
vdup.8 q9,d7[7]
|
||||
@ d26 = bit shifted out of d6, to be carried into d7
vshr.u64 d26,d6,#63
|
||||
vshr.s8 q9,#7 @ broadcast carry bit
|
||||
vshl.i64 q3,q3,#1
|
||||
@ keep the constant only if the carry mask is all ones
vand q8,q8,q9
|
||||
vorr d7,d26 @ H<<<=1
|
||||
veor q3,q3,q8 @ twisted H
|
||||
vstmia r0,{q3}
|
||||
|
||||
bx lr @ bx lr
|
||||
.size gcm_init_neon,.-gcm_init_neon
|
||||
|
||||
@ gcm_gmult_neon: entry stub for a single NEON GHASH multiplication. It loads
@ the operands and constants, then branches to .Lgmult_neon, a label defined
@ outside this block; this routine has no return instruction of its own.
@ In:    r0 = 16-byte Xi (r0 is advanced by 16 by the two loads)
@        r1 = twisted H (16 bytes)
@ Set up for the shared code: q3 = Xi, d26/d27 = twisted H, d28 = d26^d27,
@ d29/d30/d31 = 48-, 32- and 16-bit masks, r3 = 16.
@ NOTE(review): r3 = 16 is presumably the byte count that makes the shared
@ loop finish after one block -- the consuming code is not visible here.
.globl gcm_gmult_neon
|
||||
.type gcm_gmult_neon,%function
|
||||
.align 4
|
||||
gcm_gmult_neon:
|
||||
vld1.64 d7,[r0]! @ load Xi
|
||||
vld1.64 d6,[r0]!
|
||||
vmov.i64 d29,#0x0000ffffffffffff
|
||||
vldmia r1,{d26,d27} @ load twisted H
|
||||
vmov.i64 d30,#0x00000000ffffffff
|
||||
#ifdef __ARMEL__
|
||||
@ little-endian only: reverse the bytes within each 64-bit half of Xi
vrev64.8 q3,q3
|
||||
#endif
|
||||
vmov.i64 d31,#0x000000000000ffff
|
||||
veor d28,d26,d27 @ Karatsuba pre-processing
|
||||
mov r3,#16
|
||||
b .Lgmult_neon
|
||||
.size gcm_gmult_neon,.-gcm_gmult_neon
|
||||
|
||||
@ gcm_ghash_neon: fold a run of 16-byte input blocks into Xi.
@ In:   r0 = Xi (16 bytes, updated in place)
@       r1 = twisted H (16 bytes, loaded into d26/d27)
@       r2 = input, consumed 16 bytes per pass
@       r3 = byte count; the loop subtracts 16 per pass and exits only
@            when the count reaches exactly zero, so it has to be a
@            non-zero multiple of 16
@ Uses: q0-q3, q8-q11, d26-d31. d8-d15 are not touched, no stack use.
@ .Lgmult_neon is also the entry point used by gcm_gmult_neon, which
@ arrives with q3 = Xi and r3 = 16.
@ Each pass builds three 64x64-bit carry-less products out of 8-bit
@ vmull.p8 multiplies (low halves, Karatsuba middle term, high halves),
@ combines them into a 256-bit value and reduces it back to 128 bits.
.globl gcm_ghash_neon
.type gcm_ghash_neon,%function
.align 4
gcm_ghash_neon:
	vld1.64 d1,[r0]! @ load Xi
	vld1.64 d0,[r0]!
	vmov.i64 d29,#0x0000ffffffffffff
	vldmia r1,{d26,d27} @ load twisted H
	vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
	vrev64.8 q0,q0
#endif
	vmov.i64 d31,#0x000000000000ffff
	veor d28,d26,d27 @ Karatsuba pre-processing

.Loop_neon:
	vld1.64 d7,[r2]! @ load inp
	vld1.64 d6,[r2]!
#ifdef __ARMEL__
	vrev64.8 q3,q3
#endif
	veor q3,q0 @ inp^=Xi
.Lgmult_neon:
	@ product 1: d26 * d6 -> q0
	vext.8 d16, d26, d26, #1 @ A1
	vmull.p8 q8, d16, d6 @ F = A1*B
	vext.8 d0, d6, d6, #1 @ B1
	vmull.p8 q0, d26, d0 @ E = A*B1
	vext.8 d18, d26, d26, #2 @ A2
	vmull.p8 q9, d18, d6 @ H = A2*B
	vext.8 d22, d6, d6, #2 @ B2
	vmull.p8 q11, d26, d22 @ G = A*B2
	vext.8 d20, d26, d26, #3 @ A3
	veor q8, q8, q0 @ L = E + F
	vmull.p8 q10, d20, d6 @ J = A3*B
	vext.8 d0, d6, d6, #3 @ B3
	veor q9, q9, q11 @ M = G + H
	vmull.p8 q0, d26, d0 @ I = A*B3
	veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
	vand d17, d17, d29
	vext.8 d22, d6, d6, #4 @ B4
	veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
	vand d19, d19, d30
	vmull.p8 q11, d26, d22 @ K = A*B4
	veor q10, q10, q0 @ N = I + J
	veor d16, d16, d17
	veor d18, d18, d19
	veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
	vand d21, d21, d31
	vext.8 q8, q8, q8, #15
	veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
	vmov.i64 d23, #0
	vext.8 q9, q9, q9, #14
	veor d20, d20, d21
	vmull.p8 q0, d26, d6 @ D = A*B
	vext.8 q11, q11, q11, #12
	vext.8 q10, q10, q10, #13
	veor q8, q8, q9
	veor q10, q10, q11
	veor q0, q0, q8
	veor q0, q0, q10
	veor d6,d6,d7 @ Karatsuba pre-processing
	@ product 2: d28 * d6 -> q1 (middle term, d28 = d26^d27, d6 = d6^d7)
	vext.8 d16, d28, d28, #1 @ A1
	vmull.p8 q8, d16, d6 @ F = A1*B
	vext.8 d2, d6, d6, #1 @ B1
	vmull.p8 q1, d28, d2 @ E = A*B1
	vext.8 d18, d28, d28, #2 @ A2
	vmull.p8 q9, d18, d6 @ H = A2*B
	vext.8 d22, d6, d6, #2 @ B2
	vmull.p8 q11, d28, d22 @ G = A*B2
	vext.8 d20, d28, d28, #3 @ A3
	veor q8, q8, q1 @ L = E + F
	vmull.p8 q10, d20, d6 @ J = A3*B
	vext.8 d2, d6, d6, #3 @ B3
	veor q9, q9, q11 @ M = G + H
	vmull.p8 q1, d28, d2 @ I = A*B3
	veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
	vand d17, d17, d29
	vext.8 d22, d6, d6, #4 @ B4
	veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
	vand d19, d19, d30
	vmull.p8 q11, d28, d22 @ K = A*B4
	veor q10, q10, q1 @ N = I + J
	veor d16, d16, d17
	veor d18, d18, d19
	veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
	vand d21, d21, d31
	vext.8 q8, q8, q8, #15
	veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
	vmov.i64 d23, #0
	vext.8 q9, q9, q9, #14
	veor d20, d20, d21
	vmull.p8 q1, d28, d6 @ D = A*B
	vext.8 q11, q11, q11, #12
	vext.8 q10, q10, q10, #13
	veor q8, q8, q9
	veor q10, q10, q11
	veor q1, q1, q8
	veor q1, q1, q10
	@ product 3: d27 * d7 -> q2
	vext.8 d16, d27, d27, #1 @ A1
	vmull.p8 q8, d16, d7 @ F = A1*B
	vext.8 d4, d7, d7, #1 @ B1
	vmull.p8 q2, d27, d4 @ E = A*B1
	vext.8 d18, d27, d27, #2 @ A2
	vmull.p8 q9, d18, d7 @ H = A2*B
	vext.8 d22, d7, d7, #2 @ B2
	vmull.p8 q11, d27, d22 @ G = A*B2
	vext.8 d20, d27, d27, #3 @ A3
	veor q8, q8, q2 @ L = E + F
	vmull.p8 q10, d20, d7 @ J = A3*B
	vext.8 d4, d7, d7, #3 @ B3
	veor q9, q9, q11 @ M = G + H
	vmull.p8 q2, d27, d4 @ I = A*B3
	veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
	vand d17, d17, d29
	vext.8 d22, d7, d7, #4 @ B4
	veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
	vand d19, d19, d30
	vmull.p8 q11, d27, d22 @ K = A*B4
	veor q10, q10, q2 @ N = I + J
	veor d16, d16, d17
	veor d18, d18, d19
	veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
	vand d21, d21, d31
	vext.8 q8, q8, q8, #15
	veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
	vmov.i64 d23, #0
	vext.8 q9, q9, q9, #14
	veor d20, d20, d21
	vmull.p8 q2, d27, d7 @ D = A*B
	vext.8 q11, q11, q11, #12
	vext.8 q10, q10, q10, #13
	veor q8, q8, q9
	veor q10, q10, q11
	veor q2, q2, q8
	veor q2, q2, q10
	veor q1,q1,q0 @ Karatsuba post-processing
	veor q1,q1,q2
	veor d1,d1,d2
	veor d4,d4,d3 @ Xh|Xl - 256-bit result

	@ equivalent of reduction_avx from ghash-x86_64.pl
	vshl.i64 q9,q0,#57 @ 1st phase
	vshl.i64 q10,q0,#62
	veor q10,q10,q9 @
	vshl.i64 q9,q0,#63
	veor q10, q10, q9 @
	veor d1,d1,d20 @
	veor d4,d4,d21

	vshr.u64 q10,q0,#1 @ 2nd phase
	veor q2,q2,q0
	veor q0,q0,q10 @
	vshr.u64 q10,q10,#6
	vshr.u64 q0,q0,#1 @
	veor q0,q0,q2 @
	veor q0,q0,q10 @

	subs r3,#16
	bne .Loop_neon

#ifdef __ARMEL__
	vrev64.8 q0,q0
#endif
	sub r0,#16 @ undo the two post-indexed Xi loads
	vst1.64 d1,[r0]! @ write out Xi
	vst1.64 d0,[r0]

	bx lr @ bx lr
.size gcm_ghash_neon,.-gcm_ghash_neon
|
||||
#endif
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
@@ -1,244 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from ghashv8-armx.pl. */
|
||||
#include "arm_arch.h"
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.fpu neon
|
||||
#ifdef __thumb2__
|
||||
.syntax unified
|
||||
.thumb
|
||||
# define INST(a,b,c,d) .byte c,0xef,a,b
|
||||
#else
|
||||
.code 32
|
||||
# define INST(a,b,c,d) .byte a,b,c,0xf2
|
||||
#endif
|
||||
|
||||
.text
|
||||
@ gcm_init_v8: build the three-entry key table used by the pmull routines.
@ In:   r0 = table destination; 48 bytes are written and r0 is advanced
@            past them by the post-indexed stores
@       r1 = source of the 16-byte hash key H
@ Out:  entry 0 = twisted H (q12)
@       entry 1 = packed Karatsuba pre-processed halves (q13)
@       entry 2 = H^2 (q14)
@ Uses: q0-q3, q8-q14. d8-d15 are not touched, no stack use.
@ The pmull/pmull2 instructions are emitted as raw bytes through the INST
@ macro defined earlier in the file; the trailing comment on each INST
@ line gives the intended mnemonic.
.globl gcm_init_v8
.type gcm_init_v8,%function
.align 4
gcm_init_v8:
	vld1.64 {q9},[r1] @ load input H
	vmov.i8 q11,#0xe1
	vshl.i64 q11,q11,#57 @ 0xc2.0
	vext.8 q3,q9,q9,#8
	vshr.u64 q10,q11,#63
	vdup.32 q9,d18[1]
	vext.8 q8,q10,q11,#8 @ t0=0xc2....01
	vshr.u64 q10,q3,#63
	vshr.s32 q9,q9,#31 @ broadcast carry bit
	vand q10,q10,q8
	vshl.i64 q3,q3,#1
	vext.8 q10,q10,q10,#8
	vand q8,q8,q9
	vorr q3,q3,q10 @ H<<<=1
	veor q12,q3,q8 @ twisted H
	vst1.64 {q12},[r0]! @ store Htable[0]

	@ calculate H^2
	vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
	INST(0xa8,0x0e,0xa8,0xf2) @ pmull q0,q12,q12
	veor q8,q8,q12
	INST(0xa9,0x4e,0xa9,0xf2) @ pmull2 q2,q12,q12
	INST(0xa0,0x2e,0xa0,0xf2) @ pmull q1,q8,q8

	vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
	veor q10,q0,q2
	veor q1,q1,q9
	veor q1,q1,q10
	INST(0x26,0x4e,0xe0,0xf2) @ pmull q10,q0,q11 @ 1st phase

	vmov d4,d3 @ Xh|Xm - 256-bit result
	vmov d3,d0 @ Xm is rotated Xl
	veor q0,q1,q10

	vext.8 q10,q0,q0,#8 @ 2nd phase
	INST(0x26,0x0e,0xa0,0xf2) @ pmull q0,q0,q11
	veor q10,q10,q2
	veor q14,q0,q10

	vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
	veor q9,q9,q14
	vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
	vst1.64 {q13,q14},[r0]! @ store Htable[1..2]
	bx lr
.size gcm_init_v8,.-gcm_init_v8
|
||||
@ gcm_gmult_v8: multiply the 16-byte value Xi by the key, in place.
@ In:   r0 = Xi (16 bytes, read and then overwritten; r0 is not modified)
@       r1 = key table; the first 32 bytes are loaded into q12/q13
@            (twisted H and the packed Karatsuba term)
@ Uses: q0-q3, q9-q13. d8-d15 are not touched, no stack use.
@ On little-endian builds (no __ARMEB__) the value is byte-reversed on
@ load and again before the store.
@ pmull/pmull2 are emitted as raw bytes through the INST macro defined
@ earlier in the file.
.globl gcm_gmult_v8
.type gcm_gmult_v8,%function
.align 4
gcm_gmult_v8:
	vld1.64 {q9},[r0] @ load Xi
	vmov.i8 q11,#0xe1
	vld1.64 {q12,q13},[r1] @ load twisted H, ...
	vshl.u64 q11,q11,#57
#ifndef __ARMEB__
	vrev64.8 q9,q9
#endif
	vext.8 q3,q9,q9,#8

	INST(0x86,0x0e,0xa8,0xf2) @ pmull q0,q12,q3 @ H.lo·Xi.lo
	veor q9,q9,q3 @ Karatsuba pre-processing
	INST(0x87,0x4e,0xa9,0xf2) @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
	INST(0xa2,0x2e,0xaa,0xf2) @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)

	vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
	veor q10,q0,q2
	veor q1,q1,q9
	veor q1,q1,q10
	INST(0x26,0x4e,0xe0,0xf2) @ pmull q10,q0,q11 @ 1st phase of reduction

	vmov d4,d3 @ Xh|Xm - 256-bit result
	vmov d3,d0 @ Xm is rotated Xl
	veor q0,q1,q10

	vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
	INST(0x26,0x0e,0xa0,0xf2) @ pmull q0,q0,q11
	veor q10,q10,q2
	veor q0,q0,q10

#ifndef __ARMEB__
	vrev64.8 q0,q0
#endif
	vext.8 q0,q0,q0,#8
	vst1.64 {q0},[r0] @ write out Xi

	bx lr
.size gcm_gmult_v8,.-gcm_gmult_v8
|
||||
@ gcm_ghash_v8: fold a run of 16-byte input blocks into Xi using pmull.
@ In:   r0 = Xi (16 bytes, read at entry and overwritten at exit)
@       r1 = key table; 48 bytes are loaded into q12 (twisted H),
@            q13 (packed Karatsuba terms) and q14 (H^2)
@       r2 = input pointer
@       r3 = byte count
@            NOTE(review): the code only distinguishes 32-byte pairs and
@            one trailing 16-byte block, so the count is presumably a
@            non-zero multiple of 16 - confirm against callers.
@ Uses: q0-q14, r12. d8-d15 are pushed at entry and popped before return
@ because q4-q7 are used as scratch.
@ Flow: .Loop_mod2x_v8 consumes two blocks per pass with the loads for the
@ next pass issued early; r12 is the post-increment for those loads and is
@ zeroed when no further data remains so the pointer stops advancing.
@ .Lodd_tail_v8 handles a single remaining block.
@ pmull/pmull2 are emitted as raw bytes through the INST macro defined
@ earlier in the file.
.globl gcm_ghash_v8
.type gcm_ghash_v8,%function
.align 4
gcm_ghash_v8:
	vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
	vld1.64 {q0},[r0] @ load [rotated] Xi
	@ "[rotated]" means that
	@ loaded value would have
	@ to be rotated in order to
	@ make it appear as in
	@ algorithm specification
	subs r3,r3,#32 @ see if r3 is 32 or larger
	mov r12,#16 @ r12 is used as post-
	@ increment for input pointer;
	@ as loop is modulo-scheduled
	@ r12 is zeroed just in time
	@ to preclude overstepping
	@ inp[len], which means that
	@ last block[s] are actually
	@ loaded twice, but last
	@ copy is not processed
	vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
	vmov.i8 q11,#0xe1
	vld1.64 {q14},[r1]
	it eq
	moveq r12,#0 @ is it time to zero r12?
	vext.8 q0,q0,q0,#8 @ rotate Xi
	vld1.64 {q8},[r2]! @ load [rotated] I[0]
	vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant
#ifndef __ARMEB__
	vrev64.8 q8,q8
	vrev64.8 q0,q0
#endif
	vext.8 q3,q8,q8,#8 @ rotate I[0]
	blo .Lodd_tail_v8 @ r3 was less than 32
	vld1.64 {q9},[r2],r12 @ load [rotated] I[1]
#ifndef __ARMEB__
	vrev64.8 q9,q9
#endif
	vext.8 q7,q9,q9,#8
	veor q3,q3,q0 @ I[i]^=Xi
	INST(0x8e,0x8e,0xa8,0xf2) @ pmull q4,q12,q7 @ H·Ii+1
	veor q9,q9,q7 @ Karatsuba pre-processing
	INST(0x8f,0xce,0xa9,0xf2) @ pmull2 q6,q12,q7
	b .Loop_mod2x_v8

.align 4
.Loop_mod2x_v8:
	vext.8 q10,q3,q3,#8
	subs r3,r3,#32 @ is there more data?
	INST(0x86,0x0e,0xac,0xf2) @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
	it lo
	movlo r12,#0 @ is it time to zero r12?

	INST(0xa2,0xae,0xaa,0xf2) @ pmull q5,q13,q9
	veor q10,q10,q3 @ Karatsuba pre-processing
	INST(0x87,0x4e,0xad,0xf2) @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi
	veor q0,q0,q4 @ accumulate
	INST(0xa5,0x2e,0xab,0xf2) @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
	vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2]

	veor q2,q2,q6
	it eq
	moveq r12,#0 @ is it time to zero r12?
	veor q1,q1,q5

	vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
	veor q10,q0,q2
	veor q1,q1,q9
	vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3]
#ifndef __ARMEB__
	vrev64.8 q8,q8
#endif
	veor q1,q1,q10
	INST(0x26,0x4e,0xe0,0xf2) @ pmull q10,q0,q11 @ 1st phase of reduction

#ifndef __ARMEB__
	vrev64.8 q9,q9
#endif
	vmov d4,d3 @ Xh|Xm - 256-bit result
	vmov d3,d0 @ Xm is rotated Xl
	vext.8 q7,q9,q9,#8
	vext.8 q3,q8,q8,#8
	veor q0,q1,q10
	INST(0x8e,0x8e,0xa8,0xf2) @ pmull q4,q12,q7 @ H·Ii+1
	veor q3,q3,q2 @ accumulate q3 early

	vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
	INST(0x26,0x0e,0xa0,0xf2) @ pmull q0,q0,q11
	veor q3,q3,q10
	veor q9,q9,q7 @ Karatsuba pre-processing
	veor q3,q3,q0
	INST(0x8f,0xce,0xa9,0xf2) @ pmull2 q6,q12,q7
	bhs .Loop_mod2x_v8 @ there was at least 32 more bytes

	veor q2,q2,q10
	vext.8 q3,q8,q8,#8 @ re-construct q3
	adds r3,r3,#32 @ re-construct r3
	veor q0,q0,q2 @ re-construct q0
	beq .Ldone_v8 @ is r3 zero?
.Lodd_tail_v8:
	vext.8 q10,q0,q0,#8
	veor q3,q3,q0 @ inp^=Xi
	veor q9,q8,q10 @ q9 is rotated inp^Xi

	INST(0x86,0x0e,0xa8,0xf2) @ pmull q0,q12,q3 @ H.lo·Xi.lo
	veor q9,q9,q3 @ Karatsuba pre-processing
	INST(0x87,0x4e,0xa9,0xf2) @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
	INST(0xa2,0x2e,0xaa,0xf2) @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)

	vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
	veor q10,q0,q2
	veor q1,q1,q9
	veor q1,q1,q10
	INST(0x26,0x4e,0xe0,0xf2) @ pmull q10,q0,q11 @ 1st phase of reduction

	vmov d4,d3 @ Xh|Xm - 256-bit result
	vmov d3,d0 @ Xm is rotated Xl
	veor q0,q1,q10

	vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
	INST(0x26,0x0e,0xa0,0xf2) @ pmull q0,q0,q11
	veor q10,q10,q2
	veor q0,q0,q10

.Ldone_v8:
#ifndef __ARMEB__
	vrev64.8 q0,q0
#endif
	vext.8 q0,q0,q0,#8
	vst1.64 {q0},[r0] @ write out Xi

	vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
	bx lr
.size gcm_ghash_v8,.-gcm_ghash_v8
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,819 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from rc4-586.pl. */
|
||||
#ifdef PIC
|
||||
.text
|
||||
/*
 * RC4 (PIC build) -- XOR a buffer with the RC4 keystream, i386 AT&T syntax.
 *
 * Stack arguments as read after the four register pushes:
 *   20(%esp) -> %edi  key state: x at +0, y at +4, S table at +8
 *   24(%esp) -> %edx  byte count (0 returns at once, state untouched)
 *   28(%esp) -> %esi  input pointer
 *   32(%esp) -> %ebp  output pointer, kept as the delta (out - in)
 *
 * Register roles in the loops: %eax = x, %ebx = y, %ecx = S[x] preloaded
 * for the next step, %edx = scratch.
 *
 * Argument slots are reused as locals: 24(%esp) holds the end-of-input
 * pointer, 28(%esp) the 4-byte loop limit, 32(%esp) the saved delta.
 * The MMX path also parks its loop limit in the y slot of the key state
 * (-4(%edi)); the exit code stores the real y back there.
 *
 * Paths:
 *   .L006loop_mmx  8 bytes per pass, taken when at least 8 bytes remain
 *                  and bit 26 of OPENSSL_ia32cap_P is set
 *   .L008loop4     4 bytes per pass, integer registers only
 *   .L002loop1     1 byte per pass (short inputs and tails)
 *   .L009cloop1    1 byte per pass for a table of 1-byte entries, selected
 *                  when the dword at table offset 256 is -1
 *
 * Saves and restores %ebp, %ebx, %esi, %edi. x and y are written back to
 * the key state on exit.
 */
.globl RC4
.type RC4,@function
.align 16
RC4:
.L_RC4_begin:
#ifdef __CET__

	.byte 243,15,30,251		/* endbr32 */
#endif

	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	movl 20(%esp),%edi
	movl 24(%esp),%edx
	movl 28(%esp),%esi
	movl 32(%esp),%ebp
	xorl %eax,%eax
	xorl %ebx,%ebx
	cmpl $0,%edx
	je .L000abort
	movb (%edi),%al
	movb 4(%edi),%bl
	addl $8,%edi			/* edi = S table */
	leal (%esi,%edx,1),%ecx
	subl %esi,%ebp			/* ebp = out - in */
	movl %ecx,24(%esp)		/* end-of-input pointer */
	incb %al
	cmpl $-1,256(%edi)		/* byte-table marker? */
	je .L001RC4_CHAR
	movl (%edi,%eax,4),%ecx
	andl $-4,%edx
	jz .L002loop1
	movl %ebp,32(%esp)
	testl $-8,%edx
	jz .L003go4loop4
	/* call/pop yields the current address for a position-independent lookup */
	call .L004PIC_me_up
.L004PIC_me_up:
	popl %ebp
	leal OPENSSL_ia32cap_P-.L004PIC_me_up(%ebp),%ebp
	btl $26,(%ebp)
	jnc .L003go4loop4
	movl 32(%esp),%ebp
	andl $-8,%edx
	leal -8(%esi,%edx,1),%edx
	movl %edx,-4(%edi)		/* loop limit parked in the y slot */
	addb %cl,%bl
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	incl %eax
	addl %ecx,%edx
	movzbl %al,%eax
	movzbl %dl,%edx
	movq (%esi),%mm0
	movl (%edi,%eax,4),%ecx
	movd (%edi,%edx,4),%mm2
	jmp .L005loop_mmx_enter
.align 16
.L006loop_mmx:
	addb %cl,%bl
	psllq $56,%mm1
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	incl %eax
	addl %ecx,%edx
	movzbl %al,%eax
	movzbl %dl,%edx
	pxor %mm1,%mm2
	movq (%esi),%mm0
	movq %mm2,-8(%ebp,%esi,1)
	movl (%edi,%eax,4),%ecx
	movd (%edi,%edx,4),%mm2
.L005loop_mmx_enter:
	addb %cl,%bl
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	incl %eax
	addl %ecx,%edx
	movzbl %al,%eax
	movzbl %dl,%edx
	pxor %mm0,%mm2
	movl (%edi,%eax,4),%ecx
	movd (%edi,%edx,4),%mm1
	addb %cl,%bl
	psllq $8,%mm1
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	incl %eax
	addl %ecx,%edx
	movzbl %al,%eax
	movzbl %dl,%edx
	pxor %mm1,%mm2
	movl (%edi,%eax,4),%ecx
	movd (%edi,%edx,4),%mm1
	addb %cl,%bl
	psllq $16,%mm1
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	incl %eax
	addl %ecx,%edx
	movzbl %al,%eax
	movzbl %dl,%edx
	pxor %mm1,%mm2
	movl (%edi,%eax,4),%ecx
	movd (%edi,%edx,4),%mm1
	addb %cl,%bl
	psllq $24,%mm1
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	incl %eax
	addl %ecx,%edx
	movzbl %al,%eax
	movzbl %dl,%edx
	pxor %mm1,%mm2
	movl (%edi,%eax,4),%ecx
	movd (%edi,%edx,4),%mm1
	addb %cl,%bl
	psllq $32,%mm1
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	incl %eax
	addl %ecx,%edx
	movzbl %al,%eax
	movzbl %dl,%edx
	pxor %mm1,%mm2
	movl (%edi,%eax,4),%ecx
	movd (%edi,%edx,4),%mm1
	addb %cl,%bl
	psllq $40,%mm1
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	incl %eax
	addl %ecx,%edx
	movzbl %al,%eax
	movzbl %dl,%edx
	pxor %mm1,%mm2
	movl (%edi,%eax,4),%ecx
	movd (%edi,%edx,4),%mm1
	addb %cl,%bl
	psllq $48,%mm1
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	incl %eax
	addl %ecx,%edx
	movzbl %al,%eax
	movzbl %dl,%edx
	pxor %mm1,%mm2
	movl (%edi,%eax,4),%ecx
	movd (%edi,%edx,4),%mm1
	movl %ebx,%edx
	xorl %ebx,%ebx
	movb %dl,%bl			/* ebx = low byte of y only */
	cmpl -4(%edi),%esi
	leal 8(%esi),%esi		/* lea leaves the cmp flags intact */
	jb .L006loop_mmx
	psllq $56,%mm1
	pxor %mm1,%mm2
	movq %mm2,-8(%ebp,%esi,1)
	emms
	cmpl 24(%esp),%esi
	je .L007done
	jmp .L002loop1
.align 16
.L003go4loop4:
	leal -4(%esi,%edx,1),%edx
	movl %edx,28(%esp)		/* 4-byte loop limit */
.L008loop4:
	addb %cl,%bl
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	addl %ecx,%edx
	incb %al
	andl $255,%edx
	movl (%edi,%eax,4),%ecx
	movl (%edi,%edx,4),%ebp
	addb %cl,%bl
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	addl %ecx,%edx
	incb %al
	andl $255,%edx
	rorl $8,%ebp
	movl (%edi,%eax,4),%ecx
	orl (%edi,%edx,4),%ebp
	addb %cl,%bl
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	addl %ecx,%edx
	incb %al
	andl $255,%edx
	rorl $8,%ebp
	movl (%edi,%eax,4),%ecx
	orl (%edi,%edx,4),%ebp
	addb %cl,%bl
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	addl %ecx,%edx
	incb %al
	andl $255,%edx
	rorl $8,%ebp
	movl 32(%esp),%ecx		/* ecx = out - in */
	orl (%edi,%edx,4),%ebp
	rorl $8,%ebp
	xorl (%esi),%ebp
	cmpl 28(%esp),%esi
	movl %ebp,(%ecx,%esi,1)
	leal 4(%esi),%esi
	movl (%edi,%eax,4),%ecx
	jb .L008loop4
	cmpl 24(%esp),%esi
	je .L007done
	movl 32(%esp),%ebp
.align 16
.L002loop1:
	addb %cl,%bl
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	addl %ecx,%edx
	incb %al
	andl $255,%edx
	movl (%edi,%edx,4),%edx
	xorb (%esi),%dl
	leal 1(%esi),%esi
	movl (%edi,%eax,4),%ecx
	cmpl 24(%esp),%esi
	movb %dl,-1(%ebp,%esi,1)
	jb .L002loop1
	jmp .L007done
.align 16
.L001RC4_CHAR:
	movzbl (%edi,%eax,1),%ecx
.L009cloop1:
	addb %cl,%bl
	movzbl (%edi,%ebx,1),%edx
	movb %cl,(%edi,%ebx,1)
	movb %dl,(%edi,%eax,1)
	addb %cl,%dl
	movzbl (%edi,%edx,1),%edx
	addb $1,%al
	xorb (%esi),%dl
	leal 1(%esi),%esi
	movzbl (%edi,%eax,1),%ecx
	cmpl 24(%esp),%esi
	movb %dl,-1(%ebp,%esi,1)
	jb .L009cloop1
.L007done:
	decb %al			/* undo the x+1 bias applied at entry */
	movl %ebx,-4(%edi)		/* store y */
	movb %al,-8(%edi)		/* store x */
.L000abort:
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size RC4,.-.L_RC4_begin
|
||||
/*
 * RC4_set_key (PIC build) -- initialise the RC4 key state from key bytes.
 *
 * Stack arguments as read after the four register pushes:
 *   20(%esp) -> %edi  key state (x at +0, y at +4, S table at +8)
 *   24(%esp) -> %ebp  key length in bytes
 *   28(%esp) -> %esi  key bytes
 *
 * %esi is moved to the end of the key and %ebp negated, so (%esi,%ebp)
 * walks the key while %ebp counts up to zero; on reaching zero it is
 * reloaded from the y slot (-4(%edi)), where -len was stashed, so the key
 * bytes repeat.
 * NOTE(review): a length of 0 is not checked here - confirm callers never
 * pass it.
 *
 * Bit 20 of OPENSSL_ia32cap_P selects the table layout: clear gives 256
 * dword entries, set gives 256 byte entries plus a -1 marker stored at
 * table offset 256.
 *
 * Both loops fill the identity permutation first and then run the 256
 * swap steps. x and y are zeroed on exit.
 * Saves and restores %ebp, %ebx, %esi, %edi.
 */
.globl RC4_set_key
.type RC4_set_key,@function
.align 16
RC4_set_key:
.L_RC4_set_key_begin:
#ifdef __CET__

	.byte 243,15,30,251		/* endbr32 */
#endif

	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	movl 20(%esp),%edi
	movl 24(%esp),%ebp
	movl 28(%esp),%esi
	/* call/pop yields the current address for a position-independent lookup */
	call .L010PIC_me_up
.L010PIC_me_up:
	popl %edx
	leal OPENSSL_ia32cap_P-.L010PIC_me_up(%edx),%edx
	leal 8(%edi),%edi		/* edi = S table */
	leal (%esi,%ebp,1),%esi		/* esi = end of key */
	negl %ebp			/* ebp = -len */
	xorl %eax,%eax
	movl %ebp,-4(%edi)		/* stash -len in the y slot */
	btl $20,(%edx)
	jc .L011c1stloop
.align 16
.L012w1stloop:
	movl %eax,(%edi,%eax,4)
	addb $1,%al
	jnc .L012w1stloop		/* carry out of %al after 256 entries */
	xorl %ecx,%ecx
	xorl %edx,%edx
.align 16
.L013w2ndloop:
	movl (%edi,%ecx,4),%eax
	addb (%esi,%ebp,1),%dl
	addb %al,%dl
	addl $1,%ebp
	movl (%edi,%edx,4),%ebx		/* mov keeps the flags of the addl */
	jnz .L014wnowrap
	movl -4(%edi),%ebp		/* key exhausted: restart at -len */
.L014wnowrap:
	movl %eax,(%edi,%edx,4)
	movl %ebx,(%edi,%ecx,4)
	addb $1,%cl
	jnc .L013w2ndloop
	jmp .L015exit
.align 16
.L011c1stloop:
	movb %al,(%edi,%eax,1)
	addb $1,%al
	jnc .L011c1stloop
	xorl %ecx,%ecx
	xorl %edx,%edx
	xorl %ebx,%ebx
.align 16
.L016c2ndloop:
	movb (%edi,%ecx,1),%al
	addb (%esi,%ebp,1),%dl
	addb %al,%dl
	addl $1,%ebp
	movb (%edi,%edx,1),%bl
	jnz .L017cnowrap
	movl -4(%edi),%ebp
.L017cnowrap:
	movb %al,(%edi,%edx,1)
	movb %bl,(%edi,%ecx,1)
	addb $1,%cl
	jnc .L016c2ndloop
	movl $-1,256(%edi)		/* mark the table as byte-sized */
.L015exit:
	xorl %eax,%eax
	movl %eax,-8(%edi)		/* x = 0 */
	movl %eax,-4(%edi)		/* y = 0 */
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size RC4_set_key,.-.L_RC4_set_key_begin
|
||||
/*
 * RC4_options (PIC build) -- return in %eax the address of a
 * NUL-terminated string naming the RC4 code path chosen on this CPU.
 * Takes no arguments; writes %eax and %edx only.
 *
 * Strings stored at .L019opts:
 *   offset  0  "rc4(4x,int)"   bits 20 and 26 both clear
 *   offset 12  "rc4(1x,char)"  bit 20 of OPENSSL_ia32cap_P set
 *   offset 25  "rc4(8x,mmx)"   bit 20 clear, bit 26 set
 * An ident string follows the three option strings.
 */
.globl RC4_options
.type RC4_options,@function
.align 16
RC4_options:
.L_RC4_options_begin:
#ifdef __CET__

	.byte 243,15,30,251		/* endbr32 */
#endif

	/* call/pop yields the current address for position-independent lookups */
	call .L018pic_point
.L018pic_point:
	popl %eax
	leal .L019opts-.L018pic_point(%eax),%eax
	call .L020PIC_me_up
.L020PIC_me_up:
	popl %edx
	leal OPENSSL_ia32cap_P-.L020PIC_me_up(%edx),%edx
	movl (%edx),%edx
	btl $20,%edx
	jc .L0211xchar
	btl $26,%edx
	jnc .L022ret
	addl $25,%eax
	ret
.L0211xchar:
	addl $12,%eax
.L022ret:
	ret
.align 64
.L019opts:
.byte 114,99,52,40,52,120,44,105,110,116,41,0
.byte 114,99,52,40,49,120,44,99,104,97,114,41,0
.byte 114,99,52,40,56,120,44,109,109,120,41,0
.byte 82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89
.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.size RC4_options,.-.L_RC4_options_begin
|
||||
.comm OPENSSL_ia32cap_P,16,4
|
||||
|
||||
.section ".note.gnu.property", "a"
|
||||
.p2align 2
|
||||
.long 1f - 0f
|
||||
.long 4f - 1f
|
||||
.long 5
|
||||
0:
|
||||
.asciz "GNU"
|
||||
1:
|
||||
.p2align 2
|
||||
.long 0xc0000002
|
||||
.long 3f - 2f
|
||||
2:
|
||||
.long 3
|
||||
3:
|
||||
.p2align 2
|
||||
4:
|
||||
#else
|
||||
.text
|
||||
/*
 * RC4 (non-PIC build) -- XOR a buffer with the RC4 keystream, i386 AT&T
 * syntax. Same algorithm as the PIC variant in the other preprocessor
 * branch; the only difference is that OPENSSL_ia32cap_P is addressed
 * absolutely.
 *
 * Stack arguments as read after the four register pushes:
 *   20(%esp) -> %edi  key state: x at +0, y at +4, S table at +8
 *   24(%esp) -> %edx  byte count (0 returns at once, state untouched)
 *   28(%esp) -> %esi  input pointer
 *   32(%esp) -> %ebp  output pointer, kept as the delta (out - in)
 *
 * Register roles in the loops: %eax = x, %ebx = y, %ecx = S[x] preloaded
 * for the next step, %edx = scratch.
 *
 * Argument slots are reused as locals: 24(%esp) holds the end-of-input
 * pointer, 28(%esp) the 4-byte loop limit, 32(%esp) the saved delta.
 * The MMX path also parks its loop limit in the y slot of the key state
 * (-4(%edi)); the exit code stores the real y back there.
 *
 * Paths:
 *   .L005loop_mmx  8 bytes per pass, taken when at least 8 bytes remain
 *                  and bit 26 of OPENSSL_ia32cap_P is set
 *   .L007loop4     4 bytes per pass, integer registers only
 *   .L002loop1     1 byte per pass (short inputs and tails)
 *   .L008cloop1    1 byte per pass for a table of 1-byte entries, selected
 *                  when the dword at table offset 256 is -1
 *
 * Saves and restores %ebp, %ebx, %esi, %edi. x and y are written back to
 * the key state on exit.
 */
.globl RC4
.type RC4,@function
.align 16
RC4:
.L_RC4_begin:
#ifdef __CET__

	.byte 243,15,30,251		/* endbr32 */
#endif

	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	movl 20(%esp),%edi
	movl 24(%esp),%edx
	movl 28(%esp),%esi
	movl 32(%esp),%ebp
	xorl %eax,%eax
	xorl %ebx,%ebx
	cmpl $0,%edx
	je .L000abort
	movb (%edi),%al
	movb 4(%edi),%bl
	addl $8,%edi			/* edi = S table */
	leal (%esi,%edx,1),%ecx
	subl %esi,%ebp			/* ebp = out - in */
	movl %ecx,24(%esp)		/* end-of-input pointer */
	incb %al
	cmpl $-1,256(%edi)		/* byte-table marker? */
	je .L001RC4_CHAR
	movl (%edi,%eax,4),%ecx
	andl $-4,%edx
	jz .L002loop1
	movl %ebp,32(%esp)
	testl $-8,%edx
	jz .L003go4loop4
	leal OPENSSL_ia32cap_P,%ebp
	btl $26,(%ebp)
	jnc .L003go4loop4
	movl 32(%esp),%ebp
	andl $-8,%edx
	leal -8(%esi,%edx,1),%edx
	movl %edx,-4(%edi)		/* loop limit parked in the y slot */
	addb %cl,%bl
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	incl %eax
	addl %ecx,%edx
	movzbl %al,%eax
	movzbl %dl,%edx
	movq (%esi),%mm0
	movl (%edi,%eax,4),%ecx
	movd (%edi,%edx,4),%mm2
	jmp .L004loop_mmx_enter
.align 16
.L005loop_mmx:
	addb %cl,%bl
	psllq $56,%mm1
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	incl %eax
	addl %ecx,%edx
	movzbl %al,%eax
	movzbl %dl,%edx
	pxor %mm1,%mm2
	movq (%esi),%mm0
	movq %mm2,-8(%ebp,%esi,1)
	movl (%edi,%eax,4),%ecx
	movd (%edi,%edx,4),%mm2
.L004loop_mmx_enter:
	addb %cl,%bl
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	incl %eax
	addl %ecx,%edx
	movzbl %al,%eax
	movzbl %dl,%edx
	pxor %mm0,%mm2
	movl (%edi,%eax,4),%ecx
	movd (%edi,%edx,4),%mm1
	addb %cl,%bl
	psllq $8,%mm1
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	incl %eax
	addl %ecx,%edx
	movzbl %al,%eax
	movzbl %dl,%edx
	pxor %mm1,%mm2
	movl (%edi,%eax,4),%ecx
	movd (%edi,%edx,4),%mm1
	addb %cl,%bl
	psllq $16,%mm1
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	incl %eax
	addl %ecx,%edx
	movzbl %al,%eax
	movzbl %dl,%edx
	pxor %mm1,%mm2
	movl (%edi,%eax,4),%ecx
	movd (%edi,%edx,4),%mm1
	addb %cl,%bl
	psllq $24,%mm1
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	incl %eax
	addl %ecx,%edx
	movzbl %al,%eax
	movzbl %dl,%edx
	pxor %mm1,%mm2
	movl (%edi,%eax,4),%ecx
	movd (%edi,%edx,4),%mm1
	addb %cl,%bl
	psllq $32,%mm1
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	incl %eax
	addl %ecx,%edx
	movzbl %al,%eax
	movzbl %dl,%edx
	pxor %mm1,%mm2
	movl (%edi,%eax,4),%ecx
	movd (%edi,%edx,4),%mm1
	addb %cl,%bl
	psllq $40,%mm1
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	incl %eax
	addl %ecx,%edx
	movzbl %al,%eax
	movzbl %dl,%edx
	pxor %mm1,%mm2
	movl (%edi,%eax,4),%ecx
	movd (%edi,%edx,4),%mm1
	addb %cl,%bl
	psllq $48,%mm1
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	incl %eax
	addl %ecx,%edx
	movzbl %al,%eax
	movzbl %dl,%edx
	pxor %mm1,%mm2
	movl (%edi,%eax,4),%ecx
	movd (%edi,%edx,4),%mm1
	movl %ebx,%edx
	xorl %ebx,%ebx
	movb %dl,%bl			/* ebx = low byte of y only */
	cmpl -4(%edi),%esi
	leal 8(%esi),%esi		/* lea leaves the cmp flags intact */
	jb .L005loop_mmx
	psllq $56,%mm1
	pxor %mm1,%mm2
	movq %mm2,-8(%ebp,%esi,1)
	emms
	cmpl 24(%esp),%esi
	je .L006done
	jmp .L002loop1
.align 16
.L003go4loop4:
	leal -4(%esi,%edx,1),%edx
	movl %edx,28(%esp)		/* 4-byte loop limit */
.L007loop4:
	addb %cl,%bl
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	addl %ecx,%edx
	incb %al
	andl $255,%edx
	movl (%edi,%eax,4),%ecx
	movl (%edi,%edx,4),%ebp
	addb %cl,%bl
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	addl %ecx,%edx
	incb %al
	andl $255,%edx
	rorl $8,%ebp
	movl (%edi,%eax,4),%ecx
	orl (%edi,%edx,4),%ebp
	addb %cl,%bl
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	addl %ecx,%edx
	incb %al
	andl $255,%edx
	rorl $8,%ebp
	movl (%edi,%eax,4),%ecx
	orl (%edi,%edx,4),%ebp
	addb %cl,%bl
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	addl %ecx,%edx
	incb %al
	andl $255,%edx
	rorl $8,%ebp
	movl 32(%esp),%ecx		/* ecx = out - in */
	orl (%edi,%edx,4),%ebp
	rorl $8,%ebp
	xorl (%esi),%ebp
	cmpl 28(%esp),%esi
	movl %ebp,(%ecx,%esi,1)
	leal 4(%esi),%esi
	movl (%edi,%eax,4),%ecx
	jb .L007loop4
	cmpl 24(%esp),%esi
	je .L006done
	movl 32(%esp),%ebp
.align 16
.L002loop1:
	addb %cl,%bl
	movl (%edi,%ebx,4),%edx
	movl %ecx,(%edi,%ebx,4)
	movl %edx,(%edi,%eax,4)
	addl %ecx,%edx
	incb %al
	andl $255,%edx
	movl (%edi,%edx,4),%edx
	xorb (%esi),%dl
	leal 1(%esi),%esi
	movl (%edi,%eax,4),%ecx
	cmpl 24(%esp),%esi
	movb %dl,-1(%ebp,%esi,1)
	jb .L002loop1
	jmp .L006done
.align 16
.L001RC4_CHAR:
	movzbl (%edi,%eax,1),%ecx
.L008cloop1:
	addb %cl,%bl
	movzbl (%edi,%ebx,1),%edx
	movb %cl,(%edi,%ebx,1)
	movb %dl,(%edi,%eax,1)
	addb %cl,%dl
	movzbl (%edi,%edx,1),%edx
	addb $1,%al
	xorb (%esi),%dl
	leal 1(%esi),%esi
	movzbl (%edi,%eax,1),%ecx
	cmpl 24(%esp),%esi
	movb %dl,-1(%ebp,%esi,1)
	jb .L008cloop1
.L006done:
	decb %al			/* undo the x+1 bias applied at entry */
	movl %ebx,-4(%edi)		/* store y */
	movb %al,-8(%edi)		/* store x */
.L000abort:
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size RC4,.-.L_RC4_begin
|
||||
/*
 * RC4_set_key (non-PIC build) -- initialise the RC4 key state from key
 * bytes. Same algorithm as the PIC variant in the other preprocessor
 * branch; OPENSSL_ia32cap_P is addressed absolutely here.
 *
 * Stack arguments as read after the four register pushes:
 *   20(%esp) -> %edi  key state (x at +0, y at +4, S table at +8)
 *   24(%esp) -> %ebp  key length in bytes
 *   28(%esp) -> %esi  key bytes
 *
 * %esi is moved to the end of the key and %ebp negated, so (%esi,%ebp)
 * walks the key while %ebp counts up to zero; on reaching zero it is
 * reloaded from the y slot (-4(%edi)), where -len was stashed, so the key
 * bytes repeat.
 * NOTE(review): a length of 0 is not checked here - confirm callers never
 * pass it.
 *
 * Bit 20 of OPENSSL_ia32cap_P selects the table layout: clear gives 256
 * dword entries, set gives 256 byte entries plus a -1 marker stored at
 * table offset 256.
 *
 * Both loops fill the identity permutation first and then run the 256
 * swap steps. x and y are zeroed on exit.
 * Saves and restores %ebp, %ebx, %esi, %edi.
 */
.globl RC4_set_key
.type RC4_set_key,@function
.align 16
RC4_set_key:
.L_RC4_set_key_begin:
#ifdef __CET__

	.byte 243,15,30,251		/* endbr32 */
#endif

	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	movl 20(%esp),%edi
	movl 24(%esp),%ebp
	movl 28(%esp),%esi
	leal OPENSSL_ia32cap_P,%edx
	leal 8(%edi),%edi		/* edi = S table */
	leal (%esi,%ebp,1),%esi		/* esi = end of key */
	negl %ebp			/* ebp = -len */
	xorl %eax,%eax
	movl %ebp,-4(%edi)		/* stash -len in the y slot */
	btl $20,(%edx)
	jc .L009c1stloop
.align 16
.L010w1stloop:
	movl %eax,(%edi,%eax,4)
	addb $1,%al
	jnc .L010w1stloop		/* carry out of %al after 256 entries */
	xorl %ecx,%ecx
	xorl %edx,%edx
.align 16
.L011w2ndloop:
	movl (%edi,%ecx,4),%eax
	addb (%esi,%ebp,1),%dl
	addb %al,%dl
	addl $1,%ebp
	movl (%edi,%edx,4),%ebx		/* mov keeps the flags of the addl */
	jnz .L012wnowrap
	movl -4(%edi),%ebp		/* key exhausted: restart at -len */
.L012wnowrap:
	movl %eax,(%edi,%edx,4)
	movl %ebx,(%edi,%ecx,4)
	addb $1,%cl
	jnc .L011w2ndloop
	jmp .L013exit
.align 16
.L009c1stloop:
	movb %al,(%edi,%eax,1)
	addb $1,%al
	jnc .L009c1stloop
	xorl %ecx,%ecx
	xorl %edx,%edx
	xorl %ebx,%ebx
.align 16
.L014c2ndloop:
	movb (%edi,%ecx,1),%al
	addb (%esi,%ebp,1),%dl
	addb %al,%dl
	addl $1,%ebp
	movb (%edi,%edx,1),%bl
	jnz .L015cnowrap
	movl -4(%edi),%ebp
.L015cnowrap:
	movb %al,(%edi,%edx,1)
	movb %bl,(%edi,%ecx,1)
	addb $1,%cl
	jnc .L014c2ndloop
	movl $-1,256(%edi)		/* mark the table as byte-sized */
.L013exit:
	xorl %eax,%eax
	movl %eax,-8(%edi)		/* x = 0 */
	movl %eax,-4(%edi)		/* y = 0 */
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size RC4_set_key,.-.L_RC4_set_key_begin
|
||||
/*
 * RC4_options (non-PIC build) -- return in %eax the address of a
 * NUL-terminated string naming the RC4 code path chosen on this CPU.
 * Takes no arguments; writes %eax and %edx only.
 *
 * Strings stored at .L017opts:
 *   offset  0  "rc4(4x,int)"   bits 20 and 26 both clear
 *   offset 12  "rc4(1x,char)"  bit 20 of OPENSSL_ia32cap_P set
 *   offset 25  "rc4(8x,mmx)"   bit 20 clear, bit 26 set
 * An ident string follows the three option strings.
 *
 * The string table is still located with call/pop; only the capability
 * word is addressed absolutely in this branch.
 */
.globl RC4_options
.type RC4_options,@function
.align 16
RC4_options:
.L_RC4_options_begin:
#ifdef __CET__

	.byte 243,15,30,251		/* endbr32 */
#endif

	call .L016pic_point
.L016pic_point:
	popl %eax
	leal .L017opts-.L016pic_point(%eax),%eax
	leal OPENSSL_ia32cap_P,%edx
	movl (%edx),%edx
	btl $20,%edx
	jc .L0181xchar
	btl $26,%edx
	jnc .L019ret
	addl $25,%eax
	ret
.L0181xchar:
	addl $12,%eax
.L019ret:
	ret
.align 64
.L017opts:
.byte 114,99,52,40,52,120,44,105,110,116,41,0
.byte 114,99,52,40,49,120,44,99,104,97,114,41,0
.byte 114,99,52,40,56,120,44,109,109,120,41,0
.byte 82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89
.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.size RC4_options,.-.L_RC4_options_begin
|
||||
.comm OPENSSL_ia32cap_P,16,4
|
||||
|
||||
.section ".note.gnu.property", "a"
|
||||
.p2align 2
|
||||
.long 1f - 0f
|
||||
.long 4f - 1f
|
||||
.long 5
|
||||
0:
|
||||
.asciz "GNU"
|
||||
1:
|
||||
.p2align 2
|
||||
.long 0xc0000002
|
||||
.long 3f - 2f
|
||||
2:
|
||||
.long 3
|
||||
3:
|
||||
.p2align 2
|
||||
4:
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,755 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from x86-gf2m.pl. */
|
||||
#ifdef PIC
|
||||
.text
|
||||
.type _mul_1x1_mmx,@function
|
||||
.align 16
|
||||
_mul_1x1_mmx:
|
||||
#ifdef __CET__
|
||||
|
||||
.byte 243,15,30,251
|
||||
#endif
|
||||
|
||||
subl $36,%esp
|
||||
movl %eax,%ecx
|
||||
leal (%eax,%eax,1),%edx
|
||||
andl $1073741823,%ecx
|
||||
leal (%edx,%edx,1),%ebp
|
||||
movl $0,(%esp)
|
||||
andl $2147483647,%edx
|
||||
movd %eax,%mm2
|
||||
movd %ebx,%mm3
|
||||
movl %ecx,4(%esp)
|
||||
xorl %edx,%ecx
|
||||
pxor %mm5,%mm5
|
||||
pxor %mm4,%mm4
|
||||
movl %edx,8(%esp)
|
||||
xorl %ebp,%edx
|
||||
movl %ecx,12(%esp)
|
||||
pcmpgtd %mm2,%mm5
|
||||
paddd %mm2,%mm2
|
||||
xorl %edx,%ecx
|
||||
movl %ebp,16(%esp)
|
||||
xorl %edx,%ebp
|
||||
pand %mm3,%mm5
|
||||
pcmpgtd %mm2,%mm4
|
||||
movl %ecx,20(%esp)
|
||||
xorl %ecx,%ebp
|
||||
psllq $31,%mm5
|
||||
pand %mm3,%mm4
|
||||
movl %edx,24(%esp)
|
||||
movl $7,%esi
|
||||
movl %ebp,28(%esp)
|
||||
movl %esi,%ebp
|
||||
andl %ebx,%esi
|
||||
shrl $3,%ebx
|
||||
movl %ebp,%edi
|
||||
psllq $30,%mm4
|
||||
andl %ebx,%edi
|
||||
shrl $3,%ebx
|
||||
movd (%esp,%esi,4),%mm0
|
||||
movl %ebp,%esi
|
||||
andl %ebx,%esi
|
||||
shrl $3,%ebx
|
||||
movd (%esp,%edi,4),%mm2
|
||||
movl %ebp,%edi
|
||||
psllq $3,%mm2
|
||||
andl %ebx,%edi
|
||||
shrl $3,%ebx
|
||||
pxor %mm2,%mm0
|
||||
movd (%esp,%esi,4),%mm1
|
||||
movl %ebp,%esi
|
||||
psllq $6,%mm1
|
||||
andl %ebx,%esi
|
||||
shrl $3,%ebx
|
||||
pxor %mm1,%mm0
|
||||
movd (%esp,%edi,4),%mm2
|
||||
movl %ebp,%edi
|
||||
psllq $9,%mm2
|
||||
andl %ebx,%edi
|
||||
shrl $3,%ebx
|
||||
pxor %mm2,%mm0
|
||||
movd (%esp,%esi,4),%mm1
|
||||
movl %ebp,%esi
|
||||
psllq $12,%mm1
|
||||
andl %ebx,%esi
|
||||
shrl $3,%ebx
|
||||
pxor %mm1,%mm0
|
||||
movd (%esp,%edi,4),%mm2
|
||||
movl %ebp,%edi
|
||||
psllq $15,%mm2
|
||||
andl %ebx,%edi
|
||||
shrl $3,%ebx
|
||||
pxor %mm2,%mm0
|
||||
movd (%esp,%esi,4),%mm1
|
||||
movl %ebp,%esi
|
||||
psllq $18,%mm1
|
||||
andl %ebx,%esi
|
||||
shrl $3,%ebx
|
||||
pxor %mm1,%mm0
|
||||
movd (%esp,%edi,4),%mm2
|
||||
movl %ebp,%edi
|
||||
psllq $21,%mm2
|
||||
andl %ebx,%edi
|
||||
shrl $3,%ebx
|
||||
pxor %mm2,%mm0
|
||||
movd (%esp,%esi,4),%mm1
|
||||
movl %ebp,%esi
|
||||
psllq $24,%mm1
|
||||
andl %ebx,%esi
|
||||
shrl $3,%ebx
|
||||
pxor %mm1,%mm0
|
||||
movd (%esp,%edi,4),%mm2
|
||||
pxor %mm4,%mm0
|
||||
psllq $27,%mm2
|
||||
pxor %mm2,%mm0
|
||||
movd (%esp,%esi,4),%mm1
|
||||
pxor %mm5,%mm0
|
||||
psllq $30,%mm1
|
||||
addl $36,%esp
|
||||
pxor %mm1,%mm0
|
||||
ret
|
||||
.size _mul_1x1_mmx,.-_mul_1x1_mmx
|
||||
.type _mul_1x1_ialu,@function
|
||||
.align 16
|
||||
_mul_1x1_ialu:
|
||||
#ifdef __CET__
|
||||
|
||||
.byte 243,15,30,251
|
||||
#endif
|
||||
|
||||
subl $36,%esp
|
||||
movl %eax,%ecx
|
||||
leal (%eax,%eax,1),%edx
|
||||
leal (,%eax,4),%ebp
|
||||
andl $1073741823,%ecx
|
||||
leal (%eax,%eax,1),%edi
|
||||
sarl $31,%eax
|
||||
movl $0,(%esp)
|
||||
andl $2147483647,%edx
|
||||
movl %ecx,4(%esp)
|
||||
xorl %edx,%ecx
|
||||
movl %edx,8(%esp)
|
||||
xorl %ebp,%edx
|
||||
movl %ecx,12(%esp)
|
||||
xorl %edx,%ecx
|
||||
movl %ebp,16(%esp)
|
||||
xorl %edx,%ebp
|
||||
movl %ecx,20(%esp)
|
||||
xorl %ecx,%ebp
|
||||
sarl $31,%edi
|
||||
andl %ebx,%eax
|
||||
movl %edx,24(%esp)
|
||||
andl %ebx,%edi
|
||||
movl %ebp,28(%esp)
|
||||
movl %eax,%edx
|
||||
shll $31,%eax
|
||||
movl %edi,%ecx
|
||||
shrl $1,%edx
|
||||
movl $7,%esi
|
||||
shll $30,%edi
|
||||
andl %ebx,%esi
|
||||
shrl $2,%ecx
|
||||
xorl %edi,%eax
|
||||
shrl $3,%ebx
|
||||
movl $7,%edi
|
||||
andl %ebx,%edi
|
||||
shrl $3,%ebx
|
||||
xorl %ecx,%edx
|
||||
xorl (%esp,%esi,4),%eax
|
||||
movl $7,%esi
|
||||
andl %ebx,%esi
|
||||
shrl $3,%ebx
|
||||
movl (%esp,%edi,4),%ebp
|
||||
movl $7,%edi
|
||||
movl %ebp,%ecx
|
||||
shll $3,%ebp
|
||||
andl %ebx,%edi
|
||||
shrl $29,%ecx
|
||||
xorl %ebp,%eax
|
||||
shrl $3,%ebx
|
||||
xorl %ecx,%edx
|
||||
movl (%esp,%esi,4),%ecx
|
||||
movl $7,%esi
|
||||
movl %ecx,%ebp
|
||||
shll $6,%ecx
|
||||
andl %ebx,%esi
|
||||
shrl $26,%ebp
|
||||
xorl %ecx,%eax
|
||||
shrl $3,%ebx
|
||||
xorl %ebp,%edx
|
||||
movl (%esp,%edi,4),%ebp
|
||||
movl $7,%edi
|
||||
movl %ebp,%ecx
|
||||
shll $9,%ebp
|
||||
andl %ebx,%edi
|
||||
shrl $23,%ecx
|
||||
xorl %ebp,%eax
|
||||
shrl $3,%ebx
|
||||
xorl %ecx,%edx
|
||||
movl (%esp,%esi,4),%ecx
|
||||
movl $7,%esi
|
||||
movl %ecx,%ebp
|
||||
shll $12,%ecx
|
||||
andl %ebx,%esi
|
||||
shrl $20,%ebp
|
||||
xorl %ecx,%eax
|
||||
shrl $3,%ebx
|
||||
xorl %ebp,%edx
|
||||
movl (%esp,%edi,4),%ebp
|
||||
movl $7,%edi
|
||||
movl %ebp,%ecx
|
||||
shll $15,%ebp
|
||||
andl %ebx,%edi
|
||||
shrl $17,%ecx
|
||||
xorl %ebp,%eax
|
||||
shrl $3,%ebx
|
||||
xorl %ecx,%edx
|
||||
movl (%esp,%esi,4),%ecx
|
||||
movl $7,%esi
|
||||
movl %ecx,%ebp
|
||||
shll $18,%ecx
|
||||
andl %ebx,%esi
|
||||
shrl $14,%ebp
|
||||
xorl %ecx,%eax
|
||||
shrl $3,%ebx
|
||||
xorl %ebp,%edx
|
||||
movl (%esp,%edi,4),%ebp
|
||||
movl $7,%edi
|
||||
movl %ebp,%ecx
|
||||
shll $21,%ebp
|
||||
andl %ebx,%edi
|
||||
shrl $11,%ecx
|
||||
xorl %ebp,%eax
|
||||
shrl $3,%ebx
|
||||
xorl %ecx,%edx
|
||||
movl (%esp,%esi,4),%ecx
|
||||
movl $7,%esi
|
||||
movl %ecx,%ebp
|
||||
shll $24,%ecx
|
||||
andl %ebx,%esi
|
||||
shrl $8,%ebp
|
||||
xorl %ecx,%eax
|
||||
shrl $3,%ebx
|
||||
xorl %ebp,%edx
|
||||
movl (%esp,%edi,4),%ebp
|
||||
movl %ebp,%ecx
|
||||
shll $27,%ebp
|
||||
movl (%esp,%esi,4),%edi
|
||||
shrl $5,%ecx
|
||||
movl %edi,%esi
|
||||
xorl %ebp,%eax
|
||||
shll $30,%edi
|
||||
xorl %ecx,%edx
|
||||
shrl $2,%esi
|
||||
xorl %edi,%eax
|
||||
xorl %esi,%edx
|
||||
addl $36,%esp
|
||||
ret
|
||||
.size _mul_1x1_ialu,.-_mul_1x1_ialu
|
||||
.globl bn_GF2m_mul_2x2
|
||||
.type bn_GF2m_mul_2x2,@function
|
||||
.align 16
|
||||
bn_GF2m_mul_2x2:
|
||||
.L_bn_GF2m_mul_2x2_begin:
|
||||
#ifdef __CET__
|
||||
|
||||
.byte 243,15,30,251
|
||||
#endif
|
||||
|
||||
call .L000PIC_me_up
|
||||
.L000PIC_me_up:
|
||||
popl %edx
|
||||
leal OPENSSL_ia32cap_P-.L000PIC_me_up(%edx),%edx
|
||||
movl (%edx),%eax
|
||||
movl 4(%edx),%edx
|
||||
testl $8388608,%eax
|
||||
jz .L001ialu
|
||||
testl $16777216,%eax
|
||||
jz .L002mmx
|
||||
testl $2,%edx
|
||||
jz .L002mmx
|
||||
movups 8(%esp),%xmm0
|
||||
shufps $177,%xmm0,%xmm0
|
||||
.byte 102,15,58,68,192,1
|
||||
movl 4(%esp),%eax
|
||||
movups %xmm0,(%eax)
|
||||
ret
|
||||
.align 16
|
||||
.L002mmx:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 24(%esp),%eax
|
||||
movl 32(%esp),%ebx
|
||||
call _mul_1x1_mmx
|
||||
movq %mm0,%mm7
|
||||
movl 28(%esp),%eax
|
||||
movl 36(%esp),%ebx
|
||||
call _mul_1x1_mmx
|
||||
movq %mm0,%mm6
|
||||
movl 24(%esp),%eax
|
||||
movl 32(%esp),%ebx
|
||||
xorl 28(%esp),%eax
|
||||
xorl 36(%esp),%ebx
|
||||
call _mul_1x1_mmx
|
||||
pxor %mm7,%mm0
|
||||
movl 20(%esp),%eax
|
||||
pxor %mm6,%mm0
|
||||
movq %mm0,%mm2
|
||||
psllq $32,%mm0
|
||||
popl %edi
|
||||
psrlq $32,%mm2
|
||||
popl %esi
|
||||
pxor %mm6,%mm0
|
||||
popl %ebx
|
||||
pxor %mm7,%mm2
|
||||
movq %mm0,(%eax)
|
||||
popl %ebp
|
||||
movq %mm2,8(%eax)
|
||||
emms
|
||||
ret
|
||||
.align 16
|
||||
.L001ialu:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
subl $20,%esp
|
||||
movl 44(%esp),%eax
|
||||
movl 52(%esp),%ebx
|
||||
call _mul_1x1_ialu
|
||||
movl %eax,8(%esp)
|
||||
movl %edx,12(%esp)
|
||||
movl 48(%esp),%eax
|
||||
movl 56(%esp),%ebx
|
||||
call _mul_1x1_ialu
|
||||
movl %eax,(%esp)
|
||||
movl %edx,4(%esp)
|
||||
movl 44(%esp),%eax
|
||||
movl 52(%esp),%ebx
|
||||
xorl 48(%esp),%eax
|
||||
xorl 56(%esp),%ebx
|
||||
call _mul_1x1_ialu
|
||||
movl 40(%esp),%ebp
|
||||
movl (%esp),%ebx
|
||||
movl 4(%esp),%ecx
|
||||
movl 8(%esp),%edi
|
||||
movl 12(%esp),%esi
|
||||
xorl %edx,%eax
|
||||
xorl %ecx,%edx
|
||||
xorl %ebx,%eax
|
||||
movl %ebx,(%ebp)
|
||||
xorl %edi,%edx
|
||||
movl %esi,12(%ebp)
|
||||
xorl %esi,%eax
|
||||
addl $20,%esp
|
||||
xorl %esi,%edx
|
||||
popl %edi
|
||||
xorl %edx,%eax
|
||||
popl %esi
|
||||
movl %edx,8(%ebp)
|
||||
popl %ebx
|
||||
movl %eax,4(%ebp)
|
||||
popl %ebp
|
||||
ret
|
||||
.size bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin
|
||||
.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
|
||||
.byte 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
|
||||
.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
|
||||
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
|
||||
.byte 62,0
|
||||
.comm OPENSSL_ia32cap_P,16,4
|
||||
|
||||
.section ".note.gnu.property", "a"
|
||||
.p2align 2
|
||||
.long 1f - 0f
|
||||
.long 4f - 1f
|
||||
.long 5
|
||||
0:
|
||||
.asciz "GNU"
|
||||
1:
|
||||
.p2align 2
|
||||
.long 0xc0000002
|
||||
.long 3f - 2f
|
||||
2:
|
||||
.long 3
|
||||
3:
|
||||
.p2align 2
|
||||
4:
|
||||
#else
|
||||
.text
|
||||
.type _mul_1x1_mmx,@function
|
||||
.align 16
|
||||
_mul_1x1_mmx:
|
||||
#ifdef __CET__
|
||||
|
||||
.byte 243,15,30,251
|
||||
#endif
|
||||
|
||||
subl $36,%esp
|
||||
movl %eax,%ecx
|
||||
leal (%eax,%eax,1),%edx
|
||||
andl $1073741823,%ecx
|
||||
leal (%edx,%edx,1),%ebp
|
||||
movl $0,(%esp)
|
||||
andl $2147483647,%edx
|
||||
movd %eax,%mm2
|
||||
movd %ebx,%mm3
|
||||
movl %ecx,4(%esp)
|
||||
xorl %edx,%ecx
|
||||
pxor %mm5,%mm5
|
||||
pxor %mm4,%mm4
|
||||
movl %edx,8(%esp)
|
||||
xorl %ebp,%edx
|
||||
movl %ecx,12(%esp)
|
||||
pcmpgtd %mm2,%mm5
|
||||
paddd %mm2,%mm2
|
||||
xorl %edx,%ecx
|
||||
movl %ebp,16(%esp)
|
||||
xorl %edx,%ebp
|
||||
pand %mm3,%mm5
|
||||
pcmpgtd %mm2,%mm4
|
||||
movl %ecx,20(%esp)
|
||||
xorl %ecx,%ebp
|
||||
psllq $31,%mm5
|
||||
pand %mm3,%mm4
|
||||
movl %edx,24(%esp)
|
||||
movl $7,%esi
|
||||
movl %ebp,28(%esp)
|
||||
movl %esi,%ebp
|
||||
andl %ebx,%esi
|
||||
shrl $3,%ebx
|
||||
movl %ebp,%edi
|
||||
psllq $30,%mm4
|
||||
andl %ebx,%edi
|
||||
shrl $3,%ebx
|
||||
movd (%esp,%esi,4),%mm0
|
||||
movl %ebp,%esi
|
||||
andl %ebx,%esi
|
||||
shrl $3,%ebx
|
||||
movd (%esp,%edi,4),%mm2
|
||||
movl %ebp,%edi
|
||||
psllq $3,%mm2
|
||||
andl %ebx,%edi
|
||||
shrl $3,%ebx
|
||||
pxor %mm2,%mm0
|
||||
movd (%esp,%esi,4),%mm1
|
||||
movl %ebp,%esi
|
||||
psllq $6,%mm1
|
||||
andl %ebx,%esi
|
||||
shrl $3,%ebx
|
||||
pxor %mm1,%mm0
|
||||
movd (%esp,%edi,4),%mm2
|
||||
movl %ebp,%edi
|
||||
psllq $9,%mm2
|
||||
andl %ebx,%edi
|
||||
shrl $3,%ebx
|
||||
pxor %mm2,%mm0
|
||||
movd (%esp,%esi,4),%mm1
|
||||
movl %ebp,%esi
|
||||
psllq $12,%mm1
|
||||
andl %ebx,%esi
|
||||
shrl $3,%ebx
|
||||
pxor %mm1,%mm0
|
||||
movd (%esp,%edi,4),%mm2
|
||||
movl %ebp,%edi
|
||||
psllq $15,%mm2
|
||||
andl %ebx,%edi
|
||||
shrl $3,%ebx
|
||||
pxor %mm2,%mm0
|
||||
movd (%esp,%esi,4),%mm1
|
||||
movl %ebp,%esi
|
||||
psllq $18,%mm1
|
||||
andl %ebx,%esi
|
||||
shrl $3,%ebx
|
||||
pxor %mm1,%mm0
|
||||
movd (%esp,%edi,4),%mm2
|
||||
movl %ebp,%edi
|
||||
psllq $21,%mm2
|
||||
andl %ebx,%edi
|
||||
shrl $3,%ebx
|
||||
pxor %mm2,%mm0
|
||||
movd (%esp,%esi,4),%mm1
|
||||
movl %ebp,%esi
|
||||
psllq $24,%mm1
|
||||
andl %ebx,%esi
|
||||
shrl $3,%ebx
|
||||
pxor %mm1,%mm0
|
||||
movd (%esp,%edi,4),%mm2
|
||||
pxor %mm4,%mm0
|
||||
psllq $27,%mm2
|
||||
pxor %mm2,%mm0
|
||||
movd (%esp,%esi,4),%mm1
|
||||
pxor %mm5,%mm0
|
||||
psllq $30,%mm1
|
||||
addl $36,%esp
|
||||
pxor %mm1,%mm0
|
||||
ret
|
||||
.size _mul_1x1_mmx,.-_mul_1x1_mmx
|
||||
.type _mul_1x1_ialu,@function
|
||||
.align 16
|
||||
_mul_1x1_ialu:
|
||||
#ifdef __CET__
|
||||
|
||||
.byte 243,15,30,251
|
||||
#endif
|
||||
|
||||
subl $36,%esp
|
||||
movl %eax,%ecx
|
||||
leal (%eax,%eax,1),%edx
|
||||
leal (,%eax,4),%ebp
|
||||
andl $1073741823,%ecx
|
||||
leal (%eax,%eax,1),%edi
|
||||
sarl $31,%eax
|
||||
movl $0,(%esp)
|
||||
andl $2147483647,%edx
|
||||
movl %ecx,4(%esp)
|
||||
xorl %edx,%ecx
|
||||
movl %edx,8(%esp)
|
||||
xorl %ebp,%edx
|
||||
movl %ecx,12(%esp)
|
||||
xorl %edx,%ecx
|
||||
movl %ebp,16(%esp)
|
||||
xorl %edx,%ebp
|
||||
movl %ecx,20(%esp)
|
||||
xorl %ecx,%ebp
|
||||
sarl $31,%edi
|
||||
andl %ebx,%eax
|
||||
movl %edx,24(%esp)
|
||||
andl %ebx,%edi
|
||||
movl %ebp,28(%esp)
|
||||
movl %eax,%edx
|
||||
shll $31,%eax
|
||||
movl %edi,%ecx
|
||||
shrl $1,%edx
|
||||
movl $7,%esi
|
||||
shll $30,%edi
|
||||
andl %ebx,%esi
|
||||
shrl $2,%ecx
|
||||
xorl %edi,%eax
|
||||
shrl $3,%ebx
|
||||
movl $7,%edi
|
||||
andl %ebx,%edi
|
||||
shrl $3,%ebx
|
||||
xorl %ecx,%edx
|
||||
xorl (%esp,%esi,4),%eax
|
||||
movl $7,%esi
|
||||
andl %ebx,%esi
|
||||
shrl $3,%ebx
|
||||
movl (%esp,%edi,4),%ebp
|
||||
movl $7,%edi
|
||||
movl %ebp,%ecx
|
||||
shll $3,%ebp
|
||||
andl %ebx,%edi
|
||||
shrl $29,%ecx
|
||||
xorl %ebp,%eax
|
||||
shrl $3,%ebx
|
||||
xorl %ecx,%edx
|
||||
movl (%esp,%esi,4),%ecx
|
||||
movl $7,%esi
|
||||
movl %ecx,%ebp
|
||||
shll $6,%ecx
|
||||
andl %ebx,%esi
|
||||
shrl $26,%ebp
|
||||
xorl %ecx,%eax
|
||||
shrl $3,%ebx
|
||||
xorl %ebp,%edx
|
||||
movl (%esp,%edi,4),%ebp
|
||||
movl $7,%edi
|
||||
movl %ebp,%ecx
|
||||
shll $9,%ebp
|
||||
andl %ebx,%edi
|
||||
shrl $23,%ecx
|
||||
xorl %ebp,%eax
|
||||
shrl $3,%ebx
|
||||
xorl %ecx,%edx
|
||||
movl (%esp,%esi,4),%ecx
|
||||
movl $7,%esi
|
||||
movl %ecx,%ebp
|
||||
shll $12,%ecx
|
||||
andl %ebx,%esi
|
||||
shrl $20,%ebp
|
||||
xorl %ecx,%eax
|
||||
shrl $3,%ebx
|
||||
xorl %ebp,%edx
|
||||
movl (%esp,%edi,4),%ebp
|
||||
movl $7,%edi
|
||||
movl %ebp,%ecx
|
||||
shll $15,%ebp
|
||||
andl %ebx,%edi
|
||||
shrl $17,%ecx
|
||||
xorl %ebp,%eax
|
||||
shrl $3,%ebx
|
||||
xorl %ecx,%edx
|
||||
movl (%esp,%esi,4),%ecx
|
||||
movl $7,%esi
|
||||
movl %ecx,%ebp
|
||||
shll $18,%ecx
|
||||
andl %ebx,%esi
|
||||
shrl $14,%ebp
|
||||
xorl %ecx,%eax
|
||||
shrl $3,%ebx
|
||||
xorl %ebp,%edx
|
||||
movl (%esp,%edi,4),%ebp
|
||||
movl $7,%edi
|
||||
movl %ebp,%ecx
|
||||
shll $21,%ebp
|
||||
andl %ebx,%edi
|
||||
shrl $11,%ecx
|
||||
xorl %ebp,%eax
|
||||
shrl $3,%ebx
|
||||
xorl %ecx,%edx
|
||||
movl (%esp,%esi,4),%ecx
|
||||
movl $7,%esi
|
||||
movl %ecx,%ebp
|
||||
shll $24,%ecx
|
||||
andl %ebx,%esi
|
||||
shrl $8,%ebp
|
||||
xorl %ecx,%eax
|
||||
shrl $3,%ebx
|
||||
xorl %ebp,%edx
|
||||
movl (%esp,%edi,4),%ebp
|
||||
movl %ebp,%ecx
|
||||
shll $27,%ebp
|
||||
movl (%esp,%esi,4),%edi
|
||||
shrl $5,%ecx
|
||||
movl %edi,%esi
|
||||
xorl %ebp,%eax
|
||||
shll $30,%edi
|
||||
xorl %ecx,%edx
|
||||
shrl $2,%esi
|
||||
xorl %edi,%eax
|
||||
xorl %esi,%edx
|
||||
addl $36,%esp
|
||||
ret
|
||||
.size _mul_1x1_ialu,.-_mul_1x1_ialu
|
||||
.globl bn_GF2m_mul_2x2
|
||||
.type bn_GF2m_mul_2x2,@function
|
||||
.align 16
|
||||
bn_GF2m_mul_2x2:
|
||||
.L_bn_GF2m_mul_2x2_begin:
|
||||
#ifdef __CET__
|
||||
|
||||
.byte 243,15,30,251
|
||||
#endif
|
||||
|
||||
leal OPENSSL_ia32cap_P,%edx
|
||||
movl (%edx),%eax
|
||||
movl 4(%edx),%edx
|
||||
testl $8388608,%eax
|
||||
jz .L000ialu
|
||||
testl $16777216,%eax
|
||||
jz .L001mmx
|
||||
testl $2,%edx
|
||||
jz .L001mmx
|
||||
movups 8(%esp),%xmm0
|
||||
shufps $177,%xmm0,%xmm0
|
||||
.byte 102,15,58,68,192,1
|
||||
movl 4(%esp),%eax
|
||||
movups %xmm0,(%eax)
|
||||
ret
|
||||
.align 16
|
||||
.L001mmx:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 24(%esp),%eax
|
||||
movl 32(%esp),%ebx
|
||||
call _mul_1x1_mmx
|
||||
movq %mm0,%mm7
|
||||
movl 28(%esp),%eax
|
||||
movl 36(%esp),%ebx
|
||||
call _mul_1x1_mmx
|
||||
movq %mm0,%mm6
|
||||
movl 24(%esp),%eax
|
||||
movl 32(%esp),%ebx
|
||||
xorl 28(%esp),%eax
|
||||
xorl 36(%esp),%ebx
|
||||
call _mul_1x1_mmx
|
||||
pxor %mm7,%mm0
|
||||
movl 20(%esp),%eax
|
||||
pxor %mm6,%mm0
|
||||
movq %mm0,%mm2
|
||||
psllq $32,%mm0
|
||||
popl %edi
|
||||
psrlq $32,%mm2
|
||||
popl %esi
|
||||
pxor %mm6,%mm0
|
||||
popl %ebx
|
||||
pxor %mm7,%mm2
|
||||
movq %mm0,(%eax)
|
||||
popl %ebp
|
||||
movq %mm2,8(%eax)
|
||||
emms
|
||||
ret
|
||||
.align 16
|
||||
.L000ialu:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
subl $20,%esp
|
||||
movl 44(%esp),%eax
|
||||
movl 52(%esp),%ebx
|
||||
call _mul_1x1_ialu
|
||||
movl %eax,8(%esp)
|
||||
movl %edx,12(%esp)
|
||||
movl 48(%esp),%eax
|
||||
movl 56(%esp),%ebx
|
||||
call _mul_1x1_ialu
|
||||
movl %eax,(%esp)
|
||||
movl %edx,4(%esp)
|
||||
movl 44(%esp),%eax
|
||||
movl 52(%esp),%ebx
|
||||
xorl 48(%esp),%eax
|
||||
xorl 56(%esp),%ebx
|
||||
call _mul_1x1_ialu
|
||||
movl 40(%esp),%ebp
|
||||
movl (%esp),%ebx
|
||||
movl 4(%esp),%ecx
|
||||
movl 8(%esp),%edi
|
||||
movl 12(%esp),%esi
|
||||
xorl %edx,%eax
|
||||
xorl %ecx,%edx
|
||||
xorl %ebx,%eax
|
||||
movl %ebx,(%ebp)
|
||||
xorl %edi,%edx
|
||||
movl %esi,12(%ebp)
|
||||
xorl %esi,%eax
|
||||
addl $20,%esp
|
||||
xorl %esi,%edx
|
||||
popl %edi
|
||||
xorl %edx,%eax
|
||||
popl %esi
|
||||
movl %edx,8(%ebp)
|
||||
popl %ebx
|
||||
movl %eax,4(%ebp)
|
||||
popl %ebp
|
||||
ret
|
||||
.size bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin
|
||||
.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
|
||||
.byte 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
|
||||
.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
|
||||
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
|
||||
.byte 62,0
|
||||
.comm OPENSSL_ia32cap_P,16,4
|
||||
|
||||
.section ".note.gnu.property", "a"
|
||||
.p2align 2
|
||||
.long 1f - 0f
|
||||
.long 4f - 1f
|
||||
.long 5
|
||||
0:
|
||||
.asciz "GNU"
|
||||
1:
|
||||
.p2align 2
|
||||
.long 0xc0000002
|
||||
.long 3f - 2f
|
||||
2:
|
||||
.long 3
|
||||
3:
|
||||
.p2align 2
|
||||
4:
|
||||
#endif
|
||||
@@ -1,995 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from x86-mont.pl. */
|
||||
#ifdef PIC
|
||||
.text
|
||||
.globl bn_mul_mont
|
||||
.type bn_mul_mont,@function
|
||||
.align 16
|
||||
bn_mul_mont:
|
||||
.L_bn_mul_mont_begin:
|
||||
#ifdef __CET__
|
||||
|
||||
.byte 243,15,30,251
|
||||
#endif
|
||||
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
xorl %eax,%eax
|
||||
movl 40(%esp),%edi
|
||||
cmpl $4,%edi
|
||||
jl .L000just_leave
|
||||
leal 20(%esp),%esi
|
||||
leal 24(%esp),%edx
|
||||
addl $2,%edi
|
||||
negl %edi
|
||||
leal -32(%esp,%edi,4),%ebp
|
||||
negl %edi
|
||||
movl %ebp,%eax
|
||||
subl %edx,%eax
|
||||
andl $2047,%eax
|
||||
subl %eax,%ebp
|
||||
xorl %ebp,%edx
|
||||
andl $2048,%edx
|
||||
xorl $2048,%edx
|
||||
subl %edx,%ebp
|
||||
andl $-64,%ebp
|
||||
movl %esp,%eax
|
||||
subl %ebp,%eax
|
||||
andl $-4096,%eax
|
||||
movl %esp,%edx
|
||||
leal (%ebp,%eax,1),%esp
|
||||
movl (%esp),%eax
|
||||
cmpl %ebp,%esp
|
||||
ja .L001page_walk
|
||||
jmp .L002page_walk_done
|
||||
.align 16
|
||||
.L001page_walk:
|
||||
leal -4096(%esp),%esp
|
||||
movl (%esp),%eax
|
||||
cmpl %ebp,%esp
|
||||
ja .L001page_walk
|
||||
.L002page_walk_done:
|
||||
movl (%esi),%eax
|
||||
movl 4(%esi),%ebx
|
||||
movl 8(%esi),%ecx
|
||||
movl 12(%esi),%ebp
|
||||
movl 16(%esi),%esi
|
||||
movl (%esi),%esi
|
||||
movl %eax,4(%esp)
|
||||
movl %ebx,8(%esp)
|
||||
movl %ecx,12(%esp)
|
||||
movl %ebp,16(%esp)
|
||||
movl %esi,20(%esp)
|
||||
leal -3(%edi),%ebx
|
||||
movl %edx,24(%esp)
|
||||
call .L003PIC_me_up
|
||||
.L003PIC_me_up:
|
||||
popl %eax
|
||||
leal OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
|
||||
btl $26,(%eax)
|
||||
jnc .L004non_sse2
|
||||
movl $-1,%eax
|
||||
movd %eax,%mm7
|
||||
movl 8(%esp),%esi
|
||||
movl 12(%esp),%edi
|
||||
movl 16(%esp),%ebp
|
||||
xorl %edx,%edx
|
||||
xorl %ecx,%ecx
|
||||
movd (%edi),%mm4
|
||||
movd (%esi),%mm5
|
||||
movd (%ebp),%mm3
|
||||
pmuludq %mm4,%mm5
|
||||
movq %mm5,%mm2
|
||||
movq %mm5,%mm0
|
||||
pand %mm7,%mm0
|
||||
pmuludq 20(%esp),%mm5
|
||||
pmuludq %mm5,%mm3
|
||||
paddq %mm0,%mm3
|
||||
movd 4(%ebp),%mm1
|
||||
movd 4(%esi),%mm0
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
incl %ecx
|
||||
.align 16
|
||||
.L0051st:
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
pand %mm7,%mm0
|
||||
movd 4(%ebp,%ecx,4),%mm1
|
||||
paddq %mm0,%mm3
|
||||
movd 4(%esi,%ecx,4),%mm0
|
||||
psrlq $32,%mm2
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm3
|
||||
leal 1(%ecx),%ecx
|
||||
cmpl %ebx,%ecx
|
||||
jl .L0051st
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
pand %mm7,%mm0
|
||||
paddq %mm0,%mm3
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
paddq %mm2,%mm3
|
||||
movq %mm3,32(%esp,%ebx,4)
|
||||
incl %edx
|
||||
.L006outer:
|
||||
xorl %ecx,%ecx
|
||||
movd (%edi,%edx,4),%mm4
|
||||
movd (%esi),%mm5
|
||||
movd 32(%esp),%mm6
|
||||
movd (%ebp),%mm3
|
||||
pmuludq %mm4,%mm5
|
||||
paddq %mm6,%mm5
|
||||
movq %mm5,%mm0
|
||||
movq %mm5,%mm2
|
||||
pand %mm7,%mm0
|
||||
pmuludq 20(%esp),%mm5
|
||||
pmuludq %mm5,%mm3
|
||||
paddq %mm0,%mm3
|
||||
movd 36(%esp),%mm6
|
||||
movd 4(%ebp),%mm1
|
||||
movd 4(%esi),%mm0
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
paddq %mm6,%mm2
|
||||
incl %ecx
|
||||
decl %ebx
|
||||
.L007inner:
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
movd 36(%esp,%ecx,4),%mm6
|
||||
pand %mm7,%mm0
|
||||
movd 4(%ebp,%ecx,4),%mm1
|
||||
paddq %mm0,%mm3
|
||||
movd 4(%esi,%ecx,4),%mm0
|
||||
psrlq $32,%mm2
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm3
|
||||
paddq %mm6,%mm2
|
||||
decl %ebx
|
||||
leal 1(%ecx),%ecx
|
||||
jnz .L007inner
|
||||
movl %ecx,%ebx
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
pand %mm7,%mm0
|
||||
paddq %mm0,%mm3
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
movd 36(%esp,%ebx,4),%mm6
|
||||
paddq %mm2,%mm3
|
||||
paddq %mm6,%mm3
|
||||
movq %mm3,32(%esp,%ebx,4)
|
||||
leal 1(%edx),%edx
|
||||
cmpl %ebx,%edx
|
||||
jle .L006outer
|
||||
emms
|
||||
jmp .L008common_tail
|
||||
.align 16
|
||||
.L004non_sse2:
|
||||
movl 8(%esp),%esi
|
||||
leal 1(%ebx),%ebp
|
||||
movl 12(%esp),%edi
|
||||
xorl %ecx,%ecx
|
||||
movl %esi,%edx
|
||||
andl $1,%ebp
|
||||
subl %edi,%edx
|
||||
leal 4(%edi,%ebx,4),%eax
|
||||
orl %edx,%ebp
|
||||
movl (%edi),%edi
|
||||
jz .L009bn_sqr_mont
|
||||
movl %eax,28(%esp)
|
||||
movl (%esi),%eax
|
||||
xorl %edx,%edx
|
||||
.align 16
|
||||
.L010mull:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl %eax,%ebp
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
movl (%esi,%ecx,4),%eax
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
jl .L010mull
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
movl 20(%esp),%edi
|
||||
addl %ebp,%eax
|
||||
movl 16(%esp),%esi
|
||||
adcl $0,%edx
|
||||
imull 32(%esp),%edi
|
||||
movl %eax,32(%esp,%ebx,4)
|
||||
xorl %ecx,%ecx
|
||||
movl %edx,36(%esp,%ebx,4)
|
||||
movl %ecx,40(%esp,%ebx,4)
|
||||
movl (%esi),%eax
|
||||
mull %edi
|
||||
addl 32(%esp),%eax
|
||||
movl 4(%esi),%eax
|
||||
adcl $0,%edx
|
||||
incl %ecx
|
||||
jmp .L0112ndmadd
|
||||
.align 16
|
||||
.L0121stmadd:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
movl (%esi,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
jl .L0121stmadd
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ebx,4),%eax
|
||||
movl 20(%esp),%edi
|
||||
adcl $0,%edx
|
||||
movl 16(%esp),%esi
|
||||
addl %eax,%ebp
|
||||
adcl $0,%edx
|
||||
imull 32(%esp),%edi
|
||||
xorl %ecx,%ecx
|
||||
addl 36(%esp,%ebx,4),%edx
|
||||
movl %ebp,32(%esp,%ebx,4)
|
||||
adcl $0,%ecx
|
||||
movl (%esi),%eax
|
||||
movl %edx,36(%esp,%ebx,4)
|
||||
movl %ecx,40(%esp,%ebx,4)
|
||||
mull %edi
|
||||
addl 32(%esp),%eax
|
||||
movl 4(%esi),%eax
|
||||
adcl $0,%edx
|
||||
movl $1,%ecx
|
||||
.align 16
|
||||
.L0112ndmadd:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
movl (%esi,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,24(%esp,%ecx,4)
|
||||
jl .L0112ndmadd
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ebx,4),%ebp
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
adcl $0,%edx
|
||||
movl %ebp,28(%esp,%ebx,4)
|
||||
xorl %eax,%eax
|
||||
movl 12(%esp),%ecx
|
||||
addl 36(%esp,%ebx,4),%edx
|
||||
adcl 40(%esp,%ebx,4),%eax
|
||||
leal 4(%ecx),%ecx
|
||||
movl %edx,32(%esp,%ebx,4)
|
||||
cmpl 28(%esp),%ecx
|
||||
movl %eax,36(%esp,%ebx,4)
|
||||
je .L008common_tail
|
||||
movl (%ecx),%edi
|
||||
movl 8(%esp),%esi
|
||||
movl %ecx,12(%esp)
|
||||
xorl %ecx,%ecx
|
||||
xorl %edx,%edx
|
||||
movl (%esi),%eax
|
||||
jmp .L0121stmadd
|
||||
.align 16
|
||||
.L009bn_sqr_mont:
|
||||
movl %ebx,(%esp)
|
||||
movl %ecx,12(%esp)
|
||||
movl %edi,%eax
|
||||
mull %edi
|
||||
movl %eax,32(%esp)
|
||||
movl %edx,%ebx
|
||||
shrl $1,%edx
|
||||
andl $1,%ebx
|
||||
incl %ecx
|
||||
.align 16
|
||||
.L013sqr:
|
||||
movl (%esi,%ecx,4),%eax
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl %ebp,%eax
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
leal (%ebx,%eax,2),%ebp
|
||||
shrl $31,%eax
|
||||
cmpl (%esp),%ecx
|
||||
movl %eax,%ebx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
jl .L013sqr
|
||||
movl (%esi,%ecx,4),%eax
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl %ebp,%eax
|
||||
movl 20(%esp),%edi
|
||||
adcl $0,%edx
|
||||
movl 16(%esp),%esi
|
||||
leal (%ebx,%eax,2),%ebp
|
||||
imull 32(%esp),%edi
|
||||
shrl $31,%eax
|
||||
movl %ebp,32(%esp,%ecx,4)
|
||||
leal (%eax,%edx,2),%ebp
|
||||
movl (%esi),%eax
|
||||
shrl $31,%edx
|
||||
movl %ebp,36(%esp,%ecx,4)
|
||||
movl %edx,40(%esp,%ecx,4)
|
||||
mull %edi
|
||||
addl 32(%esp),%eax
|
||||
movl %ecx,%ebx
|
||||
adcl $0,%edx
|
||||
movl 4(%esi),%eax
|
||||
movl $1,%ecx
|
||||
.align 16
|
||||
.L0143rdmadd:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
movl 4(%esi,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 36(%esp,%ecx,4),%ebp
|
||||
leal 2(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
movl (%esi,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,24(%esp,%ecx,4)
|
||||
jl .L0143rdmadd
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ebx,4),%ebp
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
adcl $0,%edx
|
||||
movl %ebp,28(%esp,%ebx,4)
|
||||
movl 12(%esp),%ecx
|
||||
xorl %eax,%eax
|
||||
movl 8(%esp),%esi
|
||||
addl 36(%esp,%ebx,4),%edx
|
||||
adcl 40(%esp,%ebx,4),%eax
|
||||
movl %edx,32(%esp,%ebx,4)
|
||||
cmpl %ebx,%ecx
|
||||
movl %eax,36(%esp,%ebx,4)
|
||||
je .L008common_tail
|
||||
movl 4(%esi,%ecx,4),%edi
|
||||
leal 1(%ecx),%ecx
|
||||
movl %edi,%eax
|
||||
movl %ecx,12(%esp)
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,32(%esp,%ecx,4)
|
||||
xorl %ebp,%ebp
|
||||
cmpl %ebx,%ecx
|
||||
leal 1(%ecx),%ecx
|
||||
je .L015sqrlast
|
||||
movl %edx,%ebx
|
||||
shrl $1,%edx
|
||||
andl $1,%ebx
|
||||
.align 16
|
||||
.L016sqradd:
|
||||
movl (%esi,%ecx,4),%eax
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl %ebp,%eax
|
||||
leal (%eax,%eax,1),%ebp
|
||||
adcl $0,%edx
|
||||
shrl $31,%eax
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%eax
|
||||
addl %ebx,%ebp
|
||||
adcl $0,%eax
|
||||
cmpl (%esp),%ecx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
movl %eax,%ebx
|
||||
jle .L016sqradd
|
||||
movl %edx,%ebp
|
||||
addl %edx,%edx
|
||||
shrl $31,%ebp
|
||||
addl %ebx,%edx
|
||||
adcl $0,%ebp
|
||||
.L015sqrlast:
|
||||
movl 20(%esp),%edi
|
||||
movl 16(%esp),%esi
|
||||
imull 32(%esp),%edi
|
||||
addl 32(%esp,%ecx,4),%edx
|
||||
movl (%esi),%eax
|
||||
adcl $0,%ebp
|
||||
movl %edx,32(%esp,%ecx,4)
|
||||
movl %ebp,36(%esp,%ecx,4)
|
||||
mull %edi
|
||||
addl 32(%esp),%eax
|
||||
leal -1(%ecx),%ebx
|
||||
adcl $0,%edx
|
||||
movl $1,%ecx
|
||||
movl 4(%esi),%eax
|
||||
jmp .L0143rdmadd
|
||||
.align 16
|
||||
.L008common_tail:
|
||||
movl 16(%esp),%ebp
|
||||
movl 4(%esp),%edi
|
||||
leal 32(%esp),%esi
|
||||
movl (%esi),%eax
|
||||
movl %ebx,%ecx
|
||||
xorl %edx,%edx
|
||||
.align 16
|
||||
.L017sub:
|
||||
sbbl (%ebp,%edx,4),%eax
|
||||
movl %eax,(%edi,%edx,4)
|
||||
decl %ecx
|
||||
movl 4(%esi,%edx,4),%eax
|
||||
leal 1(%edx),%edx
|
||||
jge .L017sub
|
||||
sbbl $0,%eax
|
||||
movl $-1,%edx
|
||||
xorl %eax,%edx
|
||||
jmp .L018copy
|
||||
.align 16
|
||||
.L018copy:
|
||||
movl 32(%esp,%ebx,4),%esi
|
||||
movl (%edi,%ebx,4),%ebp
|
||||
movl %ecx,32(%esp,%ebx,4)
|
||||
andl %eax,%esi
|
||||
andl %edx,%ebp
|
||||
orl %esi,%ebp
|
||||
movl %ebp,(%edi,%ebx,4)
|
||||
decl %ebx
|
||||
jge .L018copy
|
||||
movl 24(%esp),%esp
|
||||
movl $1,%eax
|
||||
.L000just_leave:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size bn_mul_mont,.-.L_bn_mul_mont_begin
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
|
||||
.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
|
||||
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
|
||||
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
|
||||
.byte 111,114,103,62,0
|
||||
.comm OPENSSL_ia32cap_P,16,4
|
||||
|
||||
.section ".note.gnu.property", "a"
|
||||
.p2align 2
|
||||
.long 1f - 0f
|
||||
.long 4f - 1f
|
||||
.long 5
|
||||
0:
|
||||
.asciz "GNU"
|
||||
1:
|
||||
.p2align 2
|
||||
.long 0xc0000002
|
||||
.long 3f - 2f
|
||||
2:
|
||||
.long 3
|
||||
3:
|
||||
.p2align 2
|
||||
4:
|
||||
#else
|
||||
.text
|
||||
.globl bn_mul_mont
|
||||
.type bn_mul_mont,@function
|
||||
.align 16
|
||||
bn_mul_mont:
|
||||
.L_bn_mul_mont_begin:
|
||||
#ifdef __CET__
|
||||
|
||||
.byte 243,15,30,251
|
||||
#endif
|
||||
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
xorl %eax,%eax
|
||||
movl 40(%esp),%edi
|
||||
cmpl $4,%edi
|
||||
jl .L000just_leave
|
||||
leal 20(%esp),%esi
|
||||
leal 24(%esp),%edx
|
||||
addl $2,%edi
|
||||
negl %edi
|
||||
leal -32(%esp,%edi,4),%ebp
|
||||
negl %edi
|
||||
movl %ebp,%eax
|
||||
subl %edx,%eax
|
||||
andl $2047,%eax
|
||||
subl %eax,%ebp
|
||||
xorl %ebp,%edx
|
||||
andl $2048,%edx
|
||||
xorl $2048,%edx
|
||||
subl %edx,%ebp
|
||||
andl $-64,%ebp
|
||||
movl %esp,%eax
|
||||
subl %ebp,%eax
|
||||
andl $-4096,%eax
|
||||
movl %esp,%edx
|
||||
leal (%ebp,%eax,1),%esp
|
||||
movl (%esp),%eax
|
||||
cmpl %ebp,%esp
|
||||
ja .L001page_walk
|
||||
jmp .L002page_walk_done
|
||||
.align 16
|
||||
.L001page_walk:
|
||||
leal -4096(%esp),%esp
|
||||
movl (%esp),%eax
|
||||
cmpl %ebp,%esp
|
||||
ja .L001page_walk
|
||||
.L002page_walk_done:
|
||||
movl (%esi),%eax
|
||||
movl 4(%esi),%ebx
|
||||
movl 8(%esi),%ecx
|
||||
movl 12(%esi),%ebp
|
||||
movl 16(%esi),%esi
|
||||
movl (%esi),%esi
|
||||
movl %eax,4(%esp)
|
||||
movl %ebx,8(%esp)
|
||||
movl %ecx,12(%esp)
|
||||
movl %ebp,16(%esp)
|
||||
movl %esi,20(%esp)
|
||||
leal -3(%edi),%ebx
|
||||
movl %edx,24(%esp)
|
||||
leal OPENSSL_ia32cap_P,%eax
|
||||
btl $26,(%eax)
|
||||
jnc .L003non_sse2
|
||||
movl $-1,%eax
|
||||
movd %eax,%mm7
|
||||
movl 8(%esp),%esi
|
||||
movl 12(%esp),%edi
|
||||
movl 16(%esp),%ebp
|
||||
xorl %edx,%edx
|
||||
xorl %ecx,%ecx
|
||||
movd (%edi),%mm4
|
||||
movd (%esi),%mm5
|
||||
movd (%ebp),%mm3
|
||||
pmuludq %mm4,%mm5
|
||||
movq %mm5,%mm2
|
||||
movq %mm5,%mm0
|
||||
pand %mm7,%mm0
|
||||
pmuludq 20(%esp),%mm5
|
||||
pmuludq %mm5,%mm3
|
||||
paddq %mm0,%mm3
|
||||
movd 4(%ebp),%mm1
|
||||
movd 4(%esi),%mm0
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
incl %ecx
|
||||
.align 16
|
||||
.L0041st:
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
pand %mm7,%mm0
|
||||
movd 4(%ebp,%ecx,4),%mm1
|
||||
paddq %mm0,%mm3
|
||||
movd 4(%esi,%ecx,4),%mm0
|
||||
psrlq $32,%mm2
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm3
|
||||
leal 1(%ecx),%ecx
|
||||
cmpl %ebx,%ecx
|
||||
jl .L0041st
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
pand %mm7,%mm0
|
||||
paddq %mm0,%mm3
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
paddq %mm2,%mm3
|
||||
movq %mm3,32(%esp,%ebx,4)
|
||||
incl %edx
|
||||
.L005outer:
|
||||
xorl %ecx,%ecx
|
||||
movd (%edi,%edx,4),%mm4
|
||||
movd (%esi),%mm5
|
||||
movd 32(%esp),%mm6
|
||||
movd (%ebp),%mm3
|
||||
pmuludq %mm4,%mm5
|
||||
paddq %mm6,%mm5
|
||||
movq %mm5,%mm0
|
||||
movq %mm5,%mm2
|
||||
pand %mm7,%mm0
|
||||
pmuludq 20(%esp),%mm5
|
||||
pmuludq %mm5,%mm3
|
||||
paddq %mm0,%mm3
|
||||
movd 36(%esp),%mm6
|
||||
movd 4(%ebp),%mm1
|
||||
movd 4(%esi),%mm0
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
paddq %mm6,%mm2
|
||||
incl %ecx
|
||||
decl %ebx
|
||||
.L006inner:
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
movd 36(%esp,%ecx,4),%mm6
|
||||
pand %mm7,%mm0
|
||||
movd 4(%ebp,%ecx,4),%mm1
|
||||
paddq %mm0,%mm3
|
||||
movd 4(%esi,%ecx,4),%mm0
|
||||
psrlq $32,%mm2
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm3
|
||||
paddq %mm6,%mm2
|
||||
decl %ebx
|
||||
leal 1(%ecx),%ecx
|
||||
jnz .L006inner
|
||||
movl %ecx,%ebx
|
||||
pmuludq %mm4,%mm0
|
||||
pmuludq %mm5,%mm1
|
||||
paddq %mm0,%mm2
|
||||
paddq %mm1,%mm3
|
||||
movq %mm2,%mm0
|
||||
pand %mm7,%mm0
|
||||
paddq %mm0,%mm3
|
||||
movd %mm3,28(%esp,%ecx,4)
|
||||
psrlq $32,%mm2
|
||||
psrlq $32,%mm3
|
||||
movd 36(%esp,%ebx,4),%mm6
|
||||
paddq %mm2,%mm3
|
||||
paddq %mm6,%mm3
|
||||
movq %mm3,32(%esp,%ebx,4)
|
||||
leal 1(%edx),%edx
|
||||
cmpl %ebx,%edx
|
||||
jle .L005outer
|
||||
emms
|
||||
jmp .L007common_tail
|
||||
.align 16
|
||||
.L003non_sse2:
|
||||
movl 8(%esp),%esi
|
||||
leal 1(%ebx),%ebp
|
||||
movl 12(%esp),%edi
|
||||
xorl %ecx,%ecx
|
||||
movl %esi,%edx
|
||||
andl $1,%ebp
|
||||
subl %edi,%edx
|
||||
leal 4(%edi,%ebx,4),%eax
|
||||
orl %edx,%ebp
|
||||
movl (%edi),%edi
|
||||
jz .L008bn_sqr_mont
|
||||
movl %eax,28(%esp)
|
||||
movl (%esi),%eax
|
||||
xorl %edx,%edx
|
||||
.align 16
|
||||
.L009mull:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl %eax,%ebp
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
movl (%esi,%ecx,4),%eax
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
jl .L009mull
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
movl 20(%esp),%edi
|
||||
addl %ebp,%eax
|
||||
movl 16(%esp),%esi
|
||||
adcl $0,%edx
|
||||
imull 32(%esp),%edi
|
||||
movl %eax,32(%esp,%ebx,4)
|
||||
xorl %ecx,%ecx
|
||||
movl %edx,36(%esp,%ebx,4)
|
||||
movl %ecx,40(%esp,%ebx,4)
|
||||
movl (%esi),%eax
|
||||
mull %edi
|
||||
addl 32(%esp),%eax
|
||||
movl 4(%esi),%eax
|
||||
adcl $0,%edx
|
||||
incl %ecx
|
||||
jmp .L0102ndmadd
|
||||
.align 16
|
||||
.L0111stmadd:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
movl (%esi,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
jl .L0111stmadd
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ebx,4),%eax
|
||||
movl 20(%esp),%edi
|
||||
adcl $0,%edx
|
||||
movl 16(%esp),%esi
|
||||
addl %eax,%ebp
|
||||
adcl $0,%edx
|
||||
imull 32(%esp),%edi
|
||||
xorl %ecx,%ecx
|
||||
addl 36(%esp,%ebx,4),%edx
|
||||
movl %ebp,32(%esp,%ebx,4)
|
||||
adcl $0,%ecx
|
||||
movl (%esi),%eax
|
||||
movl %edx,36(%esp,%ebx,4)
|
||||
movl %ecx,40(%esp,%ebx,4)
|
||||
mull %edi
|
||||
addl 32(%esp),%eax
|
||||
movl 4(%esi),%eax
|
||||
adcl $0,%edx
|
||||
movl $1,%ecx
|
||||
.align 16
|
||||
.L0102ndmadd:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
movl (%esi,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,24(%esp,%ecx,4)
|
||||
jl .L0102ndmadd
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ebx,4),%ebp
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
adcl $0,%edx
|
||||
movl %ebp,28(%esp,%ebx,4)
|
||||
xorl %eax,%eax
|
||||
movl 12(%esp),%ecx
|
||||
addl 36(%esp,%ebx,4),%edx
|
||||
adcl 40(%esp,%ebx,4),%eax
|
||||
leal 4(%ecx),%ecx
|
||||
movl %edx,32(%esp,%ebx,4)
|
||||
cmpl 28(%esp),%ecx
|
||||
movl %eax,36(%esp,%ebx,4)
|
||||
je .L007common_tail
|
||||
movl (%ecx),%edi
|
||||
movl 8(%esp),%esi
|
||||
movl %ecx,12(%esp)
|
||||
xorl %ecx,%ecx
|
||||
xorl %edx,%edx
|
||||
movl (%esi),%eax
|
||||
jmp .L0111stmadd
|
||||
.align 16
|
||||
.L008bn_sqr_mont:
|
||||
movl %ebx,(%esp)
|
||||
movl %ecx,12(%esp)
|
||||
movl %edi,%eax
|
||||
mull %edi
|
||||
movl %eax,32(%esp)
|
||||
movl %edx,%ebx
|
||||
shrl $1,%edx
|
||||
andl $1,%ebx
|
||||
incl %ecx
|
||||
.align 16
|
||||
.L012sqr:
|
||||
movl (%esi,%ecx,4),%eax
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl %ebp,%eax
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
leal (%ebx,%eax,2),%ebp
|
||||
shrl $31,%eax
|
||||
cmpl (%esp),%ecx
|
||||
movl %eax,%ebx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
jl .L012sqr
|
||||
movl (%esi,%ecx,4),%eax
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl %ebp,%eax
|
||||
movl 20(%esp),%edi
|
||||
adcl $0,%edx
|
||||
movl 16(%esp),%esi
|
||||
leal (%ebx,%eax,2),%ebp
|
||||
imull 32(%esp),%edi
|
||||
shrl $31,%eax
|
||||
movl %ebp,32(%esp,%ecx,4)
|
||||
leal (%eax,%edx,2),%ebp
|
||||
movl (%esi),%eax
|
||||
shrl $31,%edx
|
||||
movl %ebp,36(%esp,%ecx,4)
|
||||
movl %edx,40(%esp,%ecx,4)
|
||||
mull %edi
|
||||
addl 32(%esp),%eax
|
||||
movl %ecx,%ebx
|
||||
adcl $0,%edx
|
||||
movl 4(%esi),%eax
|
||||
movl $1,%ecx
|
||||
.align 16
|
||||
.L0133rdmadd:
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
movl 4(%esi,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 36(%esp,%ecx,4),%ebp
|
||||
leal 2(%ecx),%ecx
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
movl (%esi,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
cmpl %ebx,%ecx
|
||||
movl %ebp,24(%esp,%ecx,4)
|
||||
jl .L0133rdmadd
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl 32(%esp,%ebx,4),%ebp
|
||||
adcl $0,%edx
|
||||
addl %eax,%ebp
|
||||
adcl $0,%edx
|
||||
movl %ebp,28(%esp,%ebx,4)
|
||||
movl 12(%esp),%ecx
|
||||
xorl %eax,%eax
|
||||
movl 8(%esp),%esi
|
||||
addl 36(%esp,%ebx,4),%edx
|
||||
adcl 40(%esp,%ebx,4),%eax
|
||||
movl %edx,32(%esp,%ebx,4)
|
||||
cmpl %ebx,%ecx
|
||||
movl %eax,36(%esp,%ebx,4)
|
||||
je .L007common_tail
|
||||
movl 4(%esi,%ecx,4),%edi
|
||||
leal 1(%ecx),%ecx
|
||||
movl %edi,%eax
|
||||
movl %ecx,12(%esp)
|
||||
mull %edi
|
||||
addl 32(%esp,%ecx,4),%eax
|
||||
adcl $0,%edx
|
||||
movl %eax,32(%esp,%ecx,4)
|
||||
xorl %ebp,%ebp
|
||||
cmpl %ebx,%ecx
|
||||
leal 1(%ecx),%ecx
|
||||
je .L014sqrlast
|
||||
movl %edx,%ebx
|
||||
shrl $1,%edx
|
||||
andl $1,%ebx
|
||||
.align 16
|
||||
.L015sqradd:
|
||||
movl (%esi,%ecx,4),%eax
|
||||
movl %edx,%ebp
|
||||
mull %edi
|
||||
addl %ebp,%eax
|
||||
leal (%eax,%eax,1),%ebp
|
||||
adcl $0,%edx
|
||||
shrl $31,%eax
|
||||
addl 32(%esp,%ecx,4),%ebp
|
||||
leal 1(%ecx),%ecx
|
||||
adcl $0,%eax
|
||||
addl %ebx,%ebp
|
||||
adcl $0,%eax
|
||||
cmpl (%esp),%ecx
|
||||
movl %ebp,28(%esp,%ecx,4)
|
||||
movl %eax,%ebx
|
||||
jle .L015sqradd
|
||||
movl %edx,%ebp
|
||||
addl %edx,%edx
|
||||
shrl $31,%ebp
|
||||
addl %ebx,%edx
|
||||
adcl $0,%ebp
|
||||
.L014sqrlast:
|
||||
movl 20(%esp),%edi
|
||||
movl 16(%esp),%esi
|
||||
imull 32(%esp),%edi
|
||||
addl 32(%esp,%ecx,4),%edx
|
||||
movl (%esi),%eax
|
||||
adcl $0,%ebp
|
||||
movl %edx,32(%esp,%ecx,4)
|
||||
movl %ebp,36(%esp,%ecx,4)
|
||||
mull %edi
|
||||
addl 32(%esp),%eax
|
||||
leal -1(%ecx),%ebx
|
||||
adcl $0,%edx
|
||||
movl $1,%ecx
|
||||
movl 4(%esi),%eax
|
||||
jmp .L0133rdmadd
|
||||
.align 16
|
||||
.L007common_tail:
|
||||
movl 16(%esp),%ebp
|
||||
movl 4(%esp),%edi
|
||||
leal 32(%esp),%esi
|
||||
movl (%esi),%eax
|
||||
movl %ebx,%ecx
|
||||
xorl %edx,%edx
|
||||
.align 16
|
||||
.L016sub:
|
||||
sbbl (%ebp,%edx,4),%eax
|
||||
movl %eax,(%edi,%edx,4)
|
||||
decl %ecx
|
||||
movl 4(%esi,%edx,4),%eax
|
||||
leal 1(%edx),%edx
|
||||
jge .L016sub
|
||||
sbbl $0,%eax
|
||||
movl $-1,%edx
|
||||
xorl %eax,%edx
|
||||
jmp .L017copy
|
||||
.align 16
|
||||
.L017copy:
|
||||
movl 32(%esp,%ebx,4),%esi
|
||||
movl (%edi,%ebx,4),%ebp
|
||||
movl %ecx,32(%esp,%ebx,4)
|
||||
andl %eax,%esi
|
||||
andl %edx,%ebp
|
||||
orl %esi,%ebp
|
||||
movl %ebp,(%edi,%ebx,4)
|
||||
decl %ebx
|
||||
jge .L017copy
|
||||
movl 24(%esp),%esp
|
||||
movl $1,%eax
|
||||
.L000just_leave:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size bn_mul_mont,.-.L_bn_mul_mont_begin
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
|
||||
.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
|
||||
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
|
||||
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
|
||||
.byte 111,114,103,62,0
|
||||
.comm OPENSSL_ia32cap_P,16,4
|
||||
|
||||
.section ".note.gnu.property", "a"
|
||||
.p2align 2
|
||||
.long 1f - 0f
|
||||
.long 4f - 1f
|
||||
.long 5
|
||||
0:
|
||||
.asciz "GNU"
|
||||
1:
|
||||
.p2align 2
|
||||
.long 0xc0000002
|
||||
.long 3f - 2f
|
||||
2:
|
||||
.long 3
|
||||
3:
|
||||
.p2align 2
|
||||
4:
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,569 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from ghashp8-ppc.pl. */
|
||||
.machine "any"
|
||||
|
||||
.text
|
||||
|
||||
.globl gcm_init_p8
|
||||
.type gcm_init_p8,@function
|
||||
.align 5
|
||||
gcm_init_p8:
|
||||
li 0,-4096
|
||||
li 8,0x10
|
||||
mfspr 12,256
|
||||
li 9,0x20
|
||||
mtspr 256,0
|
||||
li 10,0x30
|
||||
.long 0x7D202699
|
||||
|
||||
vspltisb 8,-16
|
||||
vspltisb 5,1
|
||||
vaddubm 8,8,8
|
||||
vxor 4,4,4
|
||||
vor 8,8,5
|
||||
vsldoi 8,8,4,15
|
||||
vsldoi 6,4,5,1
|
||||
vaddubm 8,8,8
|
||||
vspltisb 7,7
|
||||
vor 8,8,6
|
||||
vspltb 6,9,0
|
||||
vsl 9,9,5
|
||||
vsrab 6,6,7
|
||||
vand 6,6,8
|
||||
vxor 3,9,6
|
||||
|
||||
vsldoi 9,3,3,8
|
||||
vsldoi 8,4,8,8
|
||||
vsldoi 11,4,9,8
|
||||
vsldoi 10,9,4,8
|
||||
|
||||
.long 0x7D001F99
|
||||
.long 0x7D681F99
|
||||
li 8,0x40
|
||||
.long 0x7D291F99
|
||||
li 9,0x50
|
||||
.long 0x7D4A1F99
|
||||
li 10,0x60
|
||||
|
||||
.long 0x10035CC8
|
||||
.long 0x10234CC8
|
||||
.long 0x104354C8
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
vxor 6,6,2
|
||||
vxor 16,0,6
|
||||
|
||||
vsldoi 17,16,16,8
|
||||
vsldoi 19,4,17,8
|
||||
vsldoi 18,17,4,8
|
||||
|
||||
.long 0x7E681F99
|
||||
li 8,0x70
|
||||
.long 0x7E291F99
|
||||
li 9,0x80
|
||||
.long 0x7E4A1F99
|
||||
li 10,0x90
|
||||
.long 0x10039CC8
|
||||
.long 0x11B09CC8
|
||||
.long 0x10238CC8
|
||||
.long 0x11D08CC8
|
||||
.long 0x104394C8
|
||||
.long 0x11F094C8
|
||||
|
||||
.long 0x10E044C8
|
||||
.long 0x114D44C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vsldoi 11,14,4,8
|
||||
vsldoi 9,4,14,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
vxor 13,13,11
|
||||
vxor 15,15,9
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vsldoi 13,13,13,8
|
||||
vxor 0,0,7
|
||||
vxor 13,13,10
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
vsldoi 9,13,13,8
|
||||
.long 0x100044C8
|
||||
.long 0x11AD44C8
|
||||
vxor 6,6,2
|
||||
vxor 9,9,15
|
||||
vxor 0,0,6
|
||||
vxor 13,13,9
|
||||
|
||||
vsldoi 9,0,0,8
|
||||
vsldoi 17,13,13,8
|
||||
vsldoi 11,4,9,8
|
||||
vsldoi 10,9,4,8
|
||||
vsldoi 19,4,17,8
|
||||
vsldoi 18,17,4,8
|
||||
|
||||
.long 0x7D681F99
|
||||
li 8,0xa0
|
||||
.long 0x7D291F99
|
||||
li 9,0xb0
|
||||
.long 0x7D4A1F99
|
||||
li 10,0xc0
|
||||
.long 0x7E681F99
|
||||
.long 0x7E291F99
|
||||
.long 0x7E4A1F99
|
||||
|
||||
mtspr 256,12
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,2,0
|
||||
.long 0
|
||||
.size gcm_init_p8,.-gcm_init_p8
|
||||
.globl gcm_gmult_p8
|
||||
.type gcm_gmult_p8,@function
|
||||
.align 5
|
||||
gcm_gmult_p8:
|
||||
lis 0,0xfff8
|
||||
li 8,0x10
|
||||
mfspr 12,256
|
||||
li 9,0x20
|
||||
mtspr 256,0
|
||||
li 10,0x30
|
||||
.long 0x7C601E99
|
||||
|
||||
.long 0x7D682699
|
||||
|
||||
.long 0x7D292699
|
||||
|
||||
.long 0x7D4A2699
|
||||
|
||||
.long 0x7D002699
|
||||
|
||||
vxor 4,4,4
|
||||
|
||||
.long 0x10035CC8
|
||||
.long 0x10234CC8
|
||||
.long 0x104354C8
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
vxor 6,6,2
|
||||
vxor 0,0,6
|
||||
|
||||
|
||||
.long 0x7C001F99
|
||||
|
||||
mtspr 256,12
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,2,0
|
||||
.long 0
|
||||
.size gcm_gmult_p8,.-gcm_gmult_p8
|
||||
|
||||
.globl gcm_ghash_p8
|
||||
.type gcm_ghash_p8,@function
|
||||
.align 5
|
||||
gcm_ghash_p8:
|
||||
li 0,-4096
|
||||
li 8,0x10
|
||||
mfspr 12,256
|
||||
li 9,0x20
|
||||
mtspr 256,0
|
||||
li 10,0x30
|
||||
.long 0x7C001E99
|
||||
|
||||
.long 0x7D682699
|
||||
li 8,0x40
|
||||
|
||||
.long 0x7D292699
|
||||
li 9,0x50
|
||||
|
||||
.long 0x7D4A2699
|
||||
li 10,0x60
|
||||
|
||||
.long 0x7D002699
|
||||
|
||||
vxor 4,4,4
|
||||
|
||||
cmplwi 6,64
|
||||
bge .Lgcm_ghash_p8_4x
|
||||
|
||||
.long 0x7C602E99
|
||||
addi 5,5,16
|
||||
subic. 6,6,16
|
||||
|
||||
vxor 3,3,0
|
||||
beq .Lshort
|
||||
|
||||
.long 0x7E682699
|
||||
li 8,16
|
||||
.long 0x7E292699
|
||||
add 9,5,6
|
||||
.long 0x7E4A2699
|
||||
b .Loop_2x
|
||||
|
||||
.align 5
|
||||
.Loop_2x:
|
||||
.long 0x7E002E99
|
||||
|
||||
|
||||
subic 6,6,32
|
||||
.long 0x10039CC8
|
||||
.long 0x11B05CC8
|
||||
subfe 0,0,0
|
||||
.long 0x10238CC8
|
||||
.long 0x11D04CC8
|
||||
and 0,0,6
|
||||
.long 0x104394C8
|
||||
.long 0x11F054C8
|
||||
add 5,5,0
|
||||
|
||||
vxor 0,0,13
|
||||
vxor 1,1,14
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 2,2,15
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
.long 0x7C682E99
|
||||
addi 5,5,32
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
|
||||
vxor 6,6,2
|
||||
vxor 3,3,6
|
||||
vxor 3,3,0
|
||||
.long 0x7c092840
|
||||
bgt .Loop_2x
|
||||
|
||||
cmplwi 6,0
|
||||
bne .Leven
|
||||
|
||||
.Lshort:
|
||||
.long 0x10035CC8
|
||||
.long 0x10234CC8
|
||||
.long 0x104354C8
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
vxor 6,6,2
|
||||
|
||||
.Leven:
|
||||
vxor 0,0,6
|
||||
|
||||
.long 0x7C001F99
|
||||
|
||||
mtspr 256,12
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,4,0
|
||||
.long 0
|
||||
.align 5
|
||||
.gcm_ghash_p8_4x:
|
||||
.Lgcm_ghash_p8_4x:
|
||||
stwu 1,-232(1)
|
||||
li 10,39
|
||||
li 11,55
|
||||
stvx 20,10,1
|
||||
addi 10,10,32
|
||||
stvx 21,11,1
|
||||
addi 11,11,32
|
||||
stvx 22,10,1
|
||||
addi 10,10,32
|
||||
stvx 23,11,1
|
||||
addi 11,11,32
|
||||
stvx 24,10,1
|
||||
addi 10,10,32
|
||||
stvx 25,11,1
|
||||
addi 11,11,32
|
||||
stvx 26,10,1
|
||||
addi 10,10,32
|
||||
stvx 27,11,1
|
||||
addi 11,11,32
|
||||
stvx 28,10,1
|
||||
addi 10,10,32
|
||||
stvx 29,11,1
|
||||
addi 11,11,32
|
||||
stvx 30,10,1
|
||||
li 10,0x60
|
||||
stvx 31,11,1
|
||||
li 0,-1
|
||||
stw 12,228(1)
|
||||
mtspr 256,0
|
||||
|
||||
lvsl 5,0,8
|
||||
|
||||
li 8,0x70
|
||||
.long 0x7E292699
|
||||
li 9,0x80
|
||||
vspltisb 6,8
|
||||
|
||||
li 10,0x90
|
||||
.long 0x7EE82699
|
||||
li 8,0xa0
|
||||
.long 0x7F092699
|
||||
li 9,0xb0
|
||||
.long 0x7F2A2699
|
||||
li 10,0xc0
|
||||
.long 0x7FA82699
|
||||
li 8,0x10
|
||||
.long 0x7FC92699
|
||||
li 9,0x20
|
||||
.long 0x7FEA2699
|
||||
li 10,0x30
|
||||
|
||||
vsldoi 7,4,6,8
|
||||
vaddubm 18,5,7
|
||||
vaddubm 19,6,18
|
||||
|
||||
srwi 6,6,4
|
||||
|
||||
.long 0x7C602E99
|
||||
.long 0x7E082E99
|
||||
subic. 6,6,8
|
||||
.long 0x7EC92E99
|
||||
.long 0x7F8A2E99
|
||||
addi 5,5,0x40
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
vxor 2,3,0
|
||||
|
||||
.long 0x11B0BCC8
|
||||
.long 0x11D0C4C8
|
||||
.long 0x11F0CCC8
|
||||
|
||||
vperm 11,17,9,18
|
||||
vperm 5,22,28,19
|
||||
vperm 10,17,9,19
|
||||
vperm 6,22,28,18
|
||||
.long 0x12B68CC8
|
||||
.long 0x12855CC8
|
||||
.long 0x137C4CC8
|
||||
.long 0x134654C8
|
||||
|
||||
vxor 21,21,14
|
||||
vxor 20,20,13
|
||||
vxor 27,27,21
|
||||
vxor 26,26,15
|
||||
|
||||
blt .Ltail_4x
|
||||
|
||||
.Loop_4x:
|
||||
.long 0x7C602E99
|
||||
.long 0x7E082E99
|
||||
subic. 6,6,4
|
||||
.long 0x7EC92E99
|
||||
.long 0x7F8A2E99
|
||||
addi 5,5,0x40
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.long 0x1002ECC8
|
||||
.long 0x1022F4C8
|
||||
.long 0x1042FCC8
|
||||
.long 0x11B0BCC8
|
||||
.long 0x11D0C4C8
|
||||
.long 0x11F0CCC8
|
||||
|
||||
vxor 0,0,20
|
||||
vxor 1,1,27
|
||||
vxor 2,2,26
|
||||
vperm 5,22,28,19
|
||||
vperm 6,22,28,18
|
||||
|
||||
.long 0x10E044C8
|
||||
.long 0x12855CC8
|
||||
.long 0x134654C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x12B68CC8
|
||||
.long 0x137C4CC8
|
||||
.long 0x100044C8
|
||||
|
||||
vxor 20,20,13
|
||||
vxor 26,26,15
|
||||
vxor 2,2,3
|
||||
vxor 21,21,14
|
||||
vxor 2,2,6
|
||||
vxor 27,27,21
|
||||
vxor 2,2,0
|
||||
bge .Loop_4x
|
||||
|
||||
.Ltail_4x:
|
||||
.long 0x1002ECC8
|
||||
.long 0x1022F4C8
|
||||
.long 0x1042FCC8
|
||||
|
||||
vxor 0,0,20
|
||||
vxor 1,1,27
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 2,2,26
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
vxor 6,6,2
|
||||
vxor 0,0,6
|
||||
|
||||
addic. 6,6,4
|
||||
beq .Ldone_4x
|
||||
|
||||
.long 0x7C602E99
|
||||
cmplwi 6,2
|
||||
li 6,-4
|
||||
blt .Lone
|
||||
.long 0x7E082E99
|
||||
beq .Ltwo
|
||||
|
||||
.Lthree:
|
||||
.long 0x7EC92E99
|
||||
|
||||
|
||||
|
||||
|
||||
vxor 2,3,0
|
||||
vor 29,23,23
|
||||
vor 30,24,24
|
||||
vor 31,25,25
|
||||
|
||||
vperm 5,16,22,19
|
||||
vperm 6,16,22,18
|
||||
.long 0x12B08CC8
|
||||
.long 0x13764CC8
|
||||
.long 0x12855CC8
|
||||
.long 0x134654C8
|
||||
|
||||
vxor 27,27,21
|
||||
b .Ltail_4x
|
||||
|
||||
.align 4
|
||||
.Ltwo:
|
||||
|
||||
|
||||
|
||||
vxor 2,3,0
|
||||
vperm 5,4,16,19
|
||||
vperm 6,4,16,18
|
||||
|
||||
vsldoi 29,4,17,8
|
||||
vor 30,17,17
|
||||
vsldoi 31,17,4,8
|
||||
|
||||
.long 0x12855CC8
|
||||
.long 0x13704CC8
|
||||
.long 0x134654C8
|
||||
|
||||
b .Ltail_4x
|
||||
|
||||
.align 4
|
||||
.Lone:
|
||||
|
||||
|
||||
vsldoi 29,4,9,8
|
||||
vor 30,9,9
|
||||
vsldoi 31,9,4,8
|
||||
|
||||
vxor 2,3,0
|
||||
vxor 20,20,20
|
||||
vxor 27,27,27
|
||||
vxor 26,26,26
|
||||
|
||||
b .Ltail_4x
|
||||
|
||||
.Ldone_4x:
|
||||
|
||||
.long 0x7C001F99
|
||||
|
||||
li 10,39
|
||||
li 11,55
|
||||
mtspr 256,12
|
||||
lvx 20,10,1
|
||||
addi 10,10,32
|
||||
lvx 21,11,1
|
||||
addi 11,11,32
|
||||
lvx 22,10,1
|
||||
addi 10,10,32
|
||||
lvx 23,11,1
|
||||
addi 11,11,32
|
||||
lvx 24,10,1
|
||||
addi 10,10,32
|
||||
lvx 25,11,1
|
||||
addi 11,11,32
|
||||
lvx 26,10,1
|
||||
addi 10,10,32
|
||||
lvx 27,11,1
|
||||
addi 11,11,32
|
||||
lvx 28,10,1
|
||||
addi 10,10,32
|
||||
lvx 29,11,1
|
||||
addi 11,11,32
|
||||
lvx 30,10,1
|
||||
lvx 31,11,1
|
||||
addi 1,1,232
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x04,0,0x80,0,4,0
|
||||
.long 0
|
||||
.size gcm_ghash_p8,.-gcm_ghash_p8
|
||||
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,586 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from poly1305-ppcfp.pl. */
|
||||
.machine "any"
|
||||
.text
|
||||
|
||||
.globl poly1305_init_fpu
|
||||
.type poly1305_init_fpu,@function
|
||||
.align 6
|
||||
poly1305_init_fpu:
|
||||
stwu 1,-24(1)
|
||||
mflr 6
|
||||
stw 6,28(1)
|
||||
|
||||
bl .LPICmeup
|
||||
|
||||
xor 0,0,0
|
||||
mtlr 6
|
||||
|
||||
lfd 8,8*0(5)
|
||||
lfd 9,8*1(5)
|
||||
lfd 10,8*2(5)
|
||||
lfd 11,8*3(5)
|
||||
lfd 12,8*4(5)
|
||||
lfd 13,8*5(5)
|
||||
|
||||
stfd 8,8*0(3)
|
||||
stfd 9,8*1(3)
|
||||
stfd 10,8*2(3)
|
||||
stfd 11,8*3(3)
|
||||
|
||||
.long 0x7c040040
|
||||
beq- .Lno_key
|
||||
|
||||
lfd 6,8*13(5)
|
||||
mffs 7
|
||||
|
||||
stfd 8,8*4(3)
|
||||
stfd 9,8*5(3)
|
||||
stfd 10,8*6(3)
|
||||
stfd 11,8*7(3)
|
||||
|
||||
li 8,4
|
||||
li 9,8
|
||||
li 10,12
|
||||
lwbrx 7,0,4
|
||||
lwbrx 8,8,4
|
||||
lwbrx 9,9,4
|
||||
lwbrx 10,10,4
|
||||
|
||||
lis 11,0xf000
|
||||
ori 12,11,3
|
||||
andc 7,7,11
|
||||
andc 8,8,12
|
||||
andc 9,9,12
|
||||
andc 10,10,12
|
||||
|
||||
stw 7,36(3)
|
||||
stw 8,44(3)
|
||||
stw 9,52(3)
|
||||
stw 10,60(3)
|
||||
|
||||
mtfsf 255,6
|
||||
stfd 8,8*18(3)
|
||||
stfd 9,8*19(3)
|
||||
stfd 10,8*20(3)
|
||||
stfd 11,8*21(3)
|
||||
stfd 12,8*22(3)
|
||||
stfd 13,8*23(3)
|
||||
|
||||
lfd 0,8*4(3)
|
||||
lfd 2,8*5(3)
|
||||
lfd 4,8*6(3)
|
||||
lfd 6,8*7(3)
|
||||
|
||||
fsub 0,0,8
|
||||
fsub 2,2,9
|
||||
fsub 4,4,10
|
||||
fsub 6,6,11
|
||||
|
||||
lfd 8,8*6(5)
|
||||
lfd 9,8*7(5)
|
||||
lfd 10,8*8(5)
|
||||
lfd 11,8*9(5)
|
||||
|
||||
fmul 3,2,13
|
||||
fmul 5,4,13
|
||||
stfd 7,8*15(3)
|
||||
fmul 7,6,13
|
||||
|
||||
fadd 1,0,8
|
||||
stfd 3,8*12(3)
|
||||
fadd 3,2,9
|
||||
stfd 5,8*13(3)
|
||||
fadd 5,4,10
|
||||
stfd 7,8*14(3)
|
||||
fadd 7,6,11
|
||||
|
||||
fsub 1,1,8
|
||||
fsub 3,3,9
|
||||
fsub 5,5,10
|
||||
fsub 7,7,11
|
||||
|
||||
lfd 8,8*10(5)
|
||||
lfd 9,8*11(5)
|
||||
lfd 10,8*12(5)
|
||||
|
||||
fsub 0,0,1
|
||||
fsub 2,2,3
|
||||
fsub 4,4,5
|
||||
fsub 6,6,7
|
||||
|
||||
stfd 1,8*5(3)
|
||||
stfd 3,8*7(3)
|
||||
stfd 5,8*9(3)
|
||||
stfd 7,8*11(3)
|
||||
|
||||
stfd 0,8*4(3)
|
||||
stfd 2,8*6(3)
|
||||
stfd 4,8*8(3)
|
||||
stfd 6,8*10(3)
|
||||
|
||||
lfd 2,8*12(3)
|
||||
lfd 4,8*13(3)
|
||||
lfd 6,8*14(3)
|
||||
lfd 0,8*15(3)
|
||||
|
||||
fadd 3,2,8
|
||||
fadd 5,4,9
|
||||
fadd 7,6,10
|
||||
|
||||
fsub 3,3,8
|
||||
fsub 5,5,9
|
||||
fsub 7,7,10
|
||||
|
||||
fsub 2,2,3
|
||||
fsub 4,4,5
|
||||
fsub 6,6,7
|
||||
|
||||
stfd 3,8*13(3)
|
||||
stfd 5,8*15(3)
|
||||
stfd 7,8*17(3)
|
||||
|
||||
stfd 2,8*12(3)
|
||||
stfd 4,8*14(3)
|
||||
stfd 6,8*16(3)
|
||||
|
||||
mtfsf 255,0
|
||||
.Lno_key:
|
||||
xor 3,3,3
|
||||
addi 1,1,24
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,4,1,0x80,0,2,0
|
||||
.size poly1305_init_fpu,.-poly1305_init_fpu
|
||||
|
||||
.globl poly1305_blocks_fpu
|
||||
.type poly1305_blocks_fpu,@function
|
||||
.align 4
|
||||
poly1305_blocks_fpu:
|
||||
srwi. 5,5,4
|
||||
beq- .Labort
|
||||
|
||||
stwu 1,-216(1)
|
||||
mflr 0
|
||||
stfd 14,72(1)
|
||||
stfd 15,80(1)
|
||||
stfd 16,88(1)
|
||||
stfd 17,96(1)
|
||||
stfd 18,104(1)
|
||||
stfd 19,112(1)
|
||||
stfd 20,120(1)
|
||||
stfd 21,128(1)
|
||||
stfd 22,136(1)
|
||||
stfd 23,144(1)
|
||||
stfd 24,152(1)
|
||||
stfd 25,160(1)
|
||||
stfd 26,168(1)
|
||||
stfd 27,176(1)
|
||||
stfd 28,184(1)
|
||||
stfd 29,192(1)
|
||||
stfd 30,200(1)
|
||||
stfd 31,208(1)
|
||||
stw 0,220(1)
|
||||
|
||||
xor 0,0,0
|
||||
li 10,1
|
||||
mtctr 5
|
||||
neg 5,5
|
||||
stw 0,56(1)
|
||||
stw 10,60(1)
|
||||
|
||||
lfd 8,8*18(3)
|
||||
lfd 9,8*19(3)
|
||||
lfd 10,8*20(3)
|
||||
lfd 11,8*21(3)
|
||||
lfd 12,8*22(3)
|
||||
lfd 13,8*23(3)
|
||||
|
||||
lfd 0,8*0(3)
|
||||
lfd 2,8*1(3)
|
||||
lfd 4,8*2(3)
|
||||
lfd 6,8*3(3)
|
||||
|
||||
stfd 8,24(1)
|
||||
oris 10,6,18736
|
||||
stfd 9,32(1)
|
||||
stfd 10,40(1)
|
||||
stw 10,48(1)
|
||||
|
||||
li 11,4
|
||||
li 12,8
|
||||
li 6,12
|
||||
lwbrx 7,0,4
|
||||
lwbrx 8,11,4
|
||||
lwbrx 9,12,4
|
||||
lwbrx 10,6,4
|
||||
addi 4,4,16
|
||||
|
||||
stw 7,28(1)
|
||||
stw 8,36(1)
|
||||
stw 9,44(1)
|
||||
stw 10,52(1)
|
||||
|
||||
mffs 28
|
||||
lfd 29,56(1)
|
||||
lfd 14,8*4(3)
|
||||
lfd 15,8*5(3)
|
||||
lfd 16,8*6(3)
|
||||
lfd 17,8*7(3)
|
||||
lfd 18,8*8(3)
|
||||
lfd 19,8*9(3)
|
||||
lfd 24,8*10(3)
|
||||
lfd 25,8*11(3)
|
||||
lfd 26,8*12(3)
|
||||
lfd 27,8*13(3)
|
||||
lfd 20,8*14(3)
|
||||
lfd 21,8*15(3)
|
||||
lfd 22,8*16(3)
|
||||
lfd 23,8*17(3)
|
||||
|
||||
stfd 28,56(1)
|
||||
mtfsf 255,29
|
||||
|
||||
addic 5,5,1
|
||||
addze 0,0
|
||||
slwi. 0,0,4
|
||||
sub 4,4,0
|
||||
|
||||
lfd 28,24(1)
|
||||
lfd 29,32(1)
|
||||
lfd 30,40(1)
|
||||
lfd 31,48(1)
|
||||
|
||||
fsub 0,0,8
|
||||
lwbrx 7,0,4
|
||||
fsub 2,2,9
|
||||
lwbrx 8,11,4
|
||||
fsub 4,4,10
|
||||
lwbrx 9,12,4
|
||||
fsub 6,6,11
|
||||
lwbrx 10,6,4
|
||||
|
||||
fsub 28,28,8
|
||||
addi 4,4,16
|
||||
fsub 29,29,9
|
||||
fsub 30,30,10
|
||||
fsub 31,31,11
|
||||
|
||||
fadd 28,28,0
|
||||
stw 7,28(1)
|
||||
fadd 29,29,2
|
||||
stw 8,36(1)
|
||||
fadd 30,30,4
|
||||
stw 9,44(1)
|
||||
fadd 31,31,6
|
||||
stw 10,52(1)
|
||||
|
||||
b .Lentry
|
||||
|
||||
.align 4
|
||||
.Loop:
|
||||
fsub 30,30,8
|
||||
addic 5,5,1
|
||||
fsub 31,31,9
|
||||
addze 0,0
|
||||
fsub 26,26,10
|
||||
slwi. 0,0,4
|
||||
fsub 27,27,11
|
||||
sub 4,4,0
|
||||
|
||||
fadd 0,0,30
|
||||
fadd 1,1,31
|
||||
fadd 4,4,26
|
||||
fadd 5,5,27
|
||||
|
||||
|
||||
fadd 26,2,10
|
||||
lwbrx 7,0,4
|
||||
fadd 27,3,10
|
||||
lwbrx 8,11,4
|
||||
fadd 30,6,12
|
||||
lwbrx 9,12,4
|
||||
fadd 31,7,12
|
||||
lwbrx 10,6,4
|
||||
fadd 24,0,9
|
||||
addi 4,4,16
|
||||
fadd 25,1,9
|
||||
fadd 28,4,11
|
||||
fadd 29,5,11
|
||||
|
||||
fsub 26,26,10
|
||||
stw 7,28(1)
|
||||
fsub 27,27,10
|
||||
stw 8,36(1)
|
||||
fsub 30,30,12
|
||||
stw 9,44(1)
|
||||
fsub 31,31,12
|
||||
stw 10,52(1)
|
||||
fsub 24,24,9
|
||||
fsub 25,25,9
|
||||
fsub 28,28,11
|
||||
fsub 29,29,11
|
||||
|
||||
fsub 2,2,26
|
||||
fsub 3,3,27
|
||||
fsub 6,6,30
|
||||
fsub 7,7,31
|
||||
fsub 4,4,28
|
||||
fsub 5,5,29
|
||||
fsub 0,0,24
|
||||
fsub 1,1,25
|
||||
|
||||
fadd 2,2,24
|
||||
fadd 3,3,25
|
||||
fadd 6,6,28
|
||||
fadd 7,7,29
|
||||
fadd 4,4,26
|
||||
fadd 5,5,27
|
||||
fmadd 0,30,13,0
|
||||
fmadd 1,31,13,1
|
||||
|
||||
fadd 29,2,3
|
||||
lfd 26,8*12(3)
|
||||
fadd 31,6,7
|
||||
lfd 27,8*13(3)
|
||||
fadd 30,4,5
|
||||
lfd 24,8*10(3)
|
||||
fadd 28,0,1
|
||||
lfd 25,8*11(3)
|
||||
.Lentry:
|
||||
fmul 0,22,29
|
||||
fmul 1,23,29
|
||||
fmul 4,16,29
|
||||
fmul 5,17,29
|
||||
fmul 2,14,29
|
||||
fmul 3,15,29
|
||||
fmul 6,18,29
|
||||
fmul 7,19,29
|
||||
|
||||
fmadd 0,26,31,0
|
||||
fmadd 1,27,31,1
|
||||
fmadd 4,22,31,4
|
||||
fmadd 5,23,31,5
|
||||
fmadd 2,20,31,2
|
||||
fmadd 3,21,31,3
|
||||
fmadd 6,14,31,6
|
||||
fmadd 7,15,31,7
|
||||
|
||||
fmadd 0,20,30,0
|
||||
fmadd 1,21,30,1
|
||||
fmadd 4,14,30,4
|
||||
fmadd 5,15,30,5
|
||||
fmadd 2,22,30,2
|
||||
fmadd 3,23,30,3
|
||||
fmadd 6,16,30,6
|
||||
fmadd 7,17,30,7
|
||||
|
||||
fmadd 0,14,28,0
|
||||
lfd 30,24(1)
|
||||
fmadd 1,15,28,1
|
||||
lfd 31,32(1)
|
||||
fmadd 4,18,28,4
|
||||
lfd 26,40(1)
|
||||
fmadd 5,19,28,5
|
||||
lfd 27,48(1)
|
||||
fmadd 2,16,28,2
|
||||
fmadd 3,17,28,3
|
||||
fmadd 6,24,28,6
|
||||
fmadd 7,25,28,7
|
||||
|
||||
bdnz .Loop
|
||||
|
||||
|
||||
fadd 24,0,9
|
||||
fadd 25,1,9
|
||||
fadd 28,4,11
|
||||
fadd 29,5,11
|
||||
fadd 26,2,10
|
||||
fadd 27,3,10
|
||||
fadd 30,6,12
|
||||
fadd 31,7,12
|
||||
|
||||
fsub 24,24,9
|
||||
fsub 25,25,9
|
||||
fsub 28,28,11
|
||||
fsub 29,29,11
|
||||
fsub 26,26,10
|
||||
fsub 27,27,10
|
||||
fsub 30,30,12
|
||||
fsub 31,31,12
|
||||
|
||||
fsub 2,2,26
|
||||
fsub 3,3,27
|
||||
fsub 6,6,30
|
||||
fsub 7,7,31
|
||||
fsub 4,4,28
|
||||
fsub 5,5,29
|
||||
fsub 0,0,24
|
||||
fsub 1,1,25
|
||||
|
||||
fadd 2,2,24
|
||||
fadd 3,3,25
|
||||
fadd 6,6,28
|
||||
fadd 7,7,29
|
||||
fadd 4,4,26
|
||||
fadd 5,5,27
|
||||
fmadd 0,30,13,0
|
||||
fmadd 1,31,13,1
|
||||
|
||||
fadd 29,2,3
|
||||
fadd 31,6,7
|
||||
fadd 30,4,5
|
||||
fadd 28,0,1
|
||||
|
||||
lfd 0,56(1)
|
||||
fadd 29,29,9
|
||||
fadd 31,31,11
|
||||
fadd 30,30,10
|
||||
fadd 28,28,8
|
||||
|
||||
stfd 29,8*1(3)
|
||||
stfd 31,8*3(3)
|
||||
stfd 30,8*2(3)
|
||||
stfd 28,8*0(3)
|
||||
|
||||
mtfsf 255,0
|
||||
lfd 14,72(1)
|
||||
lfd 15,80(1)
|
||||
lfd 16,88(1)
|
||||
lfd 17,96(1)
|
||||
lfd 18,104(1)
|
||||
lfd 19,112(1)
|
||||
lfd 20,120(1)
|
||||
lfd 21,128(1)
|
||||
lfd 22,136(1)
|
||||
lfd 23,144(1)
|
||||
lfd 24,152(1)
|
||||
lfd 25,160(1)
|
||||
lfd 26,168(1)
|
||||
lfd 27,176(1)
|
||||
lfd 28,184(1)
|
||||
lfd 29,192(1)
|
||||
lfd 30,200(1)
|
||||
lfd 31,208(1)
|
||||
addi 1,1,216
|
||||
.Labort:
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,4,1,0x80,0,4,0
|
||||
.size poly1305_blocks_fpu,.-poly1305_blocks_fpu
|
||||
.globl poly1305_emit_fpu
|
||||
.type poly1305_emit_fpu,@function
|
||||
.align 4
|
||||
poly1305_emit_fpu:
|
||||
stwu 1,-40(1)
|
||||
mflr 0
|
||||
stw 28,24(1)
|
||||
stw 29,28(1)
|
||||
stw 30,32(1)
|
||||
stw 31,36(1)
|
||||
stw 0,44(1)
|
||||
|
||||
lwz 28,0(3)
|
||||
lwz 7,4(3)
|
||||
lwz 29,8(3)
|
||||
lwz 8,12(3)
|
||||
lwz 30,16(3)
|
||||
lwz 9,20(3)
|
||||
lwz 31,24(3)
|
||||
lwz 10,28(3)
|
||||
|
||||
lis 0,0xfff0
|
||||
andc 28,28,0
|
||||
andc 29,29,0
|
||||
andc 30,30,0
|
||||
andc 31,31,0
|
||||
li 0,3
|
||||
|
||||
srwi 6,31,2
|
||||
and 11,31,0
|
||||
andc 31,31,0
|
||||
add 31,31,6
|
||||
addc 7,7,31
|
||||
adde 8,8,28
|
||||
adde 9,9,29
|
||||
adde 10,10,30
|
||||
addze 11,11
|
||||
|
||||
addic 28,7,5
|
||||
addze 29,8
|
||||
addze 30,9
|
||||
addze 31,10
|
||||
addze 0,11
|
||||
|
||||
srwi 0,0,2
|
||||
neg 0,0
|
||||
srawi 0,0,31
|
||||
|
||||
andc 7,7,0
|
||||
and 28,28,0
|
||||
andc 8,8,0
|
||||
and 29,29,0
|
||||
or 7,7,28
|
||||
lwz 28,0(5)
|
||||
andc 9,9,0
|
||||
and 30,30,0
|
||||
or 8,8,29
|
||||
lwz 29,4(5)
|
||||
andc 10,10,0
|
||||
and 31,31,0
|
||||
or 9,9,30
|
||||
lwz 30,8(5)
|
||||
or 10,10,31
|
||||
lwz 31,12(5)
|
||||
|
||||
addc 7,7,28
|
||||
adde 8,8,29
|
||||
adde 9,9,30
|
||||
adde 10,10,31
|
||||
li 29,4
|
||||
stwbrx 7,0,4
|
||||
li 30,8
|
||||
stwbrx 8,29,4
|
||||
li 31,12
|
||||
stwbrx 9,30,4
|
||||
stwbrx 10,31,4
|
||||
lwz 28,24(1)
|
||||
lwz 29,28(1)
|
||||
lwz 30,32(1)
|
||||
lwz 31,36(1)
|
||||
addi 1,1,40
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,4,1,0x80,4,3,0
|
||||
.size poly1305_emit_fpu,.-poly1305_emit_fpu
|
||||
.align 6
|
||||
.LPICmeup:
|
||||
mflr 0
|
||||
bcl 20,31,$+4
|
||||
mflr 5
|
||||
addi 5,5,56
|
||||
mtlr 0
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,0,0
|
||||
.space 28
|
||||
|
||||
.long 0x43300000,0x00000000
|
||||
.long 0x45300000,0x00000000
|
||||
.long 0x47300000,0x00000000
|
||||
.long 0x49300000,0x00000000
|
||||
.long 0x4b500000,0x00000000
|
||||
|
||||
.long 0x37f40000,0x00000000
|
||||
|
||||
.long 0x44300000,0x00000000
|
||||
.long 0x46300000,0x00000000
|
||||
.long 0x48300000,0x00000000
|
||||
.long 0x4a300000,0x00000000
|
||||
.long 0x3e300000,0x00000000
|
||||
.long 0x40300000,0x00000000
|
||||
.long 0x42300000,0x00000000
|
||||
|
||||
.long 0x00000000,0x00000001
|
||||
.byte 80,111,108,121,49,51,48,53,32,102,111,114,32,80,80,67,32,70,80,85,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 4
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,356 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from ppccpuid.pl. */
|
||||
.machine "any"
|
||||
.text
|
||||
|
||||
/*
 * OPENSSL_fpu_probe - execute one floating-point instruction and return.
 *
 * "fmr 0,0" copies f0 onto itself, so no register value changes.
 * NOTE(review): presumably the caller runs this under an illegal-
 * instruction handler to detect whether an FPU is usable - confirm
 * against the C capability-detection code.
 */
.globl OPENSSL_fpu_probe
|
||||
.type OPENSSL_fpu_probe,@function
|
||||
.align 4
|
||||
OPENSSL_fpu_probe:
|
||||
fmr 0,0
|
||||
blr
|
||||
/* Trailing data after blr; looks like a traceback-table tag (not executed). */
.long 0
|
||||
.byte 0,12,0x14,0,0,0,0,0
|
||||
.size OPENSSL_fpu_probe,.-OPENSSL_fpu_probe
|
||||
/*
 * OPENSSL_ppc64_probe - execute two 64-bit-only instructions and return.
 *
 * fcfid (convert from integer doubleword) overwrites f1, and rldicl
 * (rotate left doubleword) overwrites r0; both are scratch here.
 * NOTE(review): presumably used under an illegal-instruction handler to
 * detect 64-bit capability from 32-bit code - confirm against caller.
 *
 * Clobbers: f1, r0
 */
.globl OPENSSL_ppc64_probe
|
||||
.type OPENSSL_ppc64_probe,@function
|
||||
.align 4
|
||||
OPENSSL_ppc64_probe:
|
||||
fcfid 1,1
|
||||
rldicl 0,0,32,32
|
||||
blr
|
||||
/* Trailing data after blr; looks like a traceback-table tag (not executed). */
.long 0
|
||||
.byte 0,12,0x14,0,0,0,0,0
|
||||
.size OPENSSL_ppc64_probe,.-OPENSSL_ppc64_probe
|
||||
|
||||
/*
 * OPENSSL_altivec_probe - execute one AltiVec instruction and return.
 *
 * The instruction is emitted as a raw word so that the file assembles
 * even when the assembler is not told about vector extensions.
 * NOTE(review): presumably called under an illegal-instruction handler
 * to detect AltiVec - confirm against caller.
 */
.globl OPENSSL_altivec_probe
|
||||
.type OPENSSL_altivec_probe,@function
|
||||
.align 4
|
||||
OPENSSL_altivec_probe:
|
||||
/* 0x10000484 decodes as primary opcode 4, XO 0x484: vor v0,v0,v0 (v0 unchanged). */
.long 0x10000484
|
||||
blr
|
||||
/* Trailing data after blr; looks like a traceback-table tag (not executed). */
.long 0
|
||||
.byte 0,12,0x14,0,0,0,0,0
|
||||
.size OPENSSL_altivec_probe,.-OPENSSL_altivec_probe
|
||||
|
||||
/*
 * OPENSSL_crypto207_probe - execute one VSX load and one vector-crypto
 * instruction, both emitted as raw words, then return.
 *
 * NOTE(review): presumably called under an illegal-instruction handler
 * to detect the PowerISA 2.07 crypto extensions - confirm against caller.
 *
 * Clobbers: v0
 */
.globl OPENSSL_crypto207_probe
|
||||
.type OPENSSL_crypto207_probe,@function
|
||||
.align 4
|
||||
OPENSSL_crypto207_probe:
|
||||
/* 0x7C000E99: opcode 31, XO 844, TX=1 -> lxvd2x into v0 from the address in r1. */
.long 0x7C000E99
|
||||
/* 0x10000508: opcode 4, XO 0x508 -> vcipher v0,v0,v0. */
.long 0x10000508
|
||||
blr
|
||||
/* Trailing data after blr; looks like a traceback-table tag (not executed). */
.long 0
|
||||
.byte 0,12,0x14,0,0,0,0,0
|
||||
.size OPENSSL_crypto207_probe,.-OPENSSL_crypto207_probe
|
||||
|
||||
/*
 * OPENSSL_madd300_probe - execute two integer multiply-add instructions,
 * emitted as raw words, then return.
 *
 * r0 is cleared first so both multiply-adds operate on zero inputs.
 * NOTE(review): presumably called under an illegal-instruction handler
 * to detect PowerISA 3.0 multiply-add support - confirm against caller.
 *
 * Clobbers: r0, r3
 */
.globl OPENSSL_madd300_probe
|
||||
.type OPENSSL_madd300_probe,@function
|
||||
.align 4
|
||||
OPENSSL_madd300_probe:
|
||||
xor 0,0,0
|
||||
/* 0x10600033: opcode 4, XO 51 -> maddld r3,r0,r0,r0. */
.long 0x10600033
|
||||
/* 0x10600031: opcode 4, XO 49 -> maddhdu r3,r0,r0,r0. */
.long 0x10600031
|
||||
blr
|
||||
/* Trailing data after blr; looks like a traceback-table tag (not executed). */
.long 0
|
||||
.byte 0,12,0x14,0,0,0,0,0
|
||||
/* Record the symbol size, as every other function in this file does. */
.size OPENSSL_madd300_probe,.-OPENSSL_madd300_probe
|
||||
|
||||
/*
 * OPENSSL_wipe_cpu - scrub scratch registers before returning.
 *
 * r0 and r4..r12 are zeroed with xor.  f0..f13 are overwritten with the
 * current contents of f31 (they are NOT set to zero; f31 itself is left
 * untouched).  r2 and r13 are not modified.
 *
 * Out: r3 = value of r1 (the stack pointer) at the time of the call
 */
.globl OPENSSL_wipe_cpu
|
||||
.type OPENSSL_wipe_cpu,@function
|
||||
.align 4
|
||||
OPENSSL_wipe_cpu:
|
||||
xor 0,0,0
|
||||
fmr 0,31
|
||||
fmr 1,31
|
||||
fmr 2,31
|
||||
/* Return value: the caller's stack pointer. */
mr 3,1
|
||||
fmr 3,31
|
||||
xor 4,4,4
|
||||
fmr 4,31
|
||||
xor 5,5,5
|
||||
fmr 5,31
|
||||
xor 6,6,6
|
||||
fmr 6,31
|
||||
xor 7,7,7
|
||||
fmr 7,31
|
||||
xor 8,8,8
|
||||
fmr 8,31
|
||||
xor 9,9,9
|
||||
fmr 9,31
|
||||
xor 10,10,10
|
||||
fmr 10,31
|
||||
xor 11,11,11
|
||||
fmr 11,31
|
||||
xor 12,12,12
|
||||
fmr 12,31
|
||||
fmr 13,31
|
||||
blr
|
||||
/* Trailing data after blr; looks like a traceback-table tag (not executed). */
.long 0
|
||||
.byte 0,12,0x14,0,0,0,0,0
|
||||
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
|
||||
|
||||
/*
 * OPENSSL_atomic_add - atomically add a 32-bit value to a word in memory.
 *
 * In:       r3 = address of the 32-bit word, r4 = addend
 * Out:      r3 = the new (post-add) value that was stored
 * Clobbers: r0, r5, cr0
 *
 * Uses a load-reserve / store-conditional retry loop: if the reservation
 * is lost before stwcx. completes, the whole read-modify-write restarts.
 */
.globl OPENSSL_atomic_add
|
||||
.type OPENSSL_atomic_add,@function
|
||||
.align 4
|
||||
OPENSSL_atomic_add:
|
||||
.Ladd: lwarx 5,0,3
|
||||
add 0,4,5
|
||||
stwcx. 0,0,3
|
||||
/* cr0.EQ is clear when the conditional store failed: retry. */
bne- .Ladd
|
||||
mr 3,0
|
||||
blr
|
||||
/* Trailing data after blr; looks like a traceback-table tag (not executed). */
.long 0
|
||||
.byte 0,12,0x14,0,0,0,2,0
|
||||
.long 0
|
||||
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
|
||||
|
||||
/*
 * OPENSSL_rdtsc_mftb - read the time base with the mftb instruction.
 *
 * Out: r3 = time base value as returned by "mftb" (32-bit register here)
 */
.globl OPENSSL_rdtsc_mftb
|
||||
.type OPENSSL_rdtsc_mftb,@function
|
||||
.align 4
|
||||
OPENSSL_rdtsc_mftb:
|
||||
mftb 3
|
||||
blr
|
||||
/* Trailing data after blr; looks like a traceback-table tag (not executed). */
.long 0
|
||||
.byte 0,12,0x14,0,0,0,0,0
|
||||
.size OPENSSL_rdtsc_mftb,.-OPENSSL_rdtsc_mftb
|
||||
|
||||
/*
 * OPENSSL_rdtsc_mfspr268 - same purpose as OPENSSL_rdtsc_mftb, but reads
 * the counter through "mfspr" from SPR 268 instead of using "mftb".
 * NOTE(review): SPR 268 is the time base per the Power ISA; presumably
 * this variant exists for CPUs where mftb is unavailable - confirm.
 *
 * Out: r3 = value read from SPR 268
 */
.globl OPENSSL_rdtsc_mfspr268
|
||||
.type OPENSSL_rdtsc_mfspr268,@function
|
||||
.align 4
|
||||
OPENSSL_rdtsc_mfspr268:
|
||||
mfspr 3,268
|
||||
blr
|
||||
/* Trailing data after blr; looks like a traceback-table tag (not executed). */
.long 0
|
||||
.byte 0,12,0x14,0,0,0,0,0
|
||||
.size OPENSSL_rdtsc_mfspr268,.-OPENSSL_rdtsc_mfspr268
|
||||
|
||||
/*
 * OPENSSL_cleanse - overwrite a memory region with zero bytes.
 *
 * In:       r3 = start address, r4 = length in bytes
 * Clobbers: r0, r3, r4, r5, ctr, cr0
 *
 * Strategy:
 *   len == 0 : return immediately.
 *   len <  7 : byte-at-a-time loop (.Little).
 *   len >= 7 : store single bytes until r3 is 4-byte aligned (.Lot),
 *              then store len/4 words (.Laligned), then finish the
 *              remaining len%4 bytes through .Little.
 */
.globl OPENSSL_cleanse
|
||||
.type OPENSSL_cleanse,@function
|
||||
.align 4
|
||||
OPENSSL_cleanse:
|
||||
cmplwi 4,7
|
||||
li 0,0
|
||||
bge .Lot
|
||||
cmplwi 4,0
|
||||
/* 0x4DC20020 decodes as bclr 14,2 with a not-taken hint (beqlr-): return if len == 0. */
.long 0x4DC20020
|
||||
.Little: mtctr 4
|
||||
stb 0,0(3)
|
||||
addi 3,3,1
|
||||
/* $-8 is two instructions back, i.e. the stb above. */
bdnz $-8
|
||||
blr
|
||||
/* Head alignment: at most 3 single-byte stores; len >= 7 here so r4 cannot underflow. */
.Lot: andi. 5,3,3
|
||||
beq .Laligned
|
||||
stb 0,0(3)
|
||||
subi 4,4,1
|
||||
addi 3,3,1
|
||||
b .Lot
|
||||
.Laligned:
|
||||
srwi 5,4,2
|
||||
mtctr 5
|
||||
stw 0,0(3)
|
||||
addi 3,3,4
|
||||
/* $-8 is two instructions back, i.e. the stw above. */
bdnz $-8
|
||||
/* Tail: 0..3 bytes left over after the word stores. */
andi. 4,4,3
|
||||
bne .Little
|
||||
blr
|
||||
/* Trailing data after blr; looks like a traceback-table tag (not executed). */
.long 0
|
||||
.byte 0,12,0x14,0,0,0,2,0
|
||||
.long 0
|
||||
.size OPENSSL_cleanse,.-OPENSSL_cleanse
|
||||
|
||||
/*
 * CRYPTO_memcmp - compare two byte buffers without data-dependent branches.
 *
 * In:       r3 = first buffer, r4 = second buffer, r5 = length in bytes
 * Out:      r3 = 0 if all bytes are equal (or length is 0), 1 otherwise
 * Clobbers: r0, r6, r7, ctr, cr0
 *
 * Every byte pair is XORed and the result ORed into r0, so the loop
 * always runs for the full length regardless of where a mismatch occurs.
 * The only branches depend on the length, not on buffer contents.
 */
.globl CRYPTO_memcmp
|
||||
.type CRYPTO_memcmp,@function
|
||||
.align 4
|
||||
CRYPTO_memcmp:
|
||||
cmplwi 5,0
|
||||
li 0,0
|
||||
beq .Lno_data
|
||||
mtctr 5
|
||||
.Loop_cmp:
|
||||
lbz 6,0(3)
|
||||
addi 3,3,1
|
||||
lbz 7,0(4)
|
||||
addi 4,4,1
|
||||
xor 6,6,7
|
||||
or 0,0,6
|
||||
bdnz .Loop_cmp
|
||||
|
||||
.Lno_data:
|
||||
/*
 * r0 is in 0..255.  0 - r0 is negative exactly when r0 != 0, so taking
 * the top (sign) bit turns "any difference" into 1 and "none" into 0.
 */
li 3,0
|
||||
sub 3,3,0
|
||||
extrwi 3,3,1,0
|
||||
blr
|
||||
/* Trailing data after blr; looks like a traceback-table tag (not executed). */
.long 0
|
||||
.byte 0,12,0x14,0,0,0,3,0
|
||||
.long 0
|
||||
.size CRYPTO_memcmp,.-CRYPTO_memcmp
|
||||
/*
 * OPENSSL_instrument_bus_mftb - add time-base deltas into a word array.
 *
 * In:       r3 = address of a 32-bit word array, r4 = number of words
 * Out:      r3 = r4 (the count passed in)
 * Clobbers: r6, r7, r8, ctr, cr0
 *
 * For each word: read the time base, compute the delta since the
 * previous reading, flush the word's cache block (dcbf), then add the
 * delta to the word with lwarx/stwcx. followed by an unconditional stwx.
 * The stwcx. result is never tested, so the plain store is what
 * guarantees the update lands.
 * NOTE(review): presumably the flush plus reservation traffic is meant
 * to make the timing depend on bus activity - confirm against caller.
 */
.globl OPENSSL_instrument_bus_mftb
|
||||
.type OPENSSL_instrument_bus_mftb,@function
|
||||
.align 4
|
||||
OPENSSL_instrument_bus_mftb:
|
||||
mtctr 4
|
||||
|
||||
/* r7 = previous time-base reading, r8 = current delta (0 for the warm-up pass). */
mftb 7
|
||||
li 8,0
|
||||
|
||||
/* Warm-up pass on the first word; r3 is not advanced here. */
dcbf 0,3
|
||||
lwarx 6,0,3
|
||||
add 6,6,8
|
||||
stwcx. 6,0,3
|
||||
stwx 6,0,3
|
||||
|
||||
.Loop: mftb 6
|
||||
sub 8,6,7
|
||||
mr 7,6
|
||||
dcbf 0,3
|
||||
lwarx 6,0,3
|
||||
add 6,6,8
|
||||
stwcx. 6,0,3
|
||||
stwx 6,0,3
|
||||
addi 3,3,4
|
||||
bdnz .Loop
|
||||
|
||||
mr 3,4
|
||||
blr
|
||||
/* Trailing data after blr; looks like a traceback-table tag (not executed). */
.long 0
|
||||
.byte 0,12,0x14,0,0,0,2,0
|
||||
.long 0
|
||||
.size OPENSSL_instrument_bus_mftb,.-OPENSSL_instrument_bus_mftb
|
||||
|
||||
/*
 * OPENSSL_instrument_bus2_mftb - like OPENSSL_instrument_bus_mftb, but
 * only advances to the next array word when the measured delta changes,
 * and gives up after a bounded number of probes.
 *
 * In:       r3 = address of a 32-bit word array
 *           r4 = number of words
 *           r5 = maximum number of probes (decremented once per pass)
 * Out:      r3 = count - (bytes still unfilled / 4), i.e. the number of
 *                words the pointer was advanced over
 * Clobbers: r0, r4, r5, r6, r7, r8, r9, cr0, cr7
 *
 * Register roles inside the loop:
 *   r0 = original count      r4 = bytes remaining (count * 4)
 *   r7 = previous time base  r8 = current delta   r9 = previous delta
 */
.globl OPENSSL_instrument_bus2_mftb
|
||||
.type OPENSSL_instrument_bus2_mftb,@function
|
||||
.align 4
|
||||
OPENSSL_instrument_bus2_mftb:
|
||||
mr 0,4
|
||||
slwi 4,4,2
|
||||
|
||||
mftb 7
|
||||
li 8,0
|
||||
|
||||
/* Warm-up pass with a zero delta. */
dcbf 0,3
|
||||
lwarx 6,0,3
|
||||
add 6,6,8
|
||||
stwcx. 6,0,3
|
||||
stwx 6,0,3
|
||||
|
||||
mftb 6
|
||||
sub 8,6,7
|
||||
mr 7,6
|
||||
mr 9,8
|
||||
.Loop2:
|
||||
dcbf 0,3
|
||||
lwarx 6,0,3
|
||||
add 6,6,8
|
||||
stwcx. 6,0,3
|
||||
stwx 6,0,3
|
||||
|
||||
/* Probe budget exhausted? */
addic. 5,5,-1
|
||||
beq .Ldone2
|
||||
|
||||
mftb 6
|
||||
sub 8,6,7
|
||||
mr 7,6
|
||||
/* 0x7f884840 decodes as cmplw cr7,r8,r9: compare new delta with previous delta. */
.long 0x7f884840
|
||||
mr 9,8
|
||||
|
||||
/*
 * Turn "cr7.EQ clear" into a byte step: after inverting CR, rotating left
 * by 1 and keeping only bit 29, r6 is 4 when the deltas differ, else 0.
 */
mfcr 6
|
||||
not 6,6
|
||||
rlwinm 6,6,1,29,29
|
||||
|
||||
/* Advance the pointer and shrink the remaining byte count by 0 or 4. */
sub. 4,4,6
|
||||
add 3,3,6
|
||||
bne .Loop2
|
||||
|
||||
.Ldone2:
|
||||
srwi 4,4,2
|
||||
sub 3,0,4
|
||||
blr
|
||||
/* Trailing data after blr; looks like a traceback-table tag (not executed). */
.long 0
|
||||
.byte 0,12,0x14,0,0,0,3,0
|
||||
.long 0
|
||||
.size OPENSSL_instrument_bus2_mftb,.-OPENSSL_instrument_bus2_mftb
|
||||
|
||||
/*
 * OPENSSL_instrument_bus_mfspr268 - add counter deltas into a word array,
 * reading the counter with "mfspr rX,268" rather than "mftb".
 *
 * In:       r3 = address of a 32-bit word array, r4 = number of words
 * Out:      r3 = r4 (the count passed in)
 * Clobbers: r6, r7, r8, ctr, cr0
 *
 * For each word: read SPR 268, compute the delta since the previous
 * reading, flush the word's cache block (dcbf), then add the delta to
 * the word with lwarx/stwcx. followed by an unconditional stwx.  The
 * stwcx. result is never tested.
 */
.globl OPENSSL_instrument_bus_mfspr268
|
||||
.type OPENSSL_instrument_bus_mfspr268,@function
|
||||
.align 4
|
||||
OPENSSL_instrument_bus_mfspr268:
|
||||
mtctr 4
|
||||
|
||||
/* r7 = previous counter reading, r8 = current delta (0 for the warm-up pass). */
mfspr 7,268
|
||||
li 8,0
|
||||
|
||||
/* Warm-up pass on the first word; r3 is not advanced here. */
dcbf 0,3
|
||||
lwarx 6,0,3
|
||||
add 6,6,8
|
||||
stwcx. 6,0,3
|
||||
stwx 6,0,3
|
||||
|
||||
.Loop3: mfspr 6,268
|
||||
sub 8,6,7
|
||||
mr 7,6
|
||||
dcbf 0,3
|
||||
lwarx 6,0,3
|
||||
add 6,6,8
|
||||
stwcx. 6,0,3
|
||||
stwx 6,0,3
|
||||
addi 3,3,4
|
||||
bdnz .Loop3
|
||||
|
||||
mr 3,4
|
||||
blr
|
||||
/* Trailing data after blr; looks like a traceback-table tag (not executed). */
.long 0
|
||||
.byte 0,12,0x14,0,0,0,2,0
|
||||
.long 0
|
||||
.size OPENSSL_instrument_bus_mfspr268,.-OPENSSL_instrument_bus_mfspr268
|
||||
|
||||
/*
 * OPENSSL_instrument_bus2_mfspr268 - bounded variant of the bus probe
 * that reads the counter with "mfspr rX,268" and only advances to the
 * next array word when the measured delta changes.
 *
 * In:       r3 = address of a 32-bit word array
 *           r4 = number of words
 *           r5 = maximum number of probes (decremented once per pass)
 * Out:      r3 = count - (bytes still unfilled / 4), i.e. the number of
 *                words the pointer was advanced over
 * Clobbers: r0, r4, r5, r6, r7, r8, r9, cr0, cr7
 *
 * Register roles inside the loop:
 *   r0 = original count      r4 = bytes remaining (count * 4)
 *   r7 = previous reading    r8 = current delta   r9 = previous delta
 */
.globl OPENSSL_instrument_bus2_mfspr268
|
||||
.type OPENSSL_instrument_bus2_mfspr268,@function
|
||||
.align 4
|
||||
OPENSSL_instrument_bus2_mfspr268:
|
||||
mr 0,4
|
||||
slwi 4,4,2
|
||||
|
||||
mfspr 7,268
|
||||
li 8,0
|
||||
|
||||
/* Warm-up pass with a zero delta. */
dcbf 0,3
|
||||
lwarx 6,0,3
|
||||
add 6,6,8
|
||||
stwcx. 6,0,3
|
||||
stwx 6,0,3
|
||||
|
||||
mfspr 6,268
|
||||
sub 8,6,7
|
||||
mr 7,6
|
||||
mr 9,8
|
||||
.Loop4:
|
||||
dcbf 0,3
|
||||
lwarx 6,0,3
|
||||
add 6,6,8
|
||||
stwcx. 6,0,3
|
||||
stwx 6,0,3
|
||||
|
||||
/* Probe budget exhausted? */
addic. 5,5,-1
|
||||
beq .Ldone4
|
||||
|
||||
mfspr 6,268
|
||||
sub 8,6,7
|
||||
mr 7,6
|
||||
/* 0x7f884840 decodes as cmplw cr7,r8,r9: compare new delta with previous delta. */
.long 0x7f884840
|
||||
mr 9,8
|
||||
|
||||
/*
 * Turn "cr7.EQ clear" into a byte step: after inverting CR, rotating left
 * by 1 and keeping only bit 29, r6 is 4 when the deltas differ, else 0.
 */
mfcr 6
|
||||
not 6,6
|
||||
rlwinm 6,6,1,29,29
|
||||
|
||||
/* Advance the pointer and shrink the remaining byte count by 0 or 4. */
sub. 4,4,6
|
||||
add 3,3,6
|
||||
bne .Loop4
|
||||
|
||||
.Ldone4:
|
||||
srwi 4,4,2
|
||||
sub 3,0,4
|
||||
blr
|
||||
/* Trailing data after blr; looks like a traceback-table tag (not executed). */
.long 0
|
||||
.byte 0,12,0x14,0,0,0,3,0
|
||||
.long 0
|
||||
.size OPENSSL_instrument_bus2_mfspr268,.-OPENSSL_instrument_bus2_mfspr268
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,735 +0,0 @@
|
||||
/* Do not modify. This file is auto-generated from sha512p8-ppc.pl. */
|
||||
.machine "any"
|
||||
.text
|
||||
|
||||
/*
 * sha256_block_p8 - SHA-256 block function using vector-crypto
 * instructions (32-bit PowerPC build).
 *
 * In (as used by the code):
 *   r3 = address of the eight 32-bit state words (loaded at entry,
 *        stored back at exit)
 *   r4 = input data; consumed 16 bytes per load, 64 bytes per block
 *   r5 = number of 64-byte blocks (decremented once per .Loop pass)
 *
 * Stack: allocates a 328-byte frame.  Saves v24-v31, VRSAVE (SPR 256)
 * and r26-r31 in it, and LR at 332(r1), which lies in the caller's
 * frame.  The area at r1+47 (stvx truncates the address to a 16-byte
 * boundary) holds a copy of the eight working vectors for the
 * feed-forward addition at the end of each block.
 *
 * Register roles:
 *   v0-v7         working variables, one per vector
 *   v8-v19,v24-v27  sixteen message-schedule vectors
 *   v28           current round constant, loaded from the table via r7
 *   v29,v30       temporaries for the select / sigma results
 *   r6            table base (set by .LPICmeup), r7 = running pointer
 *   r10,r26-r31   fixed byte offsets 0x10..0x70 for indexed loads/stores
 *   r11           scratch-area / save-area pointer
 *   r0 -> ctr     3 passes of .L16_xx (16 rounds each) after the first
 *                 16 unrolled rounds, 64 rounds per block in total
 *
 * Raw ".long" words are instructions emitted by encoding.  The
 * 0x13Cx.E82 / 0x13Dx.E82 family decodes as vshasigmaw (opcode 4,
 * XO 0x682) with v30 as target; 0x7C..1E19 and 0x7C..1F19 decode as
 * lxvw4x / stxvw4x, and 0x7D002699-style words as lxvd2x from r4.
 */
.globl sha256_block_p8
|
||||
.type sha256_block_p8,@function
|
||||
.align 6
|
||||
sha256_block_p8:
|
||||
stwu 1,-328(1)
|
||||
mflr 8
|
||||
li 10,175
|
||||
li 11,191
|
||||
stvx 24,10,1
|
||||
addi 10,10,32
|
||||
mfspr 12,256
|
||||
stvx 25,11,1
|
||||
addi 11,11,32
|
||||
stvx 26,10,1
|
||||
addi 10,10,32
|
||||
stvx 27,11,1
|
||||
addi 11,11,32
|
||||
stvx 28,10,1
|
||||
addi 10,10,32
|
||||
stvx 29,11,1
|
||||
addi 11,11,32
|
||||
stvx 30,10,1
|
||||
stvx 31,11,1
|
||||
li 11,-4096+255
|
||||
stw 12,300(1)
|
||||
li 10,0x10
|
||||
stw 26,304(1)
|
||||
li 26,0x20
|
||||
stw 27,308(1)
|
||||
li 27,0x30
|
||||
stw 28,312(1)
|
||||
li 28,0x40
|
||||
stw 29,316(1)
|
||||
li 29,0x50
|
||||
stw 30,320(1)
|
||||
li 30,0x60
|
||||
stw 31,324(1)
|
||||
li 31,0x70
|
||||
stw 8,332(1)
|
||||
mtspr 256,11
|
||||
|
||||
/* r6 = address of the constant table; then load the state from r3 and r3+0x10. */
bl .LPICmeup
|
||||
addi 11,1,47
|
||||
.long 0x7C001E19
|
||||
.long 0x7C8A1E19
|
||||
/* Spread the two 4-word state vectors across v0-v3 and v4-v7 by rotation. */
vsldoi 1,0,0,4
|
||||
vsldoi 2,0,0,8
|
||||
vsldoi 3,0,0,12
|
||||
vsldoi 5,4,4,4
|
||||
vsldoi 6,4,4,8
|
||||
vsldoi 7,4,4,12
|
||||
li 0,3
|
||||
b .Loop
|
||||
.align 5
|
||||
/* Per-block loop: reload table pointer, save working vectors, run rounds 0-15. */
.Loop:
|
||||
lvx 28,0,6
|
||||
.long 0x7D002699
|
||||
addi 4,4,16
|
||||
mr 7,6
|
||||
stvx 0,0,11
|
||||
stvx 1,10,11
|
||||
stvx 2,26,11
|
||||
stvx 3,27,11
|
||||
stvx 4,28,11
|
||||
stvx 5,29,11
|
||||
stvx 6,30,11
|
||||
stvx 7,31,11
|
||||
vadduwm 7,7,28
|
||||
lvx 28,10,6
|
||||
vadduwm 7,7,8
|
||||
vsel 29,6,5,4
|
||||
vadduwm 6,6,28
|
||||
vadduwm 7,7,29
|
||||
.long 0x13C4FE82
|
||||
vadduwm 7,7,30
|
||||
vxor 29,0,1
|
||||
vsel 29,1,2,29
|
||||
vadduwm 3,3,7
|
||||
.long 0x13C08682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 7,7,30
|
||||
lvx 28,26,7
|
||||
vsldoi 9,8,8,4
|
||||
vadduwm 6,6,9
|
||||
vsel 29,5,4,3
|
||||
vadduwm 5,5,28
|
||||
vadduwm 6,6,29
|
||||
.long 0x13C3FE82
|
||||
vadduwm 6,6,30
|
||||
vxor 29,7,0
|
||||
vsel 29,0,1,29
|
||||
vadduwm 2,2,6
|
||||
.long 0x13C78682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 6,6,30
|
||||
lvx 28,27,7
|
||||
vsldoi 10,9,9,4
|
||||
vadduwm 5,5,10
|
||||
vsel 29,4,3,2
|
||||
vadduwm 4,4,28
|
||||
vadduwm 5,5,29
|
||||
.long 0x13C2FE82
|
||||
vadduwm 5,5,30
|
||||
vxor 29,6,7
|
||||
vsel 29,7,0,29
|
||||
vadduwm 1,1,5
|
||||
.long 0x13C68682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 5,5,30
|
||||
lvx 28,28,7
|
||||
.long 0x7D802699
|
||||
addi 4,4,16
|
||||
vsldoi 11,10,10,4
|
||||
vadduwm 4,4,11
|
||||
vsel 29,3,2,1
|
||||
vadduwm 3,3,28
|
||||
vadduwm 4,4,29
|
||||
.long 0x13C1FE82
|
||||
vadduwm 4,4,30
|
||||
vxor 29,5,6
|
||||
vsel 29,6,7,29
|
||||
vadduwm 0,0,4
|
||||
.long 0x13C58682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 4,4,30
|
||||
lvx 28,29,7
|
||||
vadduwm 3,3,12
|
||||
vsel 29,2,1,0
|
||||
vadduwm 2,2,28
|
||||
vadduwm 3,3,29
|
||||
.long 0x13C0FE82
|
||||
vadduwm 3,3,30
|
||||
vxor 29,4,5
|
||||
vsel 29,5,6,29
|
||||
vadduwm 7,7,3
|
||||
.long 0x13C48682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 3,3,30
|
||||
lvx 28,30,7
|
||||
vsldoi 13,12,12,4
|
||||
vadduwm 2,2,13
|
||||
vsel 29,1,0,7
|
||||
vadduwm 1,1,28
|
||||
vadduwm 2,2,29
|
||||
.long 0x13C7FE82
|
||||
vadduwm 2,2,30
|
||||
vxor 29,3,4
|
||||
vsel 29,4,5,29
|
||||
vadduwm 6,6,2
|
||||
.long 0x13C38682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 2,2,30
|
||||
lvx 28,31,7
|
||||
addi 7,7,0x80
|
||||
vsldoi 14,13,13,4
|
||||
vadduwm 1,1,14
|
||||
vsel 29,0,7,6
|
||||
vadduwm 0,0,28
|
||||
vadduwm 1,1,29
|
||||
.long 0x13C6FE82
|
||||
vadduwm 1,1,30
|
||||
vxor 29,2,3
|
||||
vsel 29,3,4,29
|
||||
vadduwm 5,5,1
|
||||
.long 0x13C28682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 1,1,30
|
||||
lvx 28,0,7
|
||||
.long 0x7E002699
|
||||
addi 4,4,16
|
||||
vsldoi 15,14,14,4
|
||||
vadduwm 0,0,15
|
||||
vsel 29,7,6,5
|
||||
vadduwm 7,7,28
|
||||
vadduwm 0,0,29
|
||||
.long 0x13C5FE82
|
||||
vadduwm 0,0,30
|
||||
vxor 29,1,2
|
||||
vsel 29,2,3,29
|
||||
vadduwm 4,4,0
|
||||
.long 0x13C18682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 0,0,30
|
||||
lvx 28,10,7
|
||||
vadduwm 7,7,16
|
||||
vsel 29,6,5,4
|
||||
vadduwm 6,6,28
|
||||
vadduwm 7,7,29
|
||||
.long 0x13C4FE82
|
||||
vadduwm 7,7,30
|
||||
vxor 29,0,1
|
||||
vsel 29,1,2,29
|
||||
vadduwm 3,3,7
|
||||
.long 0x13C08682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 7,7,30
|
||||
lvx 28,26,7
|
||||
vsldoi 17,16,16,4
|
||||
vadduwm 6,6,17
|
||||
vsel 29,5,4,3
|
||||
vadduwm 5,5,28
|
||||
vadduwm 6,6,29
|
||||
.long 0x13C3FE82
|
||||
vadduwm 6,6,30
|
||||
vxor 29,7,0
|
||||
vsel 29,0,1,29
|
||||
vadduwm 2,2,6
|
||||
.long 0x13C78682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 6,6,30
|
||||
lvx 28,27,7
|
||||
vsldoi 18,17,17,4
|
||||
vadduwm 5,5,18
|
||||
vsel 29,4,3,2
|
||||
vadduwm 4,4,28
|
||||
vadduwm 5,5,29
|
||||
.long 0x13C2FE82
|
||||
vadduwm 5,5,30
|
||||
vxor 29,6,7
|
||||
vsel 29,7,0,29
|
||||
vadduwm 1,1,5
|
||||
.long 0x13C68682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 5,5,30
|
||||
lvx 28,28,7
|
||||
.long 0x7F002699
|
||||
addi 4,4,16
|
||||
vsldoi 19,18,18,4
|
||||
vadduwm 4,4,19
|
||||
vsel 29,3,2,1
|
||||
vadduwm 3,3,28
|
||||
vadduwm 4,4,29
|
||||
.long 0x13C1FE82
|
||||
vadduwm 4,4,30
|
||||
vxor 29,5,6
|
||||
vsel 29,6,7,29
|
||||
vadduwm 0,0,4
|
||||
.long 0x13C58682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 4,4,30
|
||||
lvx 28,29,7
|
||||
vadduwm 3,3,24
|
||||
vsel 29,2,1,0
|
||||
vadduwm 2,2,28
|
||||
vadduwm 3,3,29
|
||||
.long 0x13C0FE82
|
||||
vadduwm 3,3,30
|
||||
vxor 29,4,5
|
||||
vsel 29,5,6,29
|
||||
vadduwm 7,7,3
|
||||
.long 0x13C48682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 3,3,30
|
||||
lvx 28,30,7
|
||||
vsldoi 25,24,24,4
|
||||
vadduwm 2,2,25
|
||||
vsel 29,1,0,7
|
||||
vadduwm 1,1,28
|
||||
vadduwm 2,2,29
|
||||
.long 0x13C7FE82
|
||||
vadduwm 2,2,30
|
||||
vxor 29,3,4
|
||||
vsel 29,4,5,29
|
||||
vadduwm 6,6,2
|
||||
.long 0x13C38682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 2,2,30
|
||||
lvx 28,31,7
|
||||
addi 7,7,0x80
|
||||
vsldoi 26,25,25,4
|
||||
vadduwm 1,1,26
|
||||
vsel 29,0,7,6
|
||||
vadduwm 0,0,28
|
||||
vadduwm 1,1,29
|
||||
.long 0x13C6FE82
|
||||
vadduwm 1,1,30
|
||||
vxor 29,2,3
|
||||
vsel 29,3,4,29
|
||||
vadduwm 5,5,1
|
||||
.long 0x13C28682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 1,1,30
|
||||
lvx 28,0,7
|
||||
vsldoi 27,26,26,4
|
||||
.long 0x13C90682
|
||||
vadduwm 8,8,30
|
||||
.long 0x13DA7E82
|
||||
vadduwm 8,8,30
|
||||
vadduwm 8,8,17
|
||||
vadduwm 0,0,27
|
||||
vsel 29,7,6,5
|
||||
vadduwm 7,7,28
|
||||
vadduwm 0,0,29
|
||||
.long 0x13C5FE82
|
||||
vadduwm 0,0,30
|
||||
vxor 29,1,2
|
||||
vsel 29,2,3,29
|
||||
vadduwm 4,4,0
|
||||
.long 0x13C18682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 0,0,30
|
||||
lvx 28,10,7
|
||||
/* ctr = 3 (r0 was set before .Loop): three passes of 16 schedule-extended rounds. */
mtctr 0
|
||||
b .L16_xx
|
||||
.align 5
|
||||
.L16_xx:
|
||||
.long 0x13CA0682
|
||||
vadduwm 9,9,30
|
||||
.long 0x13DB7E82
|
||||
vadduwm 9,9,30
|
||||
vadduwm 9,9,18
|
||||
vadduwm 7,7,8
|
||||
vsel 29,6,5,4
|
||||
vadduwm 6,6,28
|
||||
vadduwm 7,7,29
|
||||
.long 0x13C4FE82
|
||||
vadduwm 7,7,30
|
||||
vxor 29,0,1
|
||||
vsel 29,1,2,29
|
||||
vadduwm 3,3,7
|
||||
.long 0x13C08682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 7,7,30
|
||||
lvx 28,26,7
|
||||
.long 0x13CB0682
|
||||
vadduwm 10,10,30
|
||||
.long 0x13C87E82
|
||||
vadduwm 10,10,30
|
||||
vadduwm 10,10,19
|
||||
vadduwm 6,6,9
|
||||
vsel 29,5,4,3
|
||||
vadduwm 5,5,28
|
||||
vadduwm 6,6,29
|
||||
.long 0x13C3FE82
|
||||
vadduwm 6,6,30
|
||||
vxor 29,7,0
|
||||
vsel 29,0,1,29
|
||||
vadduwm 2,2,6
|
||||
.long 0x13C78682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 6,6,30
|
||||
lvx 28,27,7
|
||||
.long 0x13CC0682
|
||||
vadduwm 11,11,30
|
||||
.long 0x13C97E82
|
||||
vadduwm 11,11,30
|
||||
vadduwm 11,11,24
|
||||
vadduwm 5,5,10
|
||||
vsel 29,4,3,2
|
||||
vadduwm 4,4,28
|
||||
vadduwm 5,5,29
|
||||
.long 0x13C2FE82
|
||||
vadduwm 5,5,30
|
||||
vxor 29,6,7
|
||||
vsel 29,7,0,29
|
||||
vadduwm 1,1,5
|
||||
.long 0x13C68682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 5,5,30
|
||||
lvx 28,28,7
|
||||
.long 0x13CD0682
|
||||
vadduwm 12,12,30
|
||||
.long 0x13CA7E82
|
||||
vadduwm 12,12,30
|
||||
vadduwm 12,12,25
|
||||
vadduwm 4,4,11
|
||||
vsel 29,3,2,1
|
||||
vadduwm 3,3,28
|
||||
vadduwm 4,4,29
|
||||
.long 0x13C1FE82
|
||||
vadduwm 4,4,30
|
||||
vxor 29,5,6
|
||||
vsel 29,6,7,29
|
||||
vadduwm 0,0,4
|
||||
.long 0x13C58682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 4,4,30
|
||||
lvx 28,29,7
|
||||
.long 0x13CE0682
|
||||
vadduwm 13,13,30
|
||||
.long 0x13CB7E82
|
||||
vadduwm 13,13,30
|
||||
vadduwm 13,13,26
|
||||
vadduwm 3,3,12
|
||||
vsel 29,2,1,0
|
||||
vadduwm 2,2,28
|
||||
vadduwm 3,3,29
|
||||
.long 0x13C0FE82
|
||||
vadduwm 3,3,30
|
||||
vxor 29,4,5
|
||||
vsel 29,5,6,29
|
||||
vadduwm 7,7,3
|
||||
.long 0x13C48682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 3,3,30
|
||||
lvx 28,30,7
|
||||
.long 0x13CF0682
|
||||
vadduwm 14,14,30
|
||||
.long 0x13CC7E82
|
||||
vadduwm 14,14,30
|
||||
vadduwm 14,14,27
|
||||
vadduwm 2,2,13
|
||||
vsel 29,1,0,7
|
||||
vadduwm 1,1,28
|
||||
vadduwm 2,2,29
|
||||
.long 0x13C7FE82
|
||||
vadduwm 2,2,30
|
||||
vxor 29,3,4
|
||||
vsel 29,4,5,29
|
||||
vadduwm 6,6,2
|
||||
.long 0x13C38682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 2,2,30
|
||||
lvx 28,31,7
|
||||
addi 7,7,0x80
|
||||
.long 0x13D00682
|
||||
vadduwm 15,15,30
|
||||
.long 0x13CD7E82
|
||||
vadduwm 15,15,30
|
||||
vadduwm 15,15,8
|
||||
vadduwm 1,1,14
|
||||
vsel 29,0,7,6
|
||||
vadduwm 0,0,28
|
||||
vadduwm 1,1,29
|
||||
.long 0x13C6FE82
|
||||
vadduwm 1,1,30
|
||||
vxor 29,2,3
|
||||
vsel 29,3,4,29
|
||||
vadduwm 5,5,1
|
||||
.long 0x13C28682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 1,1,30
|
||||
lvx 28,0,7
|
||||
.long 0x13D10682
|
||||
vadduwm 16,16,30
|
||||
.long 0x13CE7E82
|
||||
vadduwm 16,16,30
|
||||
vadduwm 16,16,9
|
||||
vadduwm 0,0,15
|
||||
vsel 29,7,6,5
|
||||
vadduwm 7,7,28
|
||||
vadduwm 0,0,29
|
||||
.long 0x13C5FE82
|
||||
vadduwm 0,0,30
|
||||
vxor 29,1,2
|
||||
vsel 29,2,3,29
|
||||
vadduwm 4,4,0
|
||||
.long 0x13C18682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 0,0,30
|
||||
lvx 28,10,7
|
||||
.long 0x13D20682
|
||||
vadduwm 17,17,30
|
||||
.long 0x13CF7E82
|
||||
vadduwm 17,17,30
|
||||
vadduwm 17,17,10
|
||||
vadduwm 7,7,16
|
||||
vsel 29,6,5,4
|
||||
vadduwm 6,6,28
|
||||
vadduwm 7,7,29
|
||||
.long 0x13C4FE82
|
||||
vadduwm 7,7,30
|
||||
vxor 29,0,1
|
||||
vsel 29,1,2,29
|
||||
vadduwm 3,3,7
|
||||
.long 0x13C08682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 7,7,30
|
||||
lvx 28,26,7
|
||||
.long 0x13D30682
|
||||
vadduwm 18,18,30
|
||||
.long 0x13D07E82
|
||||
vadduwm 18,18,30
|
||||
vadduwm 18,18,11
|
||||
vadduwm 6,6,17
|
||||
vsel 29,5,4,3
|
||||
vadduwm 5,5,28
|
||||
vadduwm 6,6,29
|
||||
.long 0x13C3FE82
|
||||
vadduwm 6,6,30
|
||||
vxor 29,7,0
|
||||
vsel 29,0,1,29
|
||||
vadduwm 2,2,6
|
||||
.long 0x13C78682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 6,6,30
|
||||
lvx 28,27,7
|
||||
.long 0x13D80682
|
||||
vadduwm 19,19,30
|
||||
.long 0x13D17E82
|
||||
vadduwm 19,19,30
|
||||
vadduwm 19,19,12
|
||||
vadduwm 5,5,18
|
||||
vsel 29,4,3,2
|
||||
vadduwm 4,4,28
|
||||
vadduwm 5,5,29
|
||||
.long 0x13C2FE82
|
||||
vadduwm 5,5,30
|
||||
vxor 29,6,7
|
||||
vsel 29,7,0,29
|
||||
vadduwm 1,1,5
|
||||
.long 0x13C68682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 5,5,30
|
||||
lvx 28,28,7
|
||||
.long 0x13D90682
|
||||
vadduwm 24,24,30
|
||||
.long 0x13D27E82
|
||||
vadduwm 24,24,30
|
||||
vadduwm 24,24,13
|
||||
vadduwm 4,4,19
|
||||
vsel 29,3,2,1
|
||||
vadduwm 3,3,28
|
||||
vadduwm 4,4,29
|
||||
.long 0x13C1FE82
|
||||
vadduwm 4,4,30
|
||||
vxor 29,5,6
|
||||
vsel 29,6,7,29
|
||||
vadduwm 0,0,4
|
||||
.long 0x13C58682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 4,4,30
|
||||
lvx 28,29,7
|
||||
.long 0x13DA0682
|
||||
vadduwm 25,25,30
|
||||
.long 0x13D37E82
|
||||
vadduwm 25,25,30
|
||||
vadduwm 25,25,14
|
||||
vadduwm 3,3,24
|
||||
vsel 29,2,1,0
|
||||
vadduwm 2,2,28
|
||||
vadduwm 3,3,29
|
||||
.long 0x13C0FE82
|
||||
vadduwm 3,3,30
|
||||
vxor 29,4,5
|
||||
vsel 29,5,6,29
|
||||
vadduwm 7,7,3
|
||||
.long 0x13C48682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 3,3,30
|
||||
lvx 28,30,7
|
||||
.long 0x13DB0682
|
||||
vadduwm 26,26,30
|
||||
.long 0x13D87E82
|
||||
vadduwm 26,26,30
|
||||
vadduwm 26,26,15
|
||||
vadduwm 2,2,25
|
||||
vsel 29,1,0,7
|
||||
vadduwm 1,1,28
|
||||
vadduwm 2,2,29
|
||||
.long 0x13C7FE82
|
||||
vadduwm 2,2,30
|
||||
vxor 29,3,4
|
||||
vsel 29,4,5,29
|
||||
vadduwm 6,6,2
|
||||
.long 0x13C38682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 2,2,30
|
||||
lvx 28,31,7
|
||||
addi 7,7,0x80
|
||||
.long 0x13C80682
|
||||
vadduwm 27,27,30
|
||||
.long 0x13D97E82
|
||||
vadduwm 27,27,30
|
||||
vadduwm 27,27,16
|
||||
vadduwm 1,1,26
|
||||
vsel 29,0,7,6
|
||||
vadduwm 0,0,28
|
||||
vadduwm 1,1,29
|
||||
.long 0x13C6FE82
|
||||
vadduwm 1,1,30
|
||||
vxor 29,2,3
|
||||
vsel 29,3,4,29
|
||||
vadduwm 5,5,1
|
||||
.long 0x13C28682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 1,1,30
|
||||
lvx 28,0,7
|
||||
.long 0x13C90682
|
||||
vadduwm 8,8,30
|
||||
.long 0x13DA7E82
|
||||
vadduwm 8,8,30
|
||||
vadduwm 8,8,17
|
||||
vadduwm 0,0,27
|
||||
vsel 29,7,6,5
|
||||
vadduwm 7,7,28
|
||||
vadduwm 0,0,29
|
||||
.long 0x13C5FE82
|
||||
vadduwm 0,0,30
|
||||
vxor 29,1,2
|
||||
vsel 29,2,3,29
|
||||
vadduwm 4,4,0
|
||||
.long 0x13C18682
|
||||
vadduwm 30,30,29
|
||||
vadduwm 0,0,30
|
||||
lvx 28,10,7
|
||||
bdnz .L16_xx
|
||||
|
||||
/*
 * Feed-forward: reload the working vectors saved at the top of .Loop and
 * add them to the new values; r5 counts down the remaining blocks.
 */
lvx 10,0,11
|
||||
subic. 5,5,1
|
||||
lvx 11,10,11
|
||||
vadduwm 0,0,10
|
||||
lvx 12,26,11
|
||||
vadduwm 1,1,11
|
||||
lvx 13,27,11
|
||||
vadduwm 2,2,12
|
||||
lvx 14,28,11
|
||||
vadduwm 3,3,13
|
||||
lvx 15,29,11
|
||||
vadduwm 4,4,14
|
||||
lvx 16,30,11
|
||||
vadduwm 5,5,15
|
||||
lvx 17,31,11
|
||||
vadduwm 6,6,16
|
||||
vadduwm 7,7,17
|
||||
bne .Loop
|
||||
/*
 * All blocks done: merge v0-v3 and v4-v7 back into two vectors with vperm
 * (masks taken from v28 and from the table via r7) and store them to r3
 * and r3+0x10.
 */
lvx 8,26,7
|
||||
vperm 0,0,1,28
|
||||
lvx 9,27,7
|
||||
vperm 4,4,5,28
|
||||
vperm 0,0,2,8
|
||||
vperm 4,4,6,8
|
||||
vperm 0,0,3,9
|
||||
vperm 4,4,7,9
|
||||
.long 0x7C001F19
|
||||
.long 0x7C8A1F19
|
||||
/* Epilogue: restore LR, VRSAVE, v24-v31 and r26-r31, then pop the frame. */
addi 11,1,175
|
||||
mtlr 8
|
||||
mtspr 256,12
|
||||
lvx 24,0,11
|
||||
lvx 25,10,11
|
||||
lvx 26,26,11
|
||||
lvx 27,27,11
|
||||
lvx 28,28,11
|
||||
lvx 29,29,11
|
||||
lvx 30,30,11
|
||||
lvx 31,31,11
|
||||
lwz 26,304(1)
|
||||
lwz 27,308(1)
|
||||
lwz 28,312(1)
|
||||
lwz 29,316(1)
|
||||
lwz 30,320(1)
|
||||
lwz 31,324(1)
|
||||
addi 1,1,328
|
||||
blr
|
||||
/* Trailing data after blr; looks like a traceback-table tag (not executed). */
.long 0
|
||||
.byte 0,12,4,1,0x80,6,3,0
|
||||
.long 0
|
||||
.size sha256_block_p8,.-sha256_block_p8
|
||||
/*
 * .LPICmeup - position-independent lookup of the SHA-256 constant table.
 *
 * The caller's LR is parked in r0, "bcl 20,31,$+4" loads LR with the
 * address of the following instruction (label + 8), and adding 56 yields
 * label + 64.  The code (24 bytes), the ".long 0" (4), the 8 ".byte"
 * values and the 28 bytes of ".space" add up to exactly 64 bytes, so the
 * result in r6 is the address of the first table row.  LR is restored
 * from r0 before returning.
 *
 * Out:      r6 = address of the table below
 * Clobbers: r0
 */
.align 6
|
||||
.LPICmeup:
|
||||
mflr 0
|
||||
bcl 20,31,$+4
|
||||
mflr 6
|
||||
addi 6,6,56
|
||||
mtlr 0
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,0,0
|
||||
.space 28
|
||||
/*
 * Table: 64 rows, each one 32-bit round constant replicated into all
 * four words of a 16-byte vector (so a single lvx yields a splatted
 * constant), followed by an all-zero row and three byte-index rows that
 * the block function loads as vperm masks.
 */
.long 0x428a2f98,0x428a2f98,0x428a2f98,0x428a2f98
|
||||
.long 0x71374491,0x71374491,0x71374491,0x71374491
|
||||
.long 0xb5c0fbcf,0xb5c0fbcf,0xb5c0fbcf,0xb5c0fbcf
|
||||
.long 0xe9b5dba5,0xe9b5dba5,0xe9b5dba5,0xe9b5dba5
|
||||
.long 0x3956c25b,0x3956c25b,0x3956c25b,0x3956c25b
|
||||
.long 0x59f111f1,0x59f111f1,0x59f111f1,0x59f111f1
|
||||
.long 0x923f82a4,0x923f82a4,0x923f82a4,0x923f82a4
|
||||
.long 0xab1c5ed5,0xab1c5ed5,0xab1c5ed5,0xab1c5ed5
|
||||
.long 0xd807aa98,0xd807aa98,0xd807aa98,0xd807aa98
|
||||
.long 0x12835b01,0x12835b01,0x12835b01,0x12835b01
|
||||
.long 0x243185be,0x243185be,0x243185be,0x243185be
|
||||
.long 0x550c7dc3,0x550c7dc3,0x550c7dc3,0x550c7dc3
|
||||
.long 0x72be5d74,0x72be5d74,0x72be5d74,0x72be5d74
|
||||
.long 0x80deb1fe,0x80deb1fe,0x80deb1fe,0x80deb1fe
|
||||
.long 0x9bdc06a7,0x9bdc06a7,0x9bdc06a7,0x9bdc06a7
|
||||
.long 0xc19bf174,0xc19bf174,0xc19bf174,0xc19bf174
|
||||
.long 0xe49b69c1,0xe49b69c1,0xe49b69c1,0xe49b69c1
|
||||
.long 0xefbe4786,0xefbe4786,0xefbe4786,0xefbe4786
|
||||
.long 0x0fc19dc6,0x0fc19dc6,0x0fc19dc6,0x0fc19dc6
|
||||
.long 0x240ca1cc,0x240ca1cc,0x240ca1cc,0x240ca1cc
|
||||
.long 0x2de92c6f,0x2de92c6f,0x2de92c6f,0x2de92c6f
|
||||
.long 0x4a7484aa,0x4a7484aa,0x4a7484aa,0x4a7484aa
|
||||
.long 0x5cb0a9dc,0x5cb0a9dc,0x5cb0a9dc,0x5cb0a9dc
|
||||
.long 0x76f988da,0x76f988da,0x76f988da,0x76f988da
|
||||
.long 0x983e5152,0x983e5152,0x983e5152,0x983e5152
|
||||
.long 0xa831c66d,0xa831c66d,0xa831c66d,0xa831c66d
|
||||
.long 0xb00327c8,0xb00327c8,0xb00327c8,0xb00327c8
|
||||
.long 0xbf597fc7,0xbf597fc7,0xbf597fc7,0xbf597fc7
|
||||
.long 0xc6e00bf3,0xc6e00bf3,0xc6e00bf3,0xc6e00bf3
|
||||
.long 0xd5a79147,0xd5a79147,0xd5a79147,0xd5a79147
|
||||
.long 0x06ca6351,0x06ca6351,0x06ca6351,0x06ca6351
|
||||
.long 0x14292967,0x14292967,0x14292967,0x14292967
|
||||
.long 0x27b70a85,0x27b70a85,0x27b70a85,0x27b70a85
|
||||
.long 0x2e1b2138,0x2e1b2138,0x2e1b2138,0x2e1b2138
|
||||
.long 0x4d2c6dfc,0x4d2c6dfc,0x4d2c6dfc,0x4d2c6dfc
|
||||
.long 0x53380d13,0x53380d13,0x53380d13,0x53380d13
|
||||
.long 0x650a7354,0x650a7354,0x650a7354,0x650a7354
|
||||
.long 0x766a0abb,0x766a0abb,0x766a0abb,0x766a0abb
|
||||
.long 0x81c2c92e,0x81c2c92e,0x81c2c92e,0x81c2c92e
|
||||
.long 0x92722c85,0x92722c85,0x92722c85,0x92722c85
|
||||
.long 0xa2bfe8a1,0xa2bfe8a1,0xa2bfe8a1,0xa2bfe8a1
|
||||
.long 0xa81a664b,0xa81a664b,0xa81a664b,0xa81a664b
|
||||
.long 0xc24b8b70,0xc24b8b70,0xc24b8b70,0xc24b8b70
|
||||
.long 0xc76c51a3,0xc76c51a3,0xc76c51a3,0xc76c51a3
|
||||
.long 0xd192e819,0xd192e819,0xd192e819,0xd192e819
|
||||
.long 0xd6990624,0xd6990624,0xd6990624,0xd6990624
|
||||
.long 0xf40e3585,0xf40e3585,0xf40e3585,0xf40e3585
|
||||
.long 0x106aa070,0x106aa070,0x106aa070,0x106aa070
|
||||
.long 0x19a4c116,0x19a4c116,0x19a4c116,0x19a4c116
|
||||
.long 0x1e376c08,0x1e376c08,0x1e376c08,0x1e376c08
|
||||
.long 0x2748774c,0x2748774c,0x2748774c,0x2748774c
|
||||
.long 0x34b0bcb5,0x34b0bcb5,0x34b0bcb5,0x34b0bcb5
|
||||
.long 0x391c0cb3,0x391c0cb3,0x391c0cb3,0x391c0cb3
|
||||
.long 0x4ed8aa4a,0x4ed8aa4a,0x4ed8aa4a,0x4ed8aa4a
|
||||
.long 0x5b9cca4f,0x5b9cca4f,0x5b9cca4f,0x5b9cca4f
|
||||
.long 0x682e6ff3,0x682e6ff3,0x682e6ff3,0x682e6ff3
|
||||
.long 0x748f82ee,0x748f82ee,0x748f82ee,0x748f82ee
|
||||
.long 0x78a5636f,0x78a5636f,0x78a5636f,0x78a5636f
|
||||
.long 0x84c87814,0x84c87814,0x84c87814,0x84c87814
|
||||
.long 0x8cc70208,0x8cc70208,0x8cc70208,0x8cc70208
|
||||
.long 0x90befffa,0x90befffa,0x90befffa,0x90befffa
|
||||
.long 0xa4506ceb,0xa4506ceb,0xa4506ceb,0xa4506ceb
|
||||
.long 0xbef9a3f7,0xbef9a3f7,0xbef9a3f7,0xbef9a3f7
|
||||
.long 0xc67178f2,0xc67178f2,0xc67178f2,0xc67178f2
|
||||
/* Zero row, then three byte-index rows (vperm masks). */
.long 0,0,0,0
|
||||
.long 0x00010203,0x10111213,0x10111213,0x10111213
|
||||
.long 0x00010203,0x04050607,0x10111213,0x10111213
|
||||
.long 0x00010203,0x04050607,0x08090a0b,0x10111213
|
||||
/* NUL-terminated ASCII: "SHA256 for PowerISA 2.07,CRYPTOGAMS by <appro@openssl.org>" */
.byte 83,72,65,50,53,54,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user