Compiler fix
This commit is contained in:
parent
efb322df66
commit
e87d5111a2
19 changed files with 1401 additions and 279 deletions
132
src/crypto/cn/asm/cn1/cnv1_double_main_loop.inc
Normal file
132
src/crypto/cn/asm/cn1/cnv1_double_main_loop.inc
Normal file
|
@ -0,0 +1,132 @@
|
|||
mov QWORD PTR [rsp+8], rbx
|
||||
mov QWORD PTR [rsp+16], rbp
|
||||
mov QWORD PTR [rsp+24], rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 32
|
||||
mov rdx, QWORD PTR [rcx]
|
||||
mov r8, QWORD PTR [rcx+8]
|
||||
mov r12d, 524288
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
mov rbx, QWORD PTR [rdx+32]
|
||||
xor rbx, QWORD PTR [rdx]
|
||||
mov rsi, QWORD PTR [rdx+40]
|
||||
mov r10, rbx
|
||||
xor rsi, QWORD PTR [rdx+8]
|
||||
and r10d, 2097136
|
||||
mov rdi, QWORD PTR [r8+32]
|
||||
xor rdi, QWORD PTR [r8]
|
||||
movq xmm3, rbx
|
||||
mov rbp, QWORD PTR [r8+40]
|
||||
mov r9, rdi
|
||||
xor rbp, QWORD PTR [r8+8]
|
||||
movq xmm0, rsi
|
||||
mov rcx, QWORD PTR [rdx+56]
|
||||
and r9d, 2097136
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
movq xmm4, rdi
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
mov r14, QWORD PTR [rdx+224]
|
||||
mov r13, QWORD PTR [rdx+232]
|
||||
mov r15, QWORD PTR [r8+224]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movq xmm0, rbp
|
||||
movq xmm5, rax
|
||||
punpcklqdq xmm4, xmm0
|
||||
mov rax, QWORD PTR [r8+48]
|
||||
movq xmm0, rcx
|
||||
xor rax, QWORD PTR [r8+16]
|
||||
mov rcx, QWORD PTR [r8+56]
|
||||
xor rcx, QWORD PTR [r8+24]
|
||||
movdqu xmm1, XMMWORD PTR [r14+r10]
|
||||
movq xmm6, rax
|
||||
punpcklqdq xmm5, xmm0
|
||||
mov rax, QWORD PTR [rdx+240]
|
||||
movq xmm0, rcx
|
||||
movdqu xmm2, XMMWORD PTR [r15+r9]
|
||||
mov QWORD PTR [rsp], rax
|
||||
mov rax, QWORD PTR [r8+240]
|
||||
mov QWORD PTR [rsp+8], rax
|
||||
punpcklqdq xmm6, xmm0
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_cnv1_double:
|
||||
aesenc xmm1, xmm3
|
||||
aesenc xmm2, xmm4
|
||||
movdqa xmm0, xmm1
|
||||
movq r11, xmm2
|
||||
pxor xmm0, xmm5
|
||||
movdqa xmm5, xmm1
|
||||
movq QWORD PTR [r14+r10], xmm0
|
||||
pextrq rcx, xmm0, 1
|
||||
mov eax, ecx
|
||||
movdqa xmm0, xmm2
|
||||
shr rax, 24
|
||||
pxor xmm0, xmm6
|
||||
movdqa xmm6, xmm2
|
||||
mov eax, DWORD PTR [r13+rax*4]
|
||||
xor rax, rcx
|
||||
mov QWORD PTR [r14+r10+8], rax
|
||||
movq QWORD PTR [r15+r9], xmm0
|
||||
pextrq rcx, xmm0, 1
|
||||
mov eax, ecx
|
||||
shr rax, 24
|
||||
mov eax, DWORD PTR [r13+rax*4]
|
||||
xor rax, rcx
|
||||
movq rcx, xmm1
|
||||
mov QWORD PTR [r15+r9+8], rax
|
||||
mov r9, rcx
|
||||
and r9d, 2097136
|
||||
mov r10, QWORD PTR [r14+r9]
|
||||
mov r8, QWORD PTR [r14+r9+8]
|
||||
mov rax, r10
|
||||
mul rcx
|
||||
add rsi, rax
|
||||
add rbx, rdx
|
||||
mov rax, QWORD PTR [rsp]
|
||||
mov QWORD PTR [r14+r9], rbx
|
||||
xor rax, rsi
|
||||
mov QWORD PTR [r14+r9+8], rax
|
||||
xor rsi, r8
|
||||
xor rbx, r10
|
||||
mov r8, r11
|
||||
and r8d, 2097136
|
||||
mov r10, rbx
|
||||
and r10d, 2097136
|
||||
movq xmm3, rbx
|
||||
pinsrq xmm3, rsi, 1
|
||||
mov r9, QWORD PTR [r15+r8]
|
||||
mov rcx, QWORD PTR [r15+r8+8]
|
||||
mov rax, r9
|
||||
movdqu xmm1, XMMWORD PTR [r14+r10]
|
||||
mul r11
|
||||
add rbp, rax
|
||||
add rdi, rdx
|
||||
mov rax, QWORD PTR [rsp+8]
|
||||
mov QWORD PTR [r15+r8], rdi
|
||||
xor rax, rbp
|
||||
xor rdi, r9
|
||||
mov QWORD PTR [r15+r8+8], rax
|
||||
mov r9, rdi
|
||||
xor rbp, rcx
|
||||
and r9d, 2097136
|
||||
movq xmm4, rdi
|
||||
pinsrq xmm4, rbp, 1
|
||||
movdqu xmm2, XMMWORD PTR [r15+r9]
|
||||
sub r12, 1
|
||||
jne main_loop_cnv1_double
|
||||
|
||||
mov rbx, QWORD PTR [rsp+80]
|
||||
mov rbp, QWORD PTR [rsp+88]
|
||||
mov rsi, QWORD PTR [rsp+96]
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
add rsp, 32
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
263
src/crypto/cn/asm/cn1/cnv1_quad_main_loop.inc
Normal file
263
src/crypto/cn/asm/cn1/cnv1_quad_main_loop.inc
Normal file
|
@ -0,0 +1,263 @@
|
|||
mov rax, rsp
|
||||
mov QWORD PTR [rax+8], rbx
|
||||
mov QWORD PTR [rax+16], rbp
|
||||
mov QWORD PTR [rax+24], rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 144
|
||||
mov r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+8]
|
||||
mov r10, QWORD PTR [rcx+16]
|
||||
mov r11, QWORD PTR [rcx+24]
|
||||
mov rbp, QWORD PTR [r8+224]
|
||||
mov r13, QWORD PTR [r8+232]
|
||||
mov r14, QWORD PTR [r9+224]
|
||||
mov r15, QWORD PTR [r10+224]
|
||||
mov r12, QWORD PTR [r11+224]
|
||||
mov rcx, QWORD PTR [r8+40]
|
||||
xor rcx, QWORD PTR [r8+8]
|
||||
mov rbx, QWORD PTR [r8+32]
|
||||
xor rbx, QWORD PTR [r8]
|
||||
mov rdi, QWORD PTR [r9+32]
|
||||
xor rdi, QWORD PTR [r9]
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r9+40]
|
||||
xor rcx, QWORD PTR [r9+8]
|
||||
movq xmm1, rbx
|
||||
movaps XMMWORD PTR [rax-56], xmm6
|
||||
movaps XMMWORD PTR [rax-72], xmm7
|
||||
movaps XMMWORD PTR [rax-88], xmm8
|
||||
movaps XMMWORD PTR [rax-104], xmm9
|
||||
movaps XMMWORD PTR [rax-120], xmm10
|
||||
movaps XMMWORD PTR [rsp+48], xmm11
|
||||
movaps XMMWORD PTR [rsp+32], xmm12
|
||||
and ebx, 2097136
|
||||
mov rsi, QWORD PTR [r10+32]
|
||||
movq xmm2, rdi
|
||||
mov rax, QWORD PTR [r8+240]
|
||||
and edi, 2097136
|
||||
xor rsi, QWORD PTR [r10]
|
||||
mov rdx, QWORD PTR [r8+56]
|
||||
xor rdx, QWORD PTR [r8+24]
|
||||
mov QWORD PTR [rsp], rax
|
||||
mov rax, QWORD PTR [r9+240]
|
||||
movq xmm3, rsi
|
||||
mov QWORD PTR [rsp+8], rax
|
||||
and esi, 2097136
|
||||
mov rax, QWORD PTR [r10+240]
|
||||
punpcklqdq xmm1, xmm0
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+40]
|
||||
xor rcx, QWORD PTR [r10+8]
|
||||
mov QWORD PTR [rsp+16], rax
|
||||
mov rax, QWORD PTR [r11+240]
|
||||
punpcklqdq xmm2, xmm0
|
||||
movq xmm0, rcx
|
||||
mov QWORD PTR [rsp+24], rax
|
||||
mov rcx, QWORD PTR [r11+40]
|
||||
xor rcx, QWORD PTR [r11+8]
|
||||
mov rax, QWORD PTR [r11+32]
|
||||
xor rax, QWORD PTR [r11]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r8+48]
|
||||
xor rcx, QWORD PTR [r8+16]
|
||||
movq xmm4, rax
|
||||
and eax, 2097136
|
||||
punpcklqdq xmm4, xmm0
|
||||
movq xmm0, rdx
|
||||
mov rdx, QWORD PTR [r9+56]
|
||||
xor rdx, QWORD PTR [r9+24]
|
||||
movq xmm5, rcx
|
||||
mov rcx, QWORD PTR [r9+48]
|
||||
xor rcx, QWORD PTR [r9+16]
|
||||
punpcklqdq xmm5, xmm0
|
||||
movq xmm0, rdx
|
||||
mov rdx, QWORD PTR [r10+56]
|
||||
xor rdx, QWORD PTR [r10+24]
|
||||
movq xmm6, rcx
|
||||
mov rcx, QWORD PTR [r10+48]
|
||||
xor rcx, QWORD PTR [r10+16]
|
||||
punpcklqdq xmm6, xmm0
|
||||
movq xmm0, rdx
|
||||
mov rdx, QWORD PTR [r11+56]
|
||||
movq xmm7, rcx
|
||||
punpcklqdq xmm7, xmm0
|
||||
xor rdx, QWORD PTR [r11+24]
|
||||
mov rcx, QWORD PTR [r11+48]
|
||||
xor rcx, QWORD PTR [r11+16]
|
||||
mov r11d, 524288
|
||||
movdqu xmm9, XMMWORD PTR [rbp+rbx]
|
||||
movdqu xmm10, XMMWORD PTR [r14+rdi]
|
||||
movq xmm0, rdx
|
||||
movdqu xmm11, XMMWORD PTR [r15+rsi]
|
||||
movdqu xmm12, XMMWORD PTR [r12+rax]
|
||||
movq xmm8, rcx
|
||||
punpcklqdq xmm8, xmm0
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_cnv1_quad:
|
||||
aesenc xmm9, xmm1
|
||||
aesenc xmm10, xmm2
|
||||
aesenc xmm11, xmm3
|
||||
aesenc xmm12, xmm4
|
||||
movd ecx, xmm9
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+rbp]
|
||||
movd ecx, xmm10
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+r14]
|
||||
movd ecx, xmm11
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+r15]
|
||||
movd ecx, xmm12
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+r12]
|
||||
movdqa xmm0, xmm9
|
||||
pxor xmm0, xmm5
|
||||
movdqa xmm5, xmm9
|
||||
movq QWORD PTR [rbp+rbx], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
movdqa xmm0, xmm10
|
||||
shr rcx, 24
|
||||
pxor xmm0, xmm6
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [rbp+rbx+8], rcx
|
||||
movq rbx, xmm1
|
||||
movq QWORD PTR [r14+rdi], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
movdqa xmm0, xmm11
|
||||
shr rcx, 24
|
||||
pxor xmm0, xmm7
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [r14+rdi+8], rcx
|
||||
movq rdi, xmm2
|
||||
movq QWORD PTR [r15+rsi], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
movdqa xmm0, xmm12
|
||||
shr rcx, 24
|
||||
pxor xmm0, xmm8
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [r15+rsi+8], rcx
|
||||
movq QWORD PTR [r12+rax], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
shr rcx, 24
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [r12+rax+8], rcx
|
||||
movq rcx, xmm9
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
mov r9, QWORD PTR [rbp+r8]
|
||||
mov r10, QWORD PTR [rbp+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
pextrq rcx, xmm1, 1
|
||||
add rcx, rax
|
||||
add rbx, rdx
|
||||
mov rax, QWORD PTR [rsp]
|
||||
mov QWORD PTR [rbp+r8], rbx
|
||||
xor rax, rcx
|
||||
mov QWORD PTR [rbp+r8+8], rax
|
||||
xor rcx, r10
|
||||
xor rbx, r9
|
||||
movq xmm1, rbx
|
||||
and ebx, 2097136
|
||||
pinsrq xmm1, rcx, 1
|
||||
movq rcx, xmm10
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
movdqu xmm9, XMMWORD PTR [rbp+rbx]
|
||||
mov r9, QWORD PTR [r14+r8]
|
||||
mov r10, QWORD PTR [r14+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
pextrq rcx, xmm2, 1
|
||||
add rcx, rax
|
||||
add rdi, rdx
|
||||
mov rax, QWORD PTR [rsp+8]
|
||||
mov QWORD PTR [r14+r8], rdi
|
||||
xor rax, rcx
|
||||
xor rdi, r9
|
||||
mov QWORD PTR [r14+r8+8], rax
|
||||
xor rcx, r10
|
||||
movq xmm2, rdi
|
||||
and edi, 2097136
|
||||
pinsrq xmm2, rcx, 1
|
||||
movq rcx, xmm11
|
||||
movq rsi, xmm3
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
movdqa xmm6, xmm10
|
||||
movdqa xmm7, xmm11
|
||||
movdqa xmm8, xmm12
|
||||
movdqu xmm10, XMMWORD PTR [r14+rdi]
|
||||
mov r9, QWORD PTR [r15+r8]
|
||||
mov r10, QWORD PTR [r15+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
pextrq rcx, xmm3, 1
|
||||
add rcx, rax
|
||||
add rsi, rdx
|
||||
mov rax, QWORD PTR [rsp+16]
|
||||
xor rax, rcx
|
||||
mov QWORD PTR [r15+r8], rsi
|
||||
mov QWORD PTR [r15+r8+8], rax
|
||||
xor rcx, r10
|
||||
xor rsi, r9
|
||||
movq xmm3, rsi
|
||||
and esi, 2097136
|
||||
pinsrq xmm3, rcx, 1
|
||||
movq rcx, xmm12
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
movdqu xmm11, XMMWORD PTR [r15+rsi]
|
||||
mov r9, QWORD PTR [r12+r8]
|
||||
mov r10, QWORD PTR [r12+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
mov rcx, rax
|
||||
movq rax, xmm4
|
||||
add rax, rdx
|
||||
mov QWORD PTR [r12+r8], rax
|
||||
xor rax, r9
|
||||
pextrq rdx, xmm4, 1
|
||||
add rdx, rcx
|
||||
mov rcx, QWORD PTR [rsp+24]
|
||||
xor rcx, rdx
|
||||
xor rdx, r10
|
||||
movq xmm4, rax
|
||||
mov QWORD PTR [r12+r8+8], rcx
|
||||
and eax, 2097136
|
||||
pinsrq xmm4, rdx, 1
|
||||
movdqu xmm12, XMMWORD PTR [r12+rax]
|
||||
sub r11, 1
|
||||
jne main_loop_cnv1_quad
|
||||
|
||||
movaps xmm7, XMMWORD PTR [rsp+112]
|
||||
lea r11, QWORD PTR [rsp+144]
|
||||
mov rbx, QWORD PTR [r11+48]
|
||||
mov rbp, QWORD PTR [r11+56]
|
||||
mov rsi, QWORD PTR [r11+64]
|
||||
movaps xmm6, XMMWORD PTR [r11-16]
|
||||
movaps xmm8, XMMWORD PTR [r11-48]
|
||||
movaps xmm9, XMMWORD PTR [r11-64]
|
||||
movaps xmm10, XMMWORD PTR [r11-80]
|
||||
movaps xmm11, XMMWORD PTR [r11-96]
|
||||
movaps xmm12, XMMWORD PTR [r11-112]
|
||||
mov rsp, r11
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
66
src/crypto/cn/asm/cn1/cnv1_single_main_loop.inc
Normal file
66
src/crypto/cn/asm/cn1/cnv1_single_main_loop.inc
Normal file
|
@ -0,0 +1,66 @@
|
|||
mov QWORD PTR [rsp+8], rbx
|
||||
mov QWORD PTR [rsp+16], rbp
|
||||
mov QWORD PTR [rsp+24], rsi
|
||||
mov QWORD PTR [rsp+32], rdi
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
mov rdx, QWORD PTR [rcx]
|
||||
mov esi, 524288
|
||||
mov r11, QWORD PTR [rdx+32]
|
||||
xor r11, QWORD PTR [rdx]
|
||||
mov rdi, QWORD PTR [rdx+224]
|
||||
mov rbx, QWORD PTR [rdx+40]
|
||||
xor rbx, QWORD PTR [rdx+8]
|
||||
mov rcx, QWORD PTR [rdx+56]
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
mov rbp, QWORD PTR [rdx+240]
|
||||
mov r14, QWORD PTR [rdx+232]
|
||||
movq xmm2, rax
|
||||
pinsrq xmm2, rcx, 1
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_cnv1_single:
|
||||
mov r8, r11
|
||||
and r8d, 2097136
|
||||
movdqu xmm1, XMMWORD PTR [rdi+r8]
|
||||
movq xmm0, r11
|
||||
pinsrq xmm0, rbx, 1
|
||||
aesenc xmm1, xmm0
|
||||
movq r15, xmm1
|
||||
mov r9, r15
|
||||
and r9d, 2097136
|
||||
movdqa xmm0, xmm1
|
||||
pxor xmm0, xmm2
|
||||
movdqa xmm2, xmm1
|
||||
movq QWORD PTR [rdi+r8], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov eax, edx
|
||||
shr rax, 24
|
||||
mov ecx, DWORD PTR [r14+rax*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [rdi+r8+8], rcx
|
||||
mov r10, QWORD PTR [rdi+r9]
|
||||
mov r8, QWORD PTR [rdi+r9+8]
|
||||
mov rax, r10
|
||||
mul r15
|
||||
add rbx, rax
|
||||
add r11, rdx
|
||||
mov QWORD PTR [rdi+r9], r11
|
||||
mov rax, rbx
|
||||
xor rbx, r8
|
||||
xor r11, r10
|
||||
xor rax, rbp
|
||||
mov QWORD PTR [rdi+r9+8], rax
|
||||
sub rsi, 1
|
||||
jne main_loop_cnv1_single
|
||||
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
mov rbx, QWORD PTR [rsp+8]
|
||||
mov rbp, QWORD PTR [rsp+16]
|
||||
mov rsi, QWORD PTR [rsp+24]
|
||||
mov rdi, QWORD PTR [rsp+32]
|
|
@ -11,6 +11,9 @@
|
|||
# define FN_PREFIX(fn) fn
|
||||
.section .text
|
||||
#endif
|
||||
.global FN_PREFIX(cnv1_single_mainloop_asm)
|
||||
.global FN_PREFIX(cnv1_double_mainloop_asm)
|
||||
.global FN_PREFIX(cnv1_quad_mainloop_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
|
||||
|
@ -19,6 +22,33 @@
|
|||
.global FN_PREFIX(cnv2_rwz_double_mainloop_asm)
|
||||
.global FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm)
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(cnv1_single_mainloop_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cn1/cnv1_single_main_loop.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(cnv1_double_mainloop_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cn1/cnv1_double_main_loop.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(cnv1_quad_mainloop_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cn1/cnv1_quad_main_loop.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
|
||||
sub rsp, 48
|
||||
|
|
|
@ -1,4 +1,7 @@
|
|||
_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
|
||||
PUBLIC cnv1_single_mainloop_asm
|
||||
PUBLIC cnv1_double_mainloop_asm
|
||||
PUBLIC cnv1_quad_mainloop_asm
|
||||
PUBLIC cnv2_mainloop_ivybridge_asm
|
||||
PUBLIC cnv2_mainloop_ryzen_asm
|
||||
PUBLIC cnv2_mainloop_bulldozer_asm
|
||||
|
@ -6,6 +9,27 @@ PUBLIC cnv2_double_mainloop_sandybridge_asm
|
|||
PUBLIC cnv2_rwz_mainloop_asm
|
||||
PUBLIC cnv2_rwz_double_mainloop_asm
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_single_mainloop_asm PROC
|
||||
INCLUDE cn1/cnv1_single_main_loop.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv1_single_mainloop_asm ENDP
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_double_mainloop_asm PROC
|
||||
INCLUDE cn1/cnv1_double_main_loop.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv1_double_mainloop_asm ENDP
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_quad_mainloop_asm PROC
|
||||
INCLUDE cn1/cnv1_quad_main_loop.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv1_quad_mainloop_asm ENDP
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_ivybridge_asm PROC
|
||||
INCLUDE cn2/cnv2_main_loop_ivybridge.inc
|
||||
|
|
132
src/crypto/cn/asm/win64/cn1/cnv1_double_main_loop.inc
Normal file
132
src/crypto/cn/asm/win64/cn1/cnv1_double_main_loop.inc
Normal file
|
@ -0,0 +1,132 @@
|
|||
mov QWORD PTR [rsp+8], rbx
|
||||
mov QWORD PTR [rsp+16], rbp
|
||||
mov QWORD PTR [rsp+24], rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 32
|
||||
mov rdx, QWORD PTR [rcx]
|
||||
mov r8, QWORD PTR [rcx+8]
|
||||
mov r12d, 524288
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
mov rbx, QWORD PTR [rdx+32]
|
||||
xor rbx, QWORD PTR [rdx]
|
||||
mov rsi, QWORD PTR [rdx+40]
|
||||
mov r10, rbx
|
||||
xor rsi, QWORD PTR [rdx+8]
|
||||
and r10d, 2097136
|
||||
mov rdi, QWORD PTR [r8+32]
|
||||
xor rdi, QWORD PTR [r8]
|
||||
movd xmm3, rbx
|
||||
mov rbp, QWORD PTR [r8+40]
|
||||
mov r9, rdi
|
||||
xor rbp, QWORD PTR [r8+8]
|
||||
movd xmm0, rsi
|
||||
mov rcx, QWORD PTR [rdx+56]
|
||||
and r9d, 2097136
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
movd xmm4, rdi
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
mov r14, QWORD PTR [rdx+224]
|
||||
mov r13, QWORD PTR [rdx+232]
|
||||
mov r15, QWORD PTR [r8+224]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movd xmm0, rbp
|
||||
movd xmm5, rax
|
||||
punpcklqdq xmm4, xmm0
|
||||
mov rax, QWORD PTR [r8+48]
|
||||
movd xmm0, rcx
|
||||
xor rax, QWORD PTR [r8+16]
|
||||
mov rcx, QWORD PTR [r8+56]
|
||||
xor rcx, QWORD PTR [r8+24]
|
||||
movdqu xmm1, XMMWORD PTR [r14+r10]
|
||||
movd xmm6, rax
|
||||
punpcklqdq xmm5, xmm0
|
||||
mov rax, QWORD PTR [rdx+240]
|
||||
movd xmm0, rcx
|
||||
movdqu xmm2, XMMWORD PTR [r15+r9]
|
||||
mov QWORD PTR [rsp], rax
|
||||
mov rax, QWORD PTR [r8+240]
|
||||
mov QWORD PTR [rsp+8], rax
|
||||
punpcklqdq xmm6, xmm0
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_cnv1_double:
|
||||
aesenc xmm1, xmm3
|
||||
aesenc xmm2, xmm4
|
||||
movdqa xmm0, xmm1
|
||||
movd r11, xmm2
|
||||
pxor xmm0, xmm5
|
||||
movdqa xmm5, xmm1
|
||||
movd QWORD PTR [r14+r10], xmm0
|
||||
pextrq rcx, xmm0, 1
|
||||
mov eax, ecx
|
||||
movdqa xmm0, xmm2
|
||||
shr rax, 24
|
||||
pxor xmm0, xmm6
|
||||
movdqa xmm6, xmm2
|
||||
mov eax, DWORD PTR [r13+rax*4]
|
||||
xor rax, rcx
|
||||
mov QWORD PTR [r14+r10+8], rax
|
||||
movd QWORD PTR [r15+r9], xmm0
|
||||
pextrq rcx, xmm0, 1
|
||||
mov eax, ecx
|
||||
shr rax, 24
|
||||
mov eax, DWORD PTR [r13+rax*4]
|
||||
xor rax, rcx
|
||||
movd rcx, xmm1
|
||||
mov QWORD PTR [r15+r9+8], rax
|
||||
mov r9, rcx
|
||||
and r9d, 2097136
|
||||
mov r10, QWORD PTR [r14+r9]
|
||||
mov r8, QWORD PTR [r14+r9+8]
|
||||
mov rax, r10
|
||||
mul rcx
|
||||
add rsi, rax
|
||||
add rbx, rdx
|
||||
mov rax, QWORD PTR [rsp]
|
||||
mov QWORD PTR [r14+r9], rbx
|
||||
xor rax, rsi
|
||||
mov QWORD PTR [r14+r9+8], rax
|
||||
xor rsi, r8
|
||||
xor rbx, r10
|
||||
mov r8, r11
|
||||
and r8d, 2097136
|
||||
mov r10, rbx
|
||||
and r10d, 2097136
|
||||
movd xmm3, rbx
|
||||
pinsrq xmm3, rsi, 1
|
||||
mov r9, QWORD PTR [r15+r8]
|
||||
mov rcx, QWORD PTR [r15+r8+8]
|
||||
mov rax, r9
|
||||
movdqu xmm1, XMMWORD PTR [r14+r10]
|
||||
mul r11
|
||||
add rbp, rax
|
||||
add rdi, rdx
|
||||
mov rax, QWORD PTR [rsp+8]
|
||||
mov QWORD PTR [r15+r8], rdi
|
||||
xor rax, rbp
|
||||
xor rdi, r9
|
||||
mov QWORD PTR [r15+r8+8], rax
|
||||
mov r9, rdi
|
||||
xor rbp, rcx
|
||||
and r9d, 2097136
|
||||
movd xmm4, rdi
|
||||
pinsrq xmm4, rbp, 1
|
||||
movdqu xmm2, XMMWORD PTR [r15+r9]
|
||||
sub r12, 1
|
||||
jne main_loop_cnv1_double
|
||||
|
||||
mov rbx, QWORD PTR [rsp+80]
|
||||
mov rbp, QWORD PTR [rsp+88]
|
||||
mov rsi, QWORD PTR [rsp+96]
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
add rsp, 32
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
263
src/crypto/cn/asm/win64/cn1/cnv1_quad_main_loop.inc
Normal file
263
src/crypto/cn/asm/win64/cn1/cnv1_quad_main_loop.inc
Normal file
|
@ -0,0 +1,263 @@
|
|||
mov rax, rsp
|
||||
mov QWORD PTR [rax+8], rbx
|
||||
mov QWORD PTR [rax+16], rbp
|
||||
mov QWORD PTR [rax+24], rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 144
|
||||
mov r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+8]
|
||||
mov r10, QWORD PTR [rcx+16]
|
||||
mov r11, QWORD PTR [rcx+24]
|
||||
mov rbp, QWORD PTR [r8+224]
|
||||
mov r13, QWORD PTR [r8+232]
|
||||
mov r14, QWORD PTR [r9+224]
|
||||
mov r15, QWORD PTR [r10+224]
|
||||
mov r12, QWORD PTR [r11+224]
|
||||
mov rcx, QWORD PTR [r8+40]
|
||||
xor rcx, QWORD PTR [r8+8]
|
||||
mov rbx, QWORD PTR [r8+32]
|
||||
xor rbx, QWORD PTR [r8]
|
||||
mov rdi, QWORD PTR [r9+32]
|
||||
xor rdi, QWORD PTR [r9]
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r9+40]
|
||||
xor rcx, QWORD PTR [r9+8]
|
||||
movd xmm1, rbx
|
||||
movaps XMMWORD PTR [rax-56], xmm6
|
||||
movaps XMMWORD PTR [rax-72], xmm7
|
||||
movaps XMMWORD PTR [rax-88], xmm8
|
||||
movaps XMMWORD PTR [rax-104], xmm9
|
||||
movaps XMMWORD PTR [rax-120], xmm10
|
||||
movaps XMMWORD PTR [rsp+48], xmm11
|
||||
movaps XMMWORD PTR [rsp+32], xmm12
|
||||
and ebx, 2097136
|
||||
mov rsi, QWORD PTR [r10+32]
|
||||
movd xmm2, rdi
|
||||
mov rax, QWORD PTR [r8+240]
|
||||
and edi, 2097136
|
||||
xor rsi, QWORD PTR [r10]
|
||||
mov rdx, QWORD PTR [r8+56]
|
||||
xor rdx, QWORD PTR [r8+24]
|
||||
mov QWORD PTR [rsp], rax
|
||||
mov rax, QWORD PTR [r9+240]
|
||||
movd xmm3, rsi
|
||||
mov QWORD PTR [rsp+8], rax
|
||||
and esi, 2097136
|
||||
mov rax, QWORD PTR [r10+240]
|
||||
punpcklqdq xmm1, xmm0
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+40]
|
||||
xor rcx, QWORD PTR [r10+8]
|
||||
mov QWORD PTR [rsp+16], rax
|
||||
mov rax, QWORD PTR [r11+240]
|
||||
punpcklqdq xmm2, xmm0
|
||||
movd xmm0, rcx
|
||||
mov QWORD PTR [rsp+24], rax
|
||||
mov rcx, QWORD PTR [r11+40]
|
||||
xor rcx, QWORD PTR [r11+8]
|
||||
mov rax, QWORD PTR [r11+32]
|
||||
xor rax, QWORD PTR [r11]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r8+48]
|
||||
xor rcx, QWORD PTR [r8+16]
|
||||
movd xmm4, rax
|
||||
and eax, 2097136
|
||||
punpcklqdq xmm4, xmm0
|
||||
movd xmm0, rdx
|
||||
mov rdx, QWORD PTR [r9+56]
|
||||
xor rdx, QWORD PTR [r9+24]
|
||||
movd xmm5, rcx
|
||||
mov rcx, QWORD PTR [r9+48]
|
||||
xor rcx, QWORD PTR [r9+16]
|
||||
punpcklqdq xmm5, xmm0
|
||||
movd xmm0, rdx
|
||||
mov rdx, QWORD PTR [r10+56]
|
||||
xor rdx, QWORD PTR [r10+24]
|
||||
movd xmm6, rcx
|
||||
mov rcx, QWORD PTR [r10+48]
|
||||
xor rcx, QWORD PTR [r10+16]
|
||||
punpcklqdq xmm6, xmm0
|
||||
movd xmm0, rdx
|
||||
mov rdx, QWORD PTR [r11+56]
|
||||
movd xmm7, rcx
|
||||
punpcklqdq xmm7, xmm0
|
||||
xor rdx, QWORD PTR [r11+24]
|
||||
mov rcx, QWORD PTR [r11+48]
|
||||
xor rcx, QWORD PTR [r11+16]
|
||||
mov r11d, 524288
|
||||
movdqu xmm9, XMMWORD PTR [rbp+rbx]
|
||||
movdqu xmm10, XMMWORD PTR [r14+rdi]
|
||||
movd xmm0, rdx
|
||||
movdqu xmm11, XMMWORD PTR [r15+rsi]
|
||||
movdqu xmm12, XMMWORD PTR [r12+rax]
|
||||
movd xmm8, rcx
|
||||
punpcklqdq xmm8, xmm0
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_cnv1_quad:
|
||||
aesenc xmm9, xmm1
|
||||
aesenc xmm10, xmm2
|
||||
aesenc xmm11, xmm3
|
||||
aesenc xmm12, xmm4
|
||||
movd ecx, xmm9
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+rbp]
|
||||
movd ecx, xmm10
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+r14]
|
||||
movd ecx, xmm11
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+r15]
|
||||
movd ecx, xmm12
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+r12]
|
||||
movdqa xmm0, xmm9
|
||||
pxor xmm0, xmm5
|
||||
movdqa xmm5, xmm9
|
||||
movd QWORD PTR [rbp+rbx], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
movdqa xmm0, xmm10
|
||||
shr rcx, 24
|
||||
pxor xmm0, xmm6
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [rbp+rbx+8], rcx
|
||||
movd rbx, xmm1
|
||||
movd QWORD PTR [r14+rdi], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
movdqa xmm0, xmm11
|
||||
shr rcx, 24
|
||||
pxor xmm0, xmm7
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [r14+rdi+8], rcx
|
||||
movd rdi, xmm2
|
||||
movd QWORD PTR [r15+rsi], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
movdqa xmm0, xmm12
|
||||
shr rcx, 24
|
||||
pxor xmm0, xmm8
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [r15+rsi+8], rcx
|
||||
movd QWORD PTR [r12+rax], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
shr rcx, 24
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [r12+rax+8], rcx
|
||||
movd rcx, xmm9
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
mov r9, QWORD PTR [rbp+r8]
|
||||
mov r10, QWORD PTR [rbp+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
pextrq rcx, xmm1, 1
|
||||
add rcx, rax
|
||||
add rbx, rdx
|
||||
mov rax, QWORD PTR [rsp]
|
||||
mov QWORD PTR [rbp+r8], rbx
|
||||
xor rax, rcx
|
||||
mov QWORD PTR [rbp+r8+8], rax
|
||||
xor rcx, r10
|
||||
xor rbx, r9
|
||||
movd xmm1, rbx
|
||||
and ebx, 2097136
|
||||
pinsrq xmm1, rcx, 1
|
||||
movd rcx, xmm10
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
movdqu xmm9, XMMWORD PTR [rbp+rbx]
|
||||
mov r9, QWORD PTR [r14+r8]
|
||||
mov r10, QWORD PTR [r14+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
pextrq rcx, xmm2, 1
|
||||
add rcx, rax
|
||||
add rdi, rdx
|
||||
mov rax, QWORD PTR [rsp+8]
|
||||
mov QWORD PTR [r14+r8], rdi
|
||||
xor rax, rcx
|
||||
xor rdi, r9
|
||||
mov QWORD PTR [r14+r8+8], rax
|
||||
xor rcx, r10
|
||||
movd xmm2, rdi
|
||||
and edi, 2097136
|
||||
pinsrq xmm2, rcx, 1
|
||||
movd rcx, xmm11
|
||||
movd rsi, xmm3
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
movdqa xmm6, xmm10
|
||||
movdqa xmm7, xmm11
|
||||
movdqa xmm8, xmm12
|
||||
movdqu xmm10, XMMWORD PTR [r14+rdi]
|
||||
mov r9, QWORD PTR [r15+r8]
|
||||
mov r10, QWORD PTR [r15+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
pextrq rcx, xmm3, 1
|
||||
add rcx, rax
|
||||
add rsi, rdx
|
||||
mov rax, QWORD PTR [rsp+16]
|
||||
xor rax, rcx
|
||||
mov QWORD PTR [r15+r8], rsi
|
||||
mov QWORD PTR [r15+r8+8], rax
|
||||
xor rcx, r10
|
||||
xor rsi, r9
|
||||
movd xmm3, rsi
|
||||
and esi, 2097136
|
||||
pinsrq xmm3, rcx, 1
|
||||
movd rcx, xmm12
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
movdqu xmm11, XMMWORD PTR [r15+rsi]
|
||||
mov r9, QWORD PTR [r12+r8]
|
||||
mov r10, QWORD PTR [r12+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
mov rcx, rax
|
||||
movd rax, xmm4
|
||||
add rax, rdx
|
||||
mov QWORD PTR [r12+r8], rax
|
||||
xor rax, r9
|
||||
pextrq rdx, xmm4, 1
|
||||
add rdx, rcx
|
||||
mov rcx, QWORD PTR [rsp+24]
|
||||
xor rcx, rdx
|
||||
xor rdx, r10
|
||||
movd xmm4, rax
|
||||
mov QWORD PTR [r12+r8+8], rcx
|
||||
and eax, 2097136
|
||||
pinsrq xmm4, rdx, 1
|
||||
movdqu xmm12, XMMWORD PTR [r12+rax]
|
||||
sub r11, 1
|
||||
jne main_loop_cnv1_quad
|
||||
|
||||
movaps xmm7, XMMWORD PTR [rsp+112]
|
||||
lea r11, QWORD PTR [rsp+144]
|
||||
mov rbx, QWORD PTR [r11+48]
|
||||
mov rbp, QWORD PTR [r11+56]
|
||||
mov rsi, QWORD PTR [r11+64]
|
||||
movaps xmm6, XMMWORD PTR [r11-16]
|
||||
movaps xmm8, XMMWORD PTR [r11-48]
|
||||
movaps xmm9, XMMWORD PTR [r11-64]
|
||||
movaps xmm10, XMMWORD PTR [r11-80]
|
||||
movaps xmm11, XMMWORD PTR [r11-96]
|
||||
movaps xmm12, XMMWORD PTR [r11-112]
|
||||
mov rsp, r11
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
66
src/crypto/cn/asm/win64/cn1/cnv1_single_main_loop.inc
Normal file
66
src/crypto/cn/asm/win64/cn1/cnv1_single_main_loop.inc
Normal file
|
@ -0,0 +1,66 @@
|
|||
mov QWORD PTR [rsp+8], rbx
|
||||
mov QWORD PTR [rsp+16], rbp
|
||||
mov QWORD PTR [rsp+24], rsi
|
||||
mov QWORD PTR [rsp+32], rdi
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
mov rdx, QWORD PTR [rcx]
|
||||
mov esi, 524288
|
||||
mov r11, QWORD PTR [rdx+32]
|
||||
xor r11, QWORD PTR [rdx]
|
||||
mov rdi, QWORD PTR [rdx+224]
|
||||
mov rbx, QWORD PTR [rdx+40]
|
||||
xor rbx, QWORD PTR [rdx+8]
|
||||
mov rcx, QWORD PTR [rdx+56]
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
mov rbp, QWORD PTR [rdx+240]
|
||||
mov r14, QWORD PTR [rdx+232]
|
||||
movd xmm2, rax
|
||||
pinsrq xmm2, rcx, 1
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_cnv1_single:
|
||||
mov r8, r11
|
||||
and r8d, 2097136
|
||||
movdqu xmm1, XMMWORD PTR [rdi+r8]
|
||||
movd xmm0, r11
|
||||
pinsrq xmm0, rbx, 1
|
||||
aesenc xmm1, xmm0
|
||||
movd r15, xmm1
|
||||
mov r9, r15
|
||||
and r9d, 2097136
|
||||
movdqa xmm0, xmm1
|
||||
pxor xmm0, xmm2
|
||||
movdqa xmm2, xmm1
|
||||
movd QWORD PTR [rdi+r8], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov eax, edx
|
||||
shr rax, 24
|
||||
mov ecx, DWORD PTR [r14+rax*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [rdi+r8+8], rcx
|
||||
mov r10, QWORD PTR [rdi+r9]
|
||||
mov r8, QWORD PTR [rdi+r9+8]
|
||||
mov rax, r10
|
||||
mul r15
|
||||
add rbx, rax
|
||||
add r11, rdx
|
||||
mov QWORD PTR [rdi+r9], r11
|
||||
mov rax, rbx
|
||||
xor rbx, r8
|
||||
xor r11, r10
|
||||
xor rax, rbp
|
||||
mov QWORD PTR [rdi+r9+8], rax
|
||||
sub rsi, 1
|
||||
jne main_loop_cnv1_single
|
||||
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
mov rbx, QWORD PTR [rsp+8]
|
||||
mov rbp, QWORD PTR [rsp+16]
|
||||
mov rsi, QWORD PTR [rsp+24]
|
||||
mov rdi, QWORD PTR [rsp+32]
|
|
@ -1,6 +1,9 @@
|
|||
#define ALIGN(x) .align 64
|
||||
.intel_syntax noprefix
|
||||
.section .text
|
||||
.global cnv1_single_mainloop_asm
|
||||
.global cnv1_double_mainloop_asm
|
||||
.global cnv1_quad_mainloop_asm
|
||||
.global cnv2_mainloop_ivybridge_asm
|
||||
.global cnv2_mainloop_ryzen_asm
|
||||
.global cnv2_mainloop_bulldozer_asm
|
||||
|
@ -9,6 +12,24 @@
|
|||
.global cnv2_rwz_double_mainloop_asm
|
||||
.global cnv2_upx_double_mainloop_zen3_asm
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_single_mainloop_asm:
|
||||
#include "../cn1/cnv1_single_main_loop.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_double_mainloop_asm:
|
||||
#include "../cn1/cnv1_double_main_loop.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_quad_mainloop_asm:
|
||||
#include "../cn1/cnv1_quad_main_loop.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_ivybridge_asm:
|
||||
#include "../cn2/cnv2_main_loop_ivybridge.inc"
|
||||
|
|
|
@ -1,4 +1,7 @@
|
|||
_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
|
||||
PUBLIC cnv1_single_mainloop_asm
|
||||
PUBLIC cnv1_double_mainloop_asm
|
||||
PUBLIC cnv1_quad_mainloop_asm
|
||||
PUBLIC cnv2_mainloop_ivybridge_asm
|
||||
PUBLIC cnv2_mainloop_ryzen_asm
|
||||
PUBLIC cnv2_mainloop_bulldozer_asm
|
||||
|
@ -6,28 +9,49 @@ PUBLIC cnv2_double_mainloop_sandybridge_asm
|
|||
PUBLIC cnv2_rwz_mainloop_asm
|
||||
PUBLIC cnv2_rwz_double_mainloop_asm
|
||||
|
||||
ALIGN 64
|
||||
ALIGN(64)
|
||||
cnv1_single_mainloop_asm PROC
|
||||
INCLUDE cn1/cnv1_single_main_loop.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv1_single_mainloop_asm ENDP
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_double_mainloop_asm PROC
|
||||
INCLUDE cn1/cnv1_double_main_loop.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv1_double_mainloop_asm ENDP
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_quad_mainloop_asm PROC
|
||||
INCLUDE cn1/cnv1_quad_main_loop.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv1_quad_mainloop_asm ENDP
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_ivybridge_asm PROC
|
||||
INCLUDE cn2/cnv2_main_loop_ivybridge.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv2_mainloop_ivybridge_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_ryzen_asm PROC
|
||||
INCLUDE cn2/cnv2_main_loop_ryzen.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv2_mainloop_ryzen_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_bulldozer_asm PROC
|
||||
INCLUDE cn2/cnv2_main_loop_bulldozer.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv2_mainloop_bulldozer_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
ALIGN(64)
|
||||
cnv2_double_mainloop_sandybridge_asm PROC
|
||||
INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc
|
||||
ret 0
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue