Initial ASM wrapper.
This commit is contained in:
parent
f163aad38c
commit
ba65a34a01
7 changed files with 58 additions and 29 deletions
|
@ -48,10 +48,10 @@
|
|||
punpcklqdq xmm4, xmm0
|
||||
movq xmm0, rcx
|
||||
punpcklqdq xmm5, xmm0
|
||||
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||
|
||||
ALIGN 16
|
||||
main_loop_ivybridge:
|
||||
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||
lea rdx, QWORD PTR [r10+rbx]
|
||||
mov ecx, r10d
|
||||
mov eax, r10d
|
||||
|
@ -63,28 +63,29 @@ main_loop_ivybridge:
|
|||
movq xmm7, r8
|
||||
punpcklqdq xmm7, xmm0
|
||||
aesenc xmm6, xmm7
|
||||
movq rbp, xmm6
|
||||
mov r9, rbp
|
||||
and r9d, 2097136
|
||||
movdqu xmm2, XMMWORD PTR [rcx+rbx]
|
||||
movdqu xmm1, XMMWORD PTR [rax+rbx]
|
||||
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm1, xmm7
|
||||
movdqu xmm2, XMMWORD PTR [rcx+rbx]
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
||||
movq rcx, xmm3
|
||||
movdqu XMMWORD PTR [rax+rbx], xmm2
|
||||
mov rax, rcx
|
||||
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||
mov r10, r9
|
||||
xor r10d, 32
|
||||
movq rcx, xmm3
|
||||
mov rax, rcx
|
||||
shl rax, 32
|
||||
xor rdi, rax
|
||||
movq rbp, xmm6
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
mov r10, rbp
|
||||
and r10d, 2097136
|
||||
movdqu XMMWORD PTR [rdx], xmm0
|
||||
xor rdi, QWORD PTR [r10+rbx]
|
||||
lea r14, QWORD PTR [r10+rbx]
|
||||
xor r10d, 32
|
||||
xor rdi, QWORD PTR [r9+rbx]
|
||||
lea r14, QWORD PTR [r9+rbx]
|
||||
mov r12, QWORD PTR [r14+8]
|
||||
xor edx, edx
|
||||
lea r9d, DWORD PTR [ecx+ecx]
|
||||
|
@ -117,8 +118,15 @@ sqrt_fixup_ivybridge_ret:
|
|||
mul rbp
|
||||
movq xmm2, rdx
|
||||
xor rdx, [rcx+rbx]
|
||||
add r8, rdx
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rdi
|
||||
mov edi, r8d
|
||||
and edi, 2097136
|
||||
movq xmm0, rax
|
||||
xor rax, [rcx+rbx+8]
|
||||
add r11, rax
|
||||
mov QWORD PTR [r14+8], r11
|
||||
punpcklqdq xmm2, xmm0
|
||||
|
||||
mov r9d, r10d
|
||||
|
@ -135,13 +143,8 @@ sqrt_fixup_ivybridge_ret:
|
|||
movdqa xmm4, xmm6
|
||||
movdqu XMMWORD PTR [rcx+rbx], xmm2
|
||||
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||
add r8, rdx
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rdi
|
||||
mov r10, r8
|
||||
add r11, rax
|
||||
mov QWORD PTR [r14+8], r11
|
||||
and r10d, 2097136
|
||||
movdqu xmm6, [rdi+rbx]
|
||||
mov r10d, edi
|
||||
xor r11, r12
|
||||
dec rsi
|
||||
jne main_loop_ivybridge
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue