Dataset initialization with AVX2 (WIP)
This commit is contained in:
parent
6b21a51a2f
commit
515a85e66c
17 changed files with 721 additions and 90 deletions
28
src/crypto/randomx/asm/program_sshash_avx2_constants.inc
Normal file
28
src/crypto/randomx/asm/program_sshash_avx2_constants.inc
Normal file
|
@ -0,0 +1,28 @@
|
|||
;# Constant tables for the AVX2 dataset-initialization code.
;# All multi-byte values below are spelled out byte by byte in little-endian order.
r0_avx2_increments: ;# 32 bytes = four 64-bit qwords with the values 2, 3, 4, 5
|
||||
db 2,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0
|
||||
mul_hi_avx2_data: ;# 8 bytes: dword 0 followed by dword 1 (as one qword: 0x0000000100000000)
|
||||
db 0,0,0,0,1,0,0,0
|
||||
r0_avx2_mul: ;# 64-bit constant; going by the label, presumably a multiplier for r0 (usage not visible here)
|
||||
;#/ 6364136223846793005
|
||||
db 45, 127, 149, 76, 45, 244, 81, 88 ;# = 0x5851F42D4C957F2D
|
||||
r1_avx2_add: ;# 64-bit constants r1..r7; going by the labels, presumably per-register addends (usage not visible here)
|
||||
;#/ 9298411001130361340
|
||||
db 252, 161, 245, 89, 138, 151, 10, 129 ;# = 0x810A978A59F5A1FC
|
||||
r2_avx2_add:
|
||||
;#/ 12065312585734608966
|
||||
db 70, 216, 194, 56, 223, 153, 112, 167 ;# = 0xA77099DF38C2D846
|
||||
r3_avx2_add:
|
||||
;#/ 9306329213124626780
|
||||
db 92, 73, 34, 191, 28, 185, 38, 129 ;# = 0x8126B91CBF22495C
|
||||
r4_avx2_add:
|
||||
;#/ 5281919268842080866
|
||||
db 98, 138, 159, 23, 151, 37, 77, 73 ;# = 0x494D2597179F8A62
|
||||
r5_avx2_add:
|
||||
;#/ 10536153434571861004
|
||||
db 12, 236, 170, 206, 185, 239, 55, 146 ;# = 0x9237EFB9CEAAEC0C
|
||||
r6_avx2_add:
|
||||
;#/ 3398623926847679864
|
||||
db 120, 45, 230, 108, 116, 86, 42, 47 ;# = 0x2F2A56746CE62D78
|
||||
r7_avx2_add:
|
||||
;#/ 9549104520008361294
|
||||
db 78, 229, 44, 182, 247, 59, 133, 132 ;# = 0x84853BF7B62CE54E
|
31
src/crypto/randomx/asm/program_sshash_avx2_epilogue.inc
Normal file
31
src/crypto/randomx/asm/program_sshash_avx2_epilogue.inc
Normal file
|
@ -0,0 +1,31 @@
|
|||
;# Epilogue: release the scratch stack area, restore xmm0-xmm15 and the pushed
;# general-purpose registers, then return to the caller.
;# vzeroupper is issued BEFORE the legacy-SSE movdqu restores so that they do not
;# execute with dirty upper YMM halves (avoids the AVX->SSE transition penalty).
;# The final register state is the same as with vzeroupper after the restores:
;# legacy movdqu never touches bits 128..255, and vzeroupper never touches bits 0..127.
add rsp, 32 ;# drop the 32-byte scratch area (four qword slots at [rsp]..[rsp+24])
|
||||
pop r9 ;# NOTE(review): the matching push is not in the visible prologue fragment - confirm it is emitted elsewhere
|
||||
|
||||
vzeroupper ;# clear upper halves of all YMM registers before any legacy-SSE instruction runs
|
||||
movdqu xmm0, xmmword ptr [rsp] ;# reload xmm0-xmm15 from the 256-byte save area
|
||||
movdqu xmm1, xmmword ptr [rsp + 16]
|
||||
movdqu xmm2, xmmword ptr [rsp + 32]
|
||||
movdqu xmm3, xmmword ptr [rsp + 48]
|
||||
movdqu xmm4, xmmword ptr [rsp + 64]
|
||||
movdqu xmm5, xmmword ptr [rsp + 80]
|
||||
movdqu xmm6, xmmword ptr [rsp + 96]
|
||||
movdqu xmm7, xmmword ptr [rsp + 112]
|
||||
movdqu xmm8, xmmword ptr [rsp + 128]
|
||||
movdqu xmm9, xmmword ptr [rsp + 144]
|
||||
movdqu xmm10, xmmword ptr [rsp + 160]
|
||||
movdqu xmm11, xmmword ptr [rsp + 176]
|
||||
movdqu xmm12, xmmword ptr [rsp + 192]
|
||||
movdqu xmm13, xmmword ptr [rsp + 208]
|
||||
movdqu xmm14, xmmword ptr [rsp + 224]
|
||||
movdqu xmm15, xmmword ptr [rsp + 240]
|
||||
add rsp, 256 ;# release the XMM save area (16 registers x 16 bytes)
|
||||
|
||||
pop r15 ;# restore general-purpose registers in reverse push order
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rsi
|
||||
pop rdi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
37
src/crypto/randomx/asm/program_sshash_avx2_loop_begin.inc
Normal file
37
src/crypto/randomx/asm/program_sshash_avx2_loop_begin.inc
Normal file
|
@ -0,0 +1,37 @@
|
|||
;# Loop head: prefetch the five 64-byte output lines at rsi and the five cache
;# lines selected by indices rbp .. rbp+4. The first cache-line address is left
;# in rbx; the other four are stored to the qword slots [rsp] .. [rsp+24].
;# Clobbers rax, rbx and flags. RANDOMX_CACHE_MASK is defined outside this fragment.
;# prefetch RandomX dataset lines
|
||||
prefetchnta byte ptr [rsi] ;# 5 lines x 64 bytes = 320 bytes starting at rsi
|
||||
prefetchnta byte ptr [rsi+64]
|
||||
prefetchnta byte ptr [rsi+128]
|
||||
prefetchnta byte ptr [rsi+192]
|
||||
prefetchnta byte ptr [rsi+256]
|
||||
|
||||
;# prefetch RandomX cache lines
|
||||
mov rbx, rbp ;# line for index rbp: rbx = rdi + (rbp & RANDOMX_CACHE_MASK) * 64
|
||||
and rbx, RANDOMX_CACHE_MASK
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
prefetchnta byte ptr [rbx]
|
||||
lea rax, [rbp+1] ;# same address computation for index rbp+1
|
||||
and rax, RANDOMX_CACHE_MASK
|
||||
shl rax, 6
|
||||
add rax, rdi
|
||||
prefetchnta byte ptr [rax]
|
||||
mov [rsp], rax ;# keep the line address in stack slot 0
|
||||
lea rax, [rbp+2] ;# index rbp+2
|
||||
and rax, RANDOMX_CACHE_MASK
|
||||
shl rax, 6
|
||||
add rax, rdi
|
||||
prefetchnta byte ptr [rax]
|
||||
mov [rsp+8], rax ;# stack slot 1
|
||||
lea rax, [rbp+3] ;# index rbp+3
|
||||
and rax, RANDOMX_CACHE_MASK
|
||||
shl rax, 6
|
||||
add rax, rdi
|
||||
prefetchnta byte ptr [rax]
|
||||
mov [rsp+16], rax ;# stack slot 2
|
||||
lea rax, [rbp+4] ;# index rbp+4
|
||||
and rax, RANDOMX_CACHE_MASK
|
||||
shl rax, 6
|
||||
add rax, rdi
|
||||
prefetchnta byte ptr [rax]
|
||||
mov [rsp+24], rax ;# stack slot 3
|
38
src/crypto/randomx/asm/program_sshash_avx2_loop_end.inc
Normal file
38
src/crypto/randomx/asm/program_sshash_avx2_loop_end.inc
Normal file
|
@ -0,0 +1,38 @@
|
|||
;# Loop tail: write five 64-byte items (320 bytes) at rsi, then advance.
;# The first item comes from r8-r15. The other four come from ymm0-ymm7, which hold
;# one 64-bit value per item in each of their four lanes; they are transposed
;# (4x4 qword transpose, done twice) so that each item's eight values become contiguous.
;# The scalar stores are interleaved with the vector shuffles.
;# Clobbers ymm0-ymm15 and flags.
mov qword ptr [rsi+0], r8 ;# first item: bytes 0..63 = r8..r15
|
||||
vpunpcklqdq ymm8, ymm0, ymm1 ;# ymm8 = ymm0[0], ymm1[0], ymm0[2], ymm1[2]
|
||||
mov qword ptr [rsi+8], r9
|
||||
vpunpcklqdq ymm9, ymm2, ymm3 ;# ymm9 = ymm2[0], ymm3[0], ymm2[2], ymm3[2]
|
||||
mov qword ptr [rsi+16], r10
|
||||
vpunpcklqdq ymm10, ymm4, ymm5 ;# ymm10 = ymm4[0], ymm5[0], ymm4[2], ymm5[2]
|
||||
mov qword ptr [rsi+24], r11
|
||||
vpunpcklqdq ymm11, ymm6, ymm7 ;# ymm11 = ymm6[0], ymm7[0], ymm6[2], ymm7[2]
|
||||
mov qword ptr [rsi+32], r12
|
||||
vpunpckhqdq ymm12, ymm0, ymm1 ;# ymm12 = ymm0[1], ymm1[1], ymm0[3], ymm1[3]
|
||||
mov qword ptr [rsi+40], r13
|
||||
vpunpckhqdq ymm13, ymm2, ymm3 ;# ymm13 = ymm2[1], ymm3[1], ymm2[3], ymm3[3]
|
||||
mov qword ptr [rsi+48], r14
|
||||
vpunpckhqdq ymm14, ymm4, ymm5 ;# ymm14 = ymm4[1], ymm5[1], ymm4[3], ymm5[3]
|
||||
mov qword ptr [rsi+56], r15
|
||||
vpunpckhqdq ymm15, ymm6, ymm7 ;# ymm15 = ymm6[1], ymm7[1], ymm6[3], ymm7[3]
|
||||
|
||||
vperm2i128 ymm0, ymm8, ymm9, 32 ;# imm 32 (0x20): low halves -> lane 0 of ymm0..ymm3
|
||||
vperm2i128 ymm1, ymm10, ymm11, 32 ;# lane 0 of ymm4..ymm7
|
||||
vmovdqu ymmword ptr [rsi+64], ymm0 ;# second item, bytes 0..31
|
||||
vmovdqu ymmword ptr [rsi+96], ymm1 ;# second item, bytes 32..63
|
||||
vperm2i128 ymm2, ymm12, ymm13, 32 ;# lane 1 of ymm0..ymm3
|
||||
vperm2i128 ymm3, ymm14, ymm15, 32 ;# lane 1 of ymm4..ymm7
|
||||
vmovdqu ymmword ptr [rsi+128], ymm2 ;# third item
|
||||
vmovdqu ymmword ptr [rsi+160], ymm3
|
||||
vperm2i128 ymm4, ymm8, ymm9, 49 ;# imm 49 (0x31): high halves -> lane 2 of ymm0..ymm3
|
||||
vperm2i128 ymm5, ymm10, ymm11, 49 ;# lane 2 of ymm4..ymm7
|
||||
vmovdqu ymmword ptr [rsi+192], ymm4 ;# fourth item
|
||||
vmovdqu ymmword ptr [rsi+224], ymm5
|
||||
vperm2i128 ymm6, ymm12, ymm13, 49 ;# lane 3 of ymm0..ymm3
|
||||
vperm2i128 ymm7, ymm14, ymm15, 49 ;# lane 3 of ymm4..ymm7
|
||||
vmovdqu ymmword ptr [rsi+256], ymm6 ;# fifth item
|
||||
vmovdqu ymmword ptr [rsi+288], ymm7
|
||||
|
||||
add rbp, 5 ;# five items were produced in this iteration
|
||||
add rsi, 320 ;# advance the output pointer by 5 x 64 bytes
|
||||
cmp rbp, qword ptr [rsp+32] ;# compare the index with the limit kept at [rsp+32] (unsigned, see jb below)
|
||||
db 15, 130, 0, 0, 0, 0 ;# jb rel32 - raw encoding with a zero displacement; presumably patched by the code generator (confirm)
|
|
@ -0,0 +1,27 @@
|
|||
;# Prologue: push eight general-purpose registers, then reserve 256 bytes of
;# stack and store xmm0-xmm15 there (16 bytes each, unaligned stores).
push rbx
|
||||
push rbp
|
||||
push rdi
|
||||
push rsi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
|
||||
;# save all XMM registers just to be safe for all calling conventions
|
||||
sub rsp, 256 ;# 16 registers x 16 bytes
|
||||
movdqu xmmword ptr [rsp], xmm0 ;# xmmN is saved at [rsp + 16*N]
|
||||
movdqu xmmword ptr [rsp + 16], xmm1
|
||||
movdqu xmmword ptr [rsp + 32], xmm2
|
||||
movdqu xmmword ptr [rsp + 48], xmm3
|
||||
movdqu xmmword ptr [rsp + 64], xmm4
|
||||
movdqu xmmword ptr [rsp + 80], xmm5
|
||||
movdqu xmmword ptr [rsp + 96], xmm6
|
||||
movdqu xmmword ptr [rsp + 112], xmm7
|
||||
movdqu xmmword ptr [rsp + 128], xmm8
|
||||
movdqu xmmword ptr [rsp + 144], xmm9
|
||||
movdqu xmmword ptr [rsp + 160], xmm10
|
||||
movdqu xmmword ptr [rsp + 176], xmm11
|
||||
movdqu xmmword ptr [rsp + 192], xmm12
|
||||
movdqu xmmword ptr [rsp + 208], xmm13
|
||||
movdqu xmmword ptr [rsp + 224], xmm14
|
||||
movdqu xmmword ptr [rsp + 240], xmm15
|
50
src/crypto/randomx/asm/program_sshash_avx2_ssh_load.inc
Normal file
50
src/crypto/randomx/asm/program_sshash_avx2_ssh_load.inc
Normal file
|
@ -0,0 +1,50 @@
|
|||
;# Load 64 bytes from each of four addresses held in stack slots, transpose the
;# data with qword shuffles, and XOR it into ymm0-ymm7 (one 64-byte line per lane).
;# rbx and ymm14 are saved on entry and restored on exit; rax, rcx, rdx and
;# ymm8-ymm13 are clobbered.
sub rsp, 40 ;# temporary save area: 8 bytes for rbx + 32 bytes for ymm14
|
||||
mov [rsp], rbx
|
||||
vmovdqu ymmword ptr [rsp+8], ymm14
|
||||
|
||||
mov rax, [rsp+40] ;# the four qword slots that were at [rsp]..[rsp+24] before the sub above
|
||||
mov rbx, [rsp+48]
|
||||
mov rcx, [rsp+56]
|
||||
mov rdx, [rsp+64]
|
||||
|
||||
vmovdqu ymm8, ymmword ptr [rax] ;# ymm8 = r0[1], r1[1], r2[1], r3[1]
|
||||
vmovdqu ymm9, ymmword ptr [rbx] ;# ymm9 = r0[2], r1[2], r2[2], r3[2]
|
||||
vmovdqu ymm10, ymmword ptr [rcx] ;# ymm10 = r0[3], r1[3], r2[3], r3[3]
|
||||
vmovdqu ymm11, ymmword ptr [rdx] ;# ymm11 = r0[4], r1[4], r2[4], r3[4]
|
||||
|
||||
vpunpcklqdq ymm12, ymm8, ymm9 ;# ymm12 = r0[1], r0[2], r2[1], r2[2]
|
||||
vpunpcklqdq ymm13, ymm10, ymm11 ;# ymm13 = r0[3], r0[4], r2[3], r2[4]
|
||||
vperm2i128 ymm14, ymm12, ymm13, 32 ;# ymm14 = r0[1], r0[2], r0[3], r0[4]
|
||||
vpxor ymm0, ymm0, ymm14
|
||||
vperm2i128 ymm14, ymm12, ymm13, 49 ;# ymm14 = r2[1], r2[2], r2[3], r2[4]
|
||||
vpxor ymm2, ymm2, ymm14
|
||||
|
||||
vpunpckhqdq ymm12, ymm8, ymm9 ;# ymm12 = r1[1], r1[2], r3[1], r3[2]
|
||||
vpunpckhqdq ymm13, ymm10, ymm11 ;# ymm13 = r1[3], r1[4], r3[3], r3[4]
|
||||
vperm2i128 ymm14, ymm12, ymm13, 32 ;# ymm14 = r1[1], r1[2], r1[3], r1[4]
|
||||
vpxor ymm1, ymm1, ymm14
|
||||
vperm2i128 ymm14, ymm12, ymm13, 49 ;# ymm14 = r3[1], r3[2], r3[3], r3[4]
|
||||
vpxor ymm3, ymm3, ymm14
|
||||
|
||||
vmovdqu ymm8, ymmword ptr [rax+32] ;# ymm8 = r4[1], r5[1], r6[1], r7[1]
|
||||
vmovdqu ymm9, ymmword ptr [rbx+32] ;# ymm9 = r4[2], r5[2], r6[2], r7[2]
|
||||
vmovdqu ymm10, ymmword ptr [rcx+32] ;# ymm10 = r4[3], r5[3], r6[3], r7[3]
|
||||
vmovdqu ymm11, ymmword ptr [rdx+32] ;# ymm11 = r4[4], r5[4], r6[4], r7[4]
|
||||
|
||||
vpunpcklqdq ymm12, ymm8, ymm9 ;# ymm12 = r4[1], r4[2], r6[1], r6[2]
|
||||
vpunpcklqdq ymm13, ymm10, ymm11 ;# ymm13 = r4[3], r4[4], r6[3], r6[4]
|
||||
vperm2i128 ymm14, ymm12, ymm13, 32 ;# ymm14 = r4[1], r4[2], r4[3], r4[4]
|
||||
vpxor ymm4, ymm4, ymm14
|
||||
vperm2i128 ymm14, ymm12, ymm13, 49 ;# ymm14 = r6[1], r6[2], r6[3], r6[4]
|
||||
vpxor ymm6, ymm6, ymm14
|
||||
|
||||
vpunpckhqdq ymm12, ymm8, ymm9 ;# ymm12 = r5[1], r5[2], r7[1], r7[2]
|
||||
vpunpckhqdq ymm13, ymm10, ymm11 ;# ymm13 = r5[3], r5[4], r7[3], r7[4]
|
||||
vperm2i128 ymm14, ymm12, ymm13, 32 ;# ymm14 = r5[1], r5[2], r5[3], r5[4]
|
||||
vpxor ymm5, ymm5, ymm14
|
||||
vperm2i128 ymm14, ymm12, ymm13, 49 ;# ymm14 = r7[1], r7[2], r7[3], r7[4]
|
||||
vpxor ymm7, ymm7, ymm14
|
||||
|
||||
mov rbx, [rsp] ;# restore the registers saved on entry
|
||||
vmovdqu ymm14, ymmword ptr [rsp+8]
|
||||
add rsp, 40 ;# release the temporary save area
|
29
src/crypto/randomx/asm/program_sshash_avx2_ssh_prefetch.inc
Normal file
29
src/crypto/randomx/asm/program_sshash_avx2_ssh_prefetch.inc
Normal file
|
@ -0,0 +1,29 @@
|
|||
;# Spill the four qwords of ymm0 to [rsp]..[rsp+24], then replace each one in
;# place with the address rdi + (value & RANDOMX_CACHE_MASK) * 64 and prefetch
;# that address. Clobbers rax and flags; ymm0 itself is not modified.
;# RANDOMX_CACHE_MASK is defined outside this fragment.
vmovdqu ymmword ptr [rsp], ymm0 ;# overwrites the four qword stack slots at [rsp]..[rsp+24]
|
||||
|
||||
mov rax, [rsp] ;# lane 0 of ymm0
|
||||
and rax, RANDOMX_CACHE_MASK
|
||||
shl rax, 6 ;# x64: line index -> byte offset
|
||||
add rax, rdi
|
||||
mov [rsp], rax ;# slot 0 now holds the line address
|
||||
prefetchnta byte ptr [rax]
|
||||
|
||||
mov rax, [rsp+8] ;# lane 1 of ymm0
|
||||
and rax, RANDOMX_CACHE_MASK
|
||||
shl rax, 6
|
||||
add rax, rdi
|
||||
mov [rsp+8], rax
|
||||
prefetchnta byte ptr [rax]
|
||||
|
||||
mov rax, [rsp+16] ;# lane 2 of ymm0
|
||||
and rax, RANDOMX_CACHE_MASK
|
||||
shl rax, 6
|
||||
add rax, rdi
|
||||
mov [rsp+16], rax
|
||||
prefetchnta byte ptr [rax]
|
||||
|
||||
mov rax, [rsp+24] ;# lane 3 of ymm0
|
||||
and rax, RANDOMX_CACHE_MASK
|
||||
shl rax, 6
|
||||
add rax, rdi
|
||||
mov [rsp+24], rax
|
||||
prefetchnta byte ptr [rax]
|
Loading…
Add table
Add a link
Reference in a new issue