Optimizations for AMD Bulldozer

- Added support for XOP instructions
- Enabled Ryzen code for Bulldozer because it's faster there too
This commit is contained in:
SChernykh 2020-01-15 13:04:26 +01:00
parent 665e43fecc
commit f80177cbd3
9 changed files with 69 additions and 10 deletions

View file

@ -0,0 +1,24 @@
;# Straight-line load fragment using the AMD XOP instruction VPCMOV.
;# It reads two 64-byte groups from memory based at rsi:
;#   * the group at rsi+rax is XORed into the integer registers r8-r15;
;#   * the group at rsi+rdx is converted from packed int32 pairs to packed doubles in xmm0-xmm7.
;# Both computed addresses are also saved on the stack ([rsp+8] and [rsp+16]).
;# NOTE(review): presumably rsi is a buffer base and rax/rdx are offsets supplied by the
;# including code; xmm13/xmm14 must already hold their mask/constant values -- confirm there.

;# rcx = rsi + rax; keep a copy of this address in the stack slot at rsp+8.
lea rcx, [rsi+rax]
mov [rsp+8], rcx
;# XOR eight consecutive qwords (64 bytes starting at rcx) into r8..r15.
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
xor r11, qword ptr [rcx+24]
xor r12, qword ptr [rcx+32]
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]
;# rcx = rsi + rdx; keep a copy of this address in the stack slot at rsp+16.
lea rcx, [rsi+rdx]
mov [rsp+16], rcx
;# Each CVTDQ2PD reads 8 bytes (two signed 32-bit integers) and writes two doubles,
;# so xmm0..xmm7 together consume the 64 bytes starting at rcx.
cvtdq2pd xmm0, qword ptr [rcx+0]
cvtdq2pd xmm1, qword ptr [rcx+8]
cvtdq2pd xmm2, qword ptr [rcx+16]
cvtdq2pd xmm3, qword ptr [rcx+24]
cvtdq2pd xmm4, qword ptr [rcx+32]
cvtdq2pd xmm5, qword ptr [rcx+40]
cvtdq2pd xmm6, qword ptr [rcx+48]
cvtdq2pd xmm7, qword ptr [rcx+56]
;# VPCMOV is a bitwise select: xmmN = (xmmN AND xmm13) OR (xmm14 AND NOT xmm13).
;# Bits set in xmm13 keep the converted value; bits clear in xmm13 are taken from xmm14.
;# Only xmm4..xmm7 are post-processed; xmm0..xmm3 keep the plain conversion result.
vpcmov xmm4, xmm4, xmm14, xmm13
vpcmov xmm5, xmm5, xmm14, xmm13
vpcmov xmm6, xmm6, xmm14, xmm13
vpcmov xmm7, xmm7, xmm14, xmm13

View file

@ -1,5 +1,5 @@
;# 16-byte constant rows; every row is the same 8-byte pattern written twice.
;# NOTE(review): the hunk header above reads "-1,5 +1,5" yet six lines are shown, so the two
;# rows under mantissaMask look like the removed and the added version of a single line whose
;# +/- markers were lost -- confirm against the repository before treating both as present.
mantissaMask:
;# Per 8-byte half: bytes 0-6 = 0xFF, byte 7 = 0x00 (little-endian qword: bits 0-55 set).
db 255, 255, 255, 255, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 0
;# Per 8-byte half: bytes 0-1 = 0x00, byte 2 = 0xC0, bytes 3-6 = 0xFF, byte 7 = 0x00
;# (little-endian qword: bits 22-55 set, bits 0-21 and 56-63 clear).
db 0, 0, 192, 255, 255, 255, 255, 0, 0, 0, 192, 255, 255, 255, 255, 0
exp240:
;# All sixteen bytes are zero as written here.
db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
scaleMask: