Revert "RandomX: rewrote dataset read code"
This reverts commit ff82ca57f2.
This commit is contained in:
parent
8a1ca690eb
commit
442353b31e
10 changed files with 69 additions and 16 deletions
|
@ -22,7 +22,6 @@
|
||||||
mov rsi, rdx ;# uint8_t* scratchpad
|
mov rsi, rdx ;# uint8_t* scratchpad
|
||||||
|
|
||||||
mov rax, rbp
|
mov rax, rbp
|
||||||
ror rbp, 32
|
|
||||||
|
|
||||||
;# zero integer registers
|
;# zero integer registers
|
||||||
xor r8, r8
|
xor r8, r8
|
||||||
|
|
|
@ -35,7 +35,6 @@
|
||||||
mov rbx, r9 ;# loop counter
|
mov rbx, r9 ;# loop counter
|
||||||
|
|
||||||
mov rax, rbp
|
mov rax, rbp
|
||||||
ror rbp, 32
|
|
||||||
|
|
||||||
;# zero integer registers
|
;# zero integer registers
|
||||||
xor r8, r8
|
xor r8, r8
|
||||||
|
|
|
@ -1,16 +1,17 @@
|
||||||
mov ecx, ebp ;# ecx = ma
|
|
||||||
and ecx, RANDOMX_DATASET_BASE_MASK
|
|
||||||
xor r8, qword ptr [rdi+rcx]
|
|
||||||
ror rbp, 32 ;# swap "ma" and "mx"
|
|
||||||
xor rbp, rax ;# modify "mx"
|
xor rbp, rax ;# modify "mx"
|
||||||
mov edx, ebp ;# edx = mx
|
mov edx, ebp ;# edx = mx
|
||||||
and edx, RANDOMX_DATASET_BASE_MASK
|
and edx, RANDOMX_DATASET_BASE_MASK
|
||||||
prefetchnta byte ptr [rdi+rdx]
|
prefetchnta byte ptr [rdi+rdx]
|
||||||
xor r9, qword ptr [rdi+rcx+8]
|
ror rbp, 32 ;# swap "ma" and "mx"
|
||||||
xor r10, qword ptr [rdi+rcx+16]
|
mov edx, ebp ;# edx = ma
|
||||||
xor r11, qword ptr [rdi+rcx+24]
|
and edx, RANDOMX_DATASET_BASE_MASK
|
||||||
xor r12, qword ptr [rdi+rcx+32]
|
lea rcx, [rdi+rdx] ;# dataset cache line
|
||||||
xor r13, qword ptr [rdi+rcx+40]
|
xor r8, qword ptr [rcx+0]
|
||||||
xor r14, qword ptr [rdi+rcx+48]
|
xor r9, qword ptr [rcx+8]
|
||||||
xor r15, qword ptr [rdi+rcx+56]
|
xor r10, qword ptr [rcx+16]
|
||||||
|
xor r11, qword ptr [rcx+24]
|
||||||
|
xor r12, qword ptr [rcx+32]
|
||||||
|
xor r13, qword ptr [rcx+40]
|
||||||
|
xor r14, qword ptr [rcx+48]
|
||||||
|
xor r15, qword ptr [rcx+56]
|
||||||
|
|
17
src/crypto/randomx/asm/program_read_dataset_ryzen.inc
Normal file
17
src/crypto/randomx/asm/program_read_dataset_ryzen.inc
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
mov rcx, rbp ;# ecx = ma
|
||||||
|
shr rcx, 32
|
||||||
|
and ecx, RANDOMX_DATASET_BASE_MASK
|
||||||
|
xor r8, qword ptr [rdi+rcx]
|
||||||
|
xor rbp, rax ;# modify "mx"
|
||||||
|
mov edx, ebp ;# edx = mx
|
||||||
|
and edx, RANDOMX_DATASET_BASE_MASK
|
||||||
|
prefetchnta byte ptr [rdi+rdx]
|
||||||
|
ror rbp, 32 ;# swap "ma" and "mx"
|
||||||
|
xor r9, qword ptr [rdi+rcx+8]
|
||||||
|
xor r10, qword ptr [rdi+rcx+16]
|
||||||
|
xor r11, qword ptr [rdi+rcx+24]
|
||||||
|
xor r12, qword ptr [rdi+rcx+32]
|
||||||
|
xor r13, qword ptr [rdi+rcx+40]
|
||||||
|
xor r14, qword ptr [rdi+rcx+48]
|
||||||
|
xor r15, qword ptr [rdi+rcx+56]
|
||||||
|
|
|
@ -115,7 +115,6 @@ namespace randomx {
|
||||||
#define codeLoopLoad ADDR(randomx_program_loop_load)
|
#define codeLoopLoad ADDR(randomx_program_loop_load)
|
||||||
#define codeLoopLoadXOP ADDR(randomx_program_loop_load_xop)
|
#define codeLoopLoadXOP ADDR(randomx_program_loop_load_xop)
|
||||||
#define codeProgamStart ADDR(randomx_program_start)
|
#define codeProgamStart ADDR(randomx_program_start)
|
||||||
#define codeReadDataset ADDR(randomx_program_read_dataset)
|
|
||||||
#define codeReadDatasetLightSshInit ADDR(randomx_program_read_dataset_sshash_init)
|
#define codeReadDatasetLightSshInit ADDR(randomx_program_read_dataset_sshash_init)
|
||||||
#define codeReadDatasetLightSshFin ADDR(randomx_program_read_dataset_sshash_fin)
|
#define codeReadDatasetLightSshFin ADDR(randomx_program_read_dataset_sshash_fin)
|
||||||
#define codeDatasetInit ADDR(randomx_dataset_init)
|
#define codeDatasetInit ADDR(randomx_dataset_init)
|
||||||
|
@ -136,7 +135,6 @@ namespace randomx {
|
||||||
#define prologueSize (codeLoopBegin - codePrologue)
|
#define prologueSize (codeLoopBegin - codePrologue)
|
||||||
#define loopLoadSize (codeLoopLoadXOP - codeLoopLoad)
|
#define loopLoadSize (codeLoopLoadXOP - codeLoopLoad)
|
||||||
#define loopLoadXOPSize (codeProgamStart - codeLoopLoadXOP)
|
#define loopLoadXOPSize (codeProgamStart - codeLoopLoadXOP)
|
||||||
#define readDatasetSize (codeReadDatasetLightSshInit - codeReadDataset)
|
|
||||||
#define readDatasetLightInitSize (codeReadDatasetLightSshFin - codeReadDatasetLightSshInit)
|
#define readDatasetLightInitSize (codeReadDatasetLightSshFin - codeReadDatasetLightSshInit)
|
||||||
#define readDatasetLightFinSize (codeLoopStore - codeReadDatasetLightSshFin)
|
#define readDatasetLightFinSize (codeLoopStore - codeReadDatasetLightSshFin)
|
||||||
#define loopStoreSize (codeLoopEnd - codeLoopStore)
|
#define loopStoreSize (codeLoopEnd - codeLoopStore)
|
||||||
|
@ -320,7 +318,20 @@ namespace randomx {
|
||||||
vm_flags = flags;
|
vm_flags = flags;
|
||||||
|
|
||||||
generateProgramPrologue(prog, pcfg);
|
generateProgramPrologue(prog, pcfg);
|
||||||
emit(codeReadDataset, readDatasetSize, code, codePos);
|
|
||||||
|
uint8_t* p;
|
||||||
|
uint32_t n;
|
||||||
|
if (flags & RANDOMX_FLAG_AMD) {
|
||||||
|
p = RandomX_CurrentConfig.codeReadDatasetRyzenTweaked;
|
||||||
|
n = RandomX_CurrentConfig.codeReadDatasetRyzenTweakedSize;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
p = RandomX_CurrentConfig.codeReadDatasetTweaked;
|
||||||
|
n = RandomX_CurrentConfig.codeReadDatasetTweakedSize;
|
||||||
|
}
|
||||||
|
memcpy(code + codePos, p, n);
|
||||||
|
codePos += n;
|
||||||
|
|
||||||
generateProgramEpilogue(prog, pcfg);
|
generateProgramEpilogue(prog, pcfg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -48,6 +48,7 @@
|
||||||
.global DECL(randomx_program_loop_load_xop)
|
.global DECL(randomx_program_loop_load_xop)
|
||||||
.global DECL(randomx_program_start)
|
.global DECL(randomx_program_start)
|
||||||
.global DECL(randomx_program_read_dataset)
|
.global DECL(randomx_program_read_dataset)
|
||||||
|
.global DECL(randomx_program_read_dataset_ryzen)
|
||||||
.global DECL(randomx_program_read_dataset_sshash_init)
|
.global DECL(randomx_program_read_dataset_sshash_init)
|
||||||
.global DECL(randomx_program_read_dataset_sshash_fin)
|
.global DECL(randomx_program_read_dataset_sshash_fin)
|
||||||
.global DECL(randomx_program_loop_store)
|
.global DECL(randomx_program_loop_store)
|
||||||
|
@ -139,6 +140,9 @@ DECL(randomx_program_start):
|
||||||
DECL(randomx_program_read_dataset):
|
DECL(randomx_program_read_dataset):
|
||||||
#include "asm/program_read_dataset.inc"
|
#include "asm/program_read_dataset.inc"
|
||||||
|
|
||||||
|
DECL(randomx_program_read_dataset_ryzen):
|
||||||
|
#include "asm/program_read_dataset_ryzen.inc"
|
||||||
|
|
||||||
DECL(randomx_program_read_dataset_sshash_init):
|
DECL(randomx_program_read_dataset_sshash_init):
|
||||||
#include "asm/program_read_dataset_sshash_init.inc"
|
#include "asm/program_read_dataset_sshash_init.inc"
|
||||||
|
|
||||||
|
|
|
@ -39,6 +39,7 @@ PUBLIC randomx_program_loop_load
|
||||||
PUBLIC randomx_program_loop_load_xop
|
PUBLIC randomx_program_loop_load_xop
|
||||||
PUBLIC randomx_program_start
|
PUBLIC randomx_program_start
|
||||||
PUBLIC randomx_program_read_dataset
|
PUBLIC randomx_program_read_dataset
|
||||||
|
PUBLIC randomx_program_read_dataset_ryzen
|
||||||
PUBLIC randomx_program_read_dataset_sshash_init
|
PUBLIC randomx_program_read_dataset_sshash_init
|
||||||
PUBLIC randomx_program_read_dataset_sshash_fin
|
PUBLIC randomx_program_read_dataset_sshash_fin
|
||||||
PUBLIC randomx_dataset_init
|
PUBLIC randomx_dataset_init
|
||||||
|
@ -135,6 +136,10 @@ randomx_program_read_dataset PROC
|
||||||
include asm/program_read_dataset.inc
|
include asm/program_read_dataset.inc
|
||||||
randomx_program_read_dataset ENDP
|
randomx_program_read_dataset ENDP
|
||||||
|
|
||||||
|
randomx_program_read_dataset_ryzen PROC
|
||||||
|
include asm/program_read_dataset_ryzen.inc
|
||||||
|
randomx_program_read_dataset_ryzen ENDP
|
||||||
|
|
||||||
randomx_program_read_dataset_sshash_init PROC
|
randomx_program_read_dataset_sshash_init PROC
|
||||||
include asm/program_read_dataset_sshash_init.inc
|
include asm/program_read_dataset_sshash_init.inc
|
||||||
randomx_program_read_dataset_sshash_init ENDP
|
randomx_program_read_dataset_sshash_init ENDP
|
||||||
|
|
|
@ -40,6 +40,7 @@ extern "C" {
|
||||||
void randomx_program_loop_load_xop();
|
void randomx_program_loop_load_xop();
|
||||||
void randomx_program_start();
|
void randomx_program_start();
|
||||||
void randomx_program_read_dataset();
|
void randomx_program_read_dataset();
|
||||||
|
void randomx_program_read_dataset_ryzen();
|
||||||
void randomx_program_read_dataset_sshash_init();
|
void randomx_program_read_dataset_sshash_init();
|
||||||
void randomx_program_read_dataset_sshash_fin();
|
void randomx_program_read_dataset_sshash_fin();
|
||||||
void randomx_program_loop_store();
|
void randomx_program_loop_store();
|
||||||
|
|
|
@ -191,6 +191,18 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase()
|
||||||
const uint8_t* b = addr(randomx_sshash_end);
|
const uint8_t* b = addr(randomx_sshash_end);
|
||||||
memcpy(codeShhPrefetchTweaked, a, b - a);
|
memcpy(codeShhPrefetchTweaked, a, b - a);
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
const uint8_t* a = addr(randomx_program_read_dataset);
|
||||||
|
const uint8_t* b = addr(randomx_program_read_dataset_ryzen);
|
||||||
|
memcpy(codeReadDatasetTweaked, a, b - a);
|
||||||
|
codeReadDatasetTweakedSize = b - a;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
const uint8_t* a = addr(randomx_program_read_dataset_ryzen);
|
||||||
|
const uint8_t* b = addr(randomx_program_read_dataset_sshash_init);
|
||||||
|
memcpy(codeReadDatasetRyzenTweaked, a, b - a);
|
||||||
|
codeReadDatasetRyzenTweakedSize = b - a;
|
||||||
|
}
|
||||||
if (xmrig::Cpu::info()->hasBMI2()) {
|
if (xmrig::Cpu::info()->hasBMI2()) {
|
||||||
const uint8_t* a = addr(randomx_prefetch_scratchpad_bmi2);
|
const uint8_t* a = addr(randomx_prefetch_scratchpad_bmi2);
|
||||||
const uint8_t* b = addr(randomx_prefetch_scratchpad_end);
|
const uint8_t* b = addr(randomx_prefetch_scratchpad_end);
|
||||||
|
|
|
@ -126,6 +126,10 @@ struct RandomX_ConfigurationBase
|
||||||
rx_vec_i128 fillAes4Rx4_Key[8];
|
rx_vec_i128 fillAes4Rx4_Key[8];
|
||||||
|
|
||||||
uint8_t codeShhPrefetchTweaked[20];
|
uint8_t codeShhPrefetchTweaked[20];
|
||||||
|
uint8_t codeReadDatasetTweaked[64];
|
||||||
|
uint32_t codeReadDatasetTweakedSize;
|
||||||
|
uint8_t codeReadDatasetRyzenTweaked[72];
|
||||||
|
uint32_t codeReadDatasetRyzenTweakedSize;
|
||||||
uint8_t codePrefetchScratchpadTweaked[28];
|
uint8_t codePrefetchScratchpadTweaked[28];
|
||||||
uint32_t codePrefetchScratchpadTweakedSize;
|
uint32_t codePrefetchScratchpadTweakedSize;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue