From 442353b31e826b2734b66ad4557ffd0a0e17c2a4 Mon Sep 17 00:00:00 2001 From: MoneroOcean Date: Mon, 31 May 2021 16:01:50 +0000 Subject: [PATCH] Revert "RandomX: rewrote dataset read code" This reverts commit ff82ca57f25e8cbb833adb4aae2204db43bc1ead. --- .../randomx/asm/program_prologue_linux.inc | 1 - .../randomx/asm/program_prologue_win64.inc | 1 - .../randomx/asm/program_read_dataset.inc | 23 ++++++++++--------- .../asm/program_read_dataset_ryzen.inc | 17 ++++++++++++++ src/crypto/randomx/jit_compiler_x86.cpp | 17 +++++++++++--- src/crypto/randomx/jit_compiler_x86_static.S | 4 ++++ .../randomx/jit_compiler_x86_static.asm | 5 ++++ .../randomx/jit_compiler_x86_static.hpp | 1 + src/crypto/randomx/randomx.cpp | 12 ++++++++++ src/crypto/randomx/randomx.h | 4 ++++ 10 files changed, 69 insertions(+), 16 deletions(-) create mode 100644 src/crypto/randomx/asm/program_read_dataset_ryzen.inc diff --git a/src/crypto/randomx/asm/program_prologue_linux.inc b/src/crypto/randomx/asm/program_prologue_linux.inc index fcd09fd3..4e1685de 100644 --- a/src/crypto/randomx/asm/program_prologue_linux.inc +++ b/src/crypto/randomx/asm/program_prologue_linux.inc @@ -22,7 +22,6 @@ mov rsi, rdx ;# uint8_t* scratchpad mov rax, rbp - ror rbp, 32 ;# zero integer registers xor r8, r8 diff --git a/src/crypto/randomx/asm/program_prologue_win64.inc b/src/crypto/randomx/asm/program_prologue_win64.inc index d70e0491..a9386265 100644 --- a/src/crypto/randomx/asm/program_prologue_win64.inc +++ b/src/crypto/randomx/asm/program_prologue_win64.inc @@ -35,7 +35,6 @@ mov rbx, r9 ;# loop counter mov rax, rbp - ror rbp, 32 ;# zero integer registers xor r8, r8 diff --git a/src/crypto/randomx/asm/program_read_dataset.inc b/src/crypto/randomx/asm/program_read_dataset.inc index 9c61092f..b81d0c32 100644 --- a/src/crypto/randomx/asm/program_read_dataset.inc +++ b/src/crypto/randomx/asm/program_read_dataset.inc @@ -1,16 +1,17 @@ - mov ecx, ebp ;# ecx = ma - and ecx, RANDOMX_DATASET_BASE_MASK - xor r8, qword ptr [rdi+rcx] - ror rbp, 32 ;# swap "ma" and "mx" xor rbp, rax ;# modify "mx" mov edx, ebp ;# edx = mx and edx, RANDOMX_DATASET_BASE_MASK prefetchnta byte ptr [rdi+rdx] - xor r9, qword ptr [rdi+rcx+8] - xor r10, qword ptr [rdi+rcx+16] - xor r11, qword ptr [rdi+rcx+24] - xor r12, qword ptr [rdi+rcx+32] - xor r13, qword ptr [rdi+rcx+40] - xor r14, qword ptr [rdi+rcx+48] - xor r15, qword ptr [rdi+rcx+56] + ror rbp, 32 ;# swap "ma" and "mx" + mov edx, ebp ;# edx = ma + and edx, RANDOMX_DATASET_BASE_MASK + lea rcx, [rdi+rdx] ;# dataset cache line + xor r8, qword ptr [rcx+0] + xor r9, qword ptr [rcx+8] + xor r10, qword ptr [rcx+16] + xor r11, qword ptr [rcx+24] + xor r12, qword ptr [rcx+32] + xor r13, qword ptr [rcx+40] + xor r14, qword ptr [rcx+48] + xor r15, qword ptr [rcx+56] \ No newline at end of file diff --git a/src/crypto/randomx/asm/program_read_dataset_ryzen.inc b/src/crypto/randomx/asm/program_read_dataset_ryzen.inc new file mode 100644 index 00000000..9a3aec3d --- /dev/null +++ b/src/crypto/randomx/asm/program_read_dataset_ryzen.inc @@ -0,0 +1,17 @@ + mov rcx, rbp ;# ecx = ma + shr rcx, 32 + and ecx, RANDOMX_DATASET_BASE_MASK + xor r8, qword ptr [rdi+rcx] + xor rbp, rax ;# modify "mx" + mov edx, ebp ;# edx = mx + and edx, RANDOMX_DATASET_BASE_MASK + prefetchnta byte ptr [rdi+rdx] + ror rbp, 32 ;# swap "ma" and "mx" + xor r9, qword ptr [rdi+rcx+8] + xor r10, qword ptr [rdi+rcx+16] + xor r11, qword ptr [rdi+rcx+24] + xor r12, qword ptr [rdi+rcx+32] + xor r13, qword ptr [rdi+rcx+40] + xor r14, qword ptr [rdi+rcx+48] + xor r15, qword ptr [rdi+rcx+56] + \ No newline at end of file diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index f4d3a542..825a526e 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -115,7 +115,6 @@ namespace randomx { #define codeLoopLoad ADDR(randomx_program_loop_load) #define codeLoopLoadXOP ADDR(randomx_program_loop_load_xop) #define codeProgamStart ADDR(randomx_program_start) - #define codeReadDataset ADDR(randomx_program_read_dataset) #define codeReadDatasetLightSshInit ADDR(randomx_program_read_dataset_sshash_init) #define codeReadDatasetLightSshFin ADDR(randomx_program_read_dataset_sshash_fin) #define codeDatasetInit ADDR(randomx_dataset_init) @@ -136,7 +135,6 @@ namespace randomx { #define prologueSize (codeLoopBegin - codePrologue) #define loopLoadSize (codeLoopLoadXOP - codeLoopLoad) #define loopLoadXOPSize (codeProgamStart - codeLoopLoadXOP) - #define readDatasetSize (codeReadDatasetLightSshInit - codeReadDataset) #define readDatasetLightInitSize (codeReadDatasetLightSshFin - codeReadDatasetLightSshInit) #define readDatasetLightFinSize (codeLoopStore - codeReadDatasetLightSshFin) #define loopStoreSize (codeLoopEnd - codeLoopStore) @@ -320,7 +318,20 @@ namespace randomx { vm_flags = flags; generateProgramPrologue(prog, pcfg); - emit(codeReadDataset, readDatasetSize, code, codePos); + + uint8_t* p; + uint32_t n; + if (flags & RANDOMX_FLAG_AMD) { + p = RandomX_CurrentConfig.codeReadDatasetRyzenTweaked; + n = RandomX_CurrentConfig.codeReadDatasetRyzenTweakedSize; + } + else { + p = RandomX_CurrentConfig.codeReadDatasetTweaked; + n = RandomX_CurrentConfig.codeReadDatasetTweakedSize; + } + memcpy(code + codePos, p, n); + codePos += n; + generateProgramEpilogue(prog, pcfg); } diff --git a/src/crypto/randomx/jit_compiler_x86_static.S b/src/crypto/randomx/jit_compiler_x86_static.S index c55db6c0..c7b31e5a 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.S +++ b/src/crypto/randomx/jit_compiler_x86_static.S @@ -48,6 +48,7 @@ .global DECL(randomx_program_loop_load_xop) .global DECL(randomx_program_start) .global DECL(randomx_program_read_dataset) +.global DECL(randomx_program_read_dataset_ryzen) .global DECL(randomx_program_read_dataset_sshash_init) .global DECL(randomx_program_read_dataset_sshash_fin) .global DECL(randomx_program_loop_store) @@ -139,6 +140,9 @@ DECL(randomx_program_start): DECL(randomx_program_read_dataset): #include "asm/program_read_dataset.inc" +DECL(randomx_program_read_dataset_ryzen): + #include "asm/program_read_dataset_ryzen.inc" + DECL(randomx_program_read_dataset_sshash_init): #include "asm/program_read_dataset_sshash_init.inc" diff --git a/src/crypto/randomx/jit_compiler_x86_static.asm b/src/crypto/randomx/jit_compiler_x86_static.asm index a5edc149..e7d6cbb3 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.asm +++ b/src/crypto/randomx/jit_compiler_x86_static.asm @@ -39,6 +39,7 @@ PUBLIC randomx_program_loop_load PUBLIC randomx_program_loop_load_xop PUBLIC randomx_program_start PUBLIC randomx_program_read_dataset +PUBLIC randomx_program_read_dataset_ryzen PUBLIC randomx_program_read_dataset_sshash_init PUBLIC randomx_program_read_dataset_sshash_fin PUBLIC randomx_dataset_init @@ -135,6 +136,10 @@ randomx_program_read_dataset PROC include asm/program_read_dataset.inc randomx_program_read_dataset ENDP +randomx_program_read_dataset_ryzen PROC + include asm/program_read_dataset_ryzen.inc +randomx_program_read_dataset_ryzen ENDP + randomx_program_read_dataset_sshash_init PROC include asm/program_read_dataset_sshash_init.inc randomx_program_read_dataset_sshash_init ENDP diff --git a/src/crypto/randomx/jit_compiler_x86_static.hpp b/src/crypto/randomx/jit_compiler_x86_static.hpp index 372a69f1..d4f79d21 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.hpp +++ b/src/crypto/randomx/jit_compiler_x86_static.hpp @@ -40,6 +40,7 @@ extern "C" { void randomx_program_loop_load_xop(); void randomx_program_start(); void randomx_program_read_dataset(); + void randomx_program_read_dataset_ryzen(); void randomx_program_read_dataset_sshash_init(); void randomx_program_read_dataset_sshash_fin(); void randomx_program_loop_store(); diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index d605d74c..73395319 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -191,6 +191,18 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase() const uint8_t* b = addr(randomx_sshash_end); memcpy(codeShhPrefetchTweaked, a, b - a); } + { + const uint8_t* a = addr(randomx_program_read_dataset); + const uint8_t* b = addr(randomx_program_read_dataset_ryzen); + memcpy(codeReadDatasetTweaked, a, b - a); + codeReadDatasetTweakedSize = b - a; + } + { + const uint8_t* a = addr(randomx_program_read_dataset_ryzen); + const uint8_t* b = addr(randomx_program_read_dataset_sshash_init); + memcpy(codeReadDatasetRyzenTweaked, a, b - a); + codeReadDatasetRyzenTweakedSize = b - a; + } if (xmrig::Cpu::info()->hasBMI2()) { const uint8_t* a = addr(randomx_prefetch_scratchpad_bmi2); const uint8_t* b = addr(randomx_prefetch_scratchpad_end); diff --git a/src/crypto/randomx/randomx.h b/src/crypto/randomx/randomx.h index b06e068c..8b3a51df 100644 --- a/src/crypto/randomx/randomx.h +++ b/src/crypto/randomx/randomx.h @@ -126,6 +126,10 @@ struct RandomX_ConfigurationBase rx_vec_i128 fillAes4Rx4_Key[8]; uint8_t codeShhPrefetchTweaked[20]; + uint8_t codeReadDatasetTweaked[64]; + uint32_t codeReadDatasetTweakedSize; + uint8_t codeReadDatasetRyzenTweaked[72]; + uint32_t codeReadDatasetRyzenTweakedSize; uint8_t codePrefetchScratchpadTweaked[28]; uint32_t codePrefetchScratchpadTweakedSize;