Merge xmrig v6.12.2 into master
This commit is contained in:
commit
28ad107de8
28 changed files with 987 additions and 506 deletions
|
@ -22,6 +22,7 @@
|
|||
mov rsi, rdx ;# uint8_t* scratchpad
|
||||
|
||||
mov rax, rbp
|
||||
ror rbp, 32
|
||||
|
||||
;# zero integer registers
|
||||
xor r8, r8
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
mov rbx, r9 ;# loop counter
|
||||
|
||||
mov rax, rbp
|
||||
ror rbp, 32
|
||||
|
||||
;# zero integer registers
|
||||
xor r8, r8
|
||||
|
|
|
@ -1,17 +1,16 @@
|
|||
mov ecx, ebp ;# ecx = ma
|
||||
and ecx, RANDOMX_DATASET_BASE_MASK
|
||||
xor r8, qword ptr [rdi+rcx]
|
||||
ror rbp, 32 ;# swap "ma" and "mx"
|
||||
xor rbp, rax ;# modify "mx"
|
||||
mov edx, ebp ;# edx = mx
|
||||
and edx, RANDOMX_DATASET_BASE_MASK
|
||||
prefetchnta byte ptr [rdi+rdx]
|
||||
ror rbp, 32 ;# swap "ma" and "mx"
|
||||
mov edx, ebp ;# edx = ma
|
||||
and edx, RANDOMX_DATASET_BASE_MASK
|
||||
lea rcx, [rdi+rdx] ;# dataset cache line
|
||||
xor r8, qword ptr [rcx+0]
|
||||
xor r9, qword ptr [rcx+8]
|
||||
xor r10, qword ptr [rcx+16]
|
||||
xor r11, qword ptr [rcx+24]
|
||||
xor r12, qword ptr [rcx+32]
|
||||
xor r13, qword ptr [rcx+40]
|
||||
xor r14, qword ptr [rcx+48]
|
||||
xor r15, qword ptr [rcx+56]
|
||||
xor r9, qword ptr [rdi+rcx+8]
|
||||
xor r10, qword ptr [rdi+rcx+16]
|
||||
xor r11, qword ptr [rdi+rcx+24]
|
||||
xor r12, qword ptr [rdi+rcx+32]
|
||||
xor r13, qword ptr [rdi+rcx+40]
|
||||
xor r14, qword ptr [rdi+rcx+48]
|
||||
xor r15, qword ptr [rdi+rcx+56]
|
||||
|
|
@ -1,17 +0,0 @@
|
|||
mov rcx, rbp ;# ecx = ma
|
||||
shr rcx, 32
|
||||
and ecx, RANDOMX_DATASET_BASE_MASK
|
||||
xor r8, qword ptr [rdi+rcx]
|
||||
xor rbp, rax ;# modify "mx"
|
||||
mov edx, ebp ;# edx = mx
|
||||
and edx, RANDOMX_DATASET_BASE_MASK
|
||||
prefetchnta byte ptr [rdi+rdx]
|
||||
ror rbp, 32 ;# swap "ma" and "mx"
|
||||
xor r9, qword ptr [rdi+rcx+8]
|
||||
xor r10, qword ptr [rdi+rcx+16]
|
||||
xor r11, qword ptr [rdi+rcx+24]
|
||||
xor r12, qword ptr [rdi+rcx+32]
|
||||
xor r13, qword ptr [rdi+rcx+40]
|
||||
xor r14, qword ptr [rdi+rcx+48]
|
||||
xor r15, qword ptr [rdi+rcx+56]
|
||||
|
|
@ -7,4 +7,4 @@
|
|||
xor r13, qword ptr [rsp+16]
|
||||
xor r14, qword ptr [rsp+8]
|
||||
xor r15, qword ptr [rsp+0]
|
||||
add rsp, 72
|
||||
add rsp, 200
|
|
@ -1,4 +1,4 @@
|
|||
sub rsp, 72
|
||||
sub rsp, 200
|
||||
mov qword ptr [rsp+64], rbx
|
||||
mov qword ptr [rsp+56], r8
|
||||
mov qword ptr [rsp+48], r9
|
||||
|
@ -8,10 +8,10 @@
|
|||
mov qword ptr [rsp+16], r13
|
||||
mov qword ptr [rsp+8], r14
|
||||
mov qword ptr [rsp+0], r15
|
||||
xor rbp, rax ;# modify "mx"
|
||||
ror rbp, 32 ;# swap "ma" and "mx"
|
||||
mov ebx, ebp ;# ecx = ma
|
||||
and ebx, RANDOMX_DATASET_BASE_MASK
|
||||
shr ebx, 6 ;# ebx = Dataset block number
|
||||
xor rbp, rax ;# modify "mx"
|
||||
mov rbx, rbp ;# ebx = ma
|
||||
shr rbx, 38
|
||||
and ebx, RANDOMX_DATASET_BASE_MASK / 64 ;# ebx = Dataset block number
|
||||
;# add ebx, datasetOffset / 64
|
||||
;# call 32768
|
|
@ -304,6 +304,9 @@ literal_v14: .fill 2,8,0
|
|||
literal_v15: .fill 2,8,0
|
||||
|
||||
DECL(randomx_program_aarch64_vm_instructions_end):
|
||||
# Calculate dataset pointer for dataset read
|
||||
# Do it here to break false dependency from readReg2 and readReg3 (see next line)
|
||||
lsr x10, x9, 32
|
||||
|
||||
# mx ^= r[readReg2] ^ r[readReg3];
|
||||
eor x9, x9, x18
|
||||
|
@ -321,8 +324,6 @@ DECL(randomx_program_aarch64_cacheline_align_mask1):
|
|||
# mx <-> ma
|
||||
ror x9, x9, 32
|
||||
|
||||
# Calculate dataset pointer for dataset read
|
||||
mov w10, w9
|
||||
DECL(randomx_program_aarch64_cacheline_align_mask2):
|
||||
# Actual mask will be inserted by JIT compiler
|
||||
and x10, x10, 1
|
||||
|
|
|
@ -110,13 +110,12 @@ namespace randomx {
|
|||
#define ADDR(x) ((uint8_t*)&x)
|
||||
# endif
|
||||
|
||||
#define codePrefetchScratchpad ADDR(randomx_prefetch_scratchpad)
|
||||
#define codePrefetchScratchpadEnd ADDR(randomx_prefetch_scratchpad_end)
|
||||
#define codePrologue ADDR(randomx_program_prologue)
|
||||
#define codeLoopBegin ADDR(randomx_program_loop_begin)
|
||||
#define codeLoopLoad ADDR(randomx_program_loop_load)
|
||||
#define codeLoopLoadXOP ADDR(randomx_program_loop_load_xop)
|
||||
#define codeProgamStart ADDR(randomx_program_start)
|
||||
#define codeReadDataset ADDR(randomx_program_read_dataset)
|
||||
#define codeReadDatasetLightSshInit ADDR(randomx_program_read_dataset_sshash_init)
|
||||
#define codeReadDatasetLightSshFin ADDR(randomx_program_read_dataset_sshash_fin)
|
||||
#define codeDatasetInit ADDR(randomx_dataset_init)
|
||||
|
@ -134,10 +133,10 @@ namespace randomx {
|
|||
#define codeShhEnd ADDR(randomx_sshash_end)
|
||||
#define codeShhInit ADDR(randomx_sshash_init)
|
||||
|
||||
#define prefetchScratchpadSize (codePrefetchScratchpadEnd - codePrefetchScratchpad)
|
||||
#define prologueSize (codeLoopBegin - codePrologue)
|
||||
#define loopLoadSize (codeLoopLoadXOP - codeLoopLoad)
|
||||
#define loopLoadXOPSize (codeProgamStart - codeLoopLoadXOP)
|
||||
#define readDatasetSize (codeReadDatasetLightSshInit - codeReadDataset)
|
||||
#define readDatasetLightInitSize (codeReadDatasetLightSshFin - codeReadDatasetLightSshInit)
|
||||
#define readDatasetLightFinSize (codeLoopStore - codeReadDatasetLightSshFin)
|
||||
#define loopStoreSize (codeLoopEnd - codeLoopStore)
|
||||
|
@ -321,26 +320,13 @@ namespace randomx {
|
|||
vm_flags = flags;
|
||||
|
||||
generateProgramPrologue(prog, pcfg);
|
||||
|
||||
uint8_t* p;
|
||||
uint32_t n;
|
||||
if (flags & RANDOMX_FLAG_AMD) {
|
||||
p = RandomX_CurrentConfig.codeReadDatasetRyzenTweaked;
|
||||
n = RandomX_CurrentConfig.codeReadDatasetRyzenTweakedSize;
|
||||
}
|
||||
else {
|
||||
p = RandomX_CurrentConfig.codeReadDatasetTweaked;
|
||||
n = RandomX_CurrentConfig.codeReadDatasetTweakedSize;
|
||||
}
|
||||
memcpy(code + codePos, p, n);
|
||||
codePos += n;
|
||||
|
||||
emit(codeReadDataset, readDatasetSize, code, codePos);
|
||||
generateProgramEpilogue(prog, pcfg);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg, uint32_t datasetOffset) {
|
||||
generateProgramPrologue(prog, pcfg);
|
||||
emit(RandomX_CurrentConfig.codeReadDatasetLightSshInitTweaked, readDatasetLightInitSize, code, codePos);
|
||||
emit(codeReadDatasetLightSshInit, readDatasetLightInitSize, code, codePos);
|
||||
*(uint32_t*)(code + codePos) = 0xc381;
|
||||
codePos += 2;
|
||||
emit32(datasetOffset / CacheLineSize, code, codePos);
|
||||
|
@ -467,7 +453,7 @@ namespace randomx {
|
|||
void JitCompilerX86::generateProgramEpilogue(Program& prog, ProgramConfiguration& pcfg) {
|
||||
*(uint64_t*)(code + codePos) = 0xc03349c08b49ull + (static_cast<uint64_t>(pcfg.readReg0) << 16) + (static_cast<uint64_t>(pcfg.readReg1) << 40);
|
||||
codePos += 6;
|
||||
emit(RandomX_CurrentConfig.codePrefetchScratchpadTweaked, prefetchScratchpadSize, code, codePos);
|
||||
emit(RandomX_CurrentConfig.codePrefetchScratchpadTweaked, RandomX_CurrentConfig.codePrefetchScratchpadTweakedSize, code, codePos);
|
||||
memcpy(code + codePos, codeLoopStore, loopStoreSize);
|
||||
codePos += loopStoreSize;
|
||||
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
#endif
|
||||
|
||||
.global DECL(randomx_prefetch_scratchpad)
|
||||
.global DECL(randomx_prefetch_scratchpad_bmi2)
|
||||
.global DECL(randomx_prefetch_scratchpad_end)
|
||||
.global DECL(randomx_program_prologue)
|
||||
.global DECL(randomx_program_prologue_first_load)
|
||||
|
@ -47,7 +48,6 @@
|
|||
.global DECL(randomx_program_loop_load_xop)
|
||||
.global DECL(randomx_program_start)
|
||||
.global DECL(randomx_program_read_dataset)
|
||||
.global DECL(randomx_program_read_dataset_ryzen)
|
||||
.global DECL(randomx_program_read_dataset_sshash_init)
|
||||
.global DECL(randomx_program_read_dataset_sshash_fin)
|
||||
.global DECL(randomx_program_loop_store)
|
||||
|
@ -80,6 +80,13 @@ DECL(randomx_prefetch_scratchpad):
|
|||
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||
prefetcht0 [rsi+rdx]
|
||||
|
||||
DECL(randomx_prefetch_scratchpad_bmi2):
|
||||
rorx rdx, rax, 32
|
||||
and eax, RANDOMX_SCRATCHPAD_MASK
|
||||
prefetcht0 [rsi+rax]
|
||||
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||
prefetcht0 [rsi+rdx]
|
||||
|
||||
DECL(randomx_prefetch_scratchpad_end):
|
||||
|
||||
.balign 64
|
||||
|
@ -132,9 +139,6 @@ DECL(randomx_program_start):
|
|||
DECL(randomx_program_read_dataset):
|
||||
#include "asm/program_read_dataset.inc"
|
||||
|
||||
DECL(randomx_program_read_dataset_ryzen):
|
||||
#include "asm/program_read_dataset_ryzen.inc"
|
||||
|
||||
DECL(randomx_program_read_dataset_sshash_init):
|
||||
#include "asm/program_read_dataset_sshash_init.inc"
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@ IFDEF RAX
|
|||
_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
|
||||
|
||||
PUBLIC randomx_prefetch_scratchpad
|
||||
PUBLIC randomx_prefetch_scratchpad_bmi2
|
||||
PUBLIC randomx_prefetch_scratchpad_end
|
||||
PUBLIC randomx_program_prologue
|
||||
PUBLIC randomx_program_prologue_first_load
|
||||
|
@ -38,7 +39,6 @@ PUBLIC randomx_program_loop_load
|
|||
PUBLIC randomx_program_loop_load_xop
|
||||
PUBLIC randomx_program_start
|
||||
PUBLIC randomx_program_read_dataset
|
||||
PUBLIC randomx_program_read_dataset_ryzen
|
||||
PUBLIC randomx_program_read_dataset_sshash_init
|
||||
PUBLIC randomx_program_read_dataset_sshash_fin
|
||||
PUBLIC randomx_dataset_init
|
||||
|
@ -70,6 +70,14 @@ randomx_prefetch_scratchpad PROC
|
|||
prefetcht0 [rsi+rdx]
|
||||
randomx_prefetch_scratchpad ENDP
|
||||
|
||||
randomx_prefetch_scratchpad_bmi2 PROC
|
||||
rorx rdx, rax, 32
|
||||
and eax, RANDOMX_SCRATCHPAD_MASK
|
||||
prefetcht0 [rsi+rax]
|
||||
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||
prefetcht0 [rsi+rdx]
|
||||
randomx_prefetch_scratchpad_bmi2 ENDP
|
||||
|
||||
randomx_prefetch_scratchpad_end PROC
|
||||
randomx_prefetch_scratchpad_end ENDP
|
||||
|
||||
|
@ -127,10 +135,6 @@ randomx_program_read_dataset PROC
|
|||
include asm/program_read_dataset.inc
|
||||
randomx_program_read_dataset ENDP
|
||||
|
||||
randomx_program_read_dataset_ryzen PROC
|
||||
include asm/program_read_dataset_ryzen.inc
|
||||
randomx_program_read_dataset_ryzen ENDP
|
||||
|
||||
randomx_program_read_dataset_sshash_init PROC
|
||||
include asm/program_read_dataset_sshash_init.inc
|
||||
randomx_program_read_dataset_sshash_init ENDP
|
||||
|
|
|
@ -30,6 +30,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
extern "C" {
|
||||
void randomx_prefetch_scratchpad();
|
||||
void randomx_prefetch_scratchpad_bmi2();
|
||||
void randomx_prefetch_scratchpad_end();
|
||||
void randomx_program_prologue();
|
||||
void randomx_program_prologue_first_load();
|
||||
|
@ -39,7 +40,6 @@ extern "C" {
|
|||
void randomx_program_loop_load_xop();
|
||||
void randomx_program_start();
|
||||
void randomx_program_read_dataset();
|
||||
void randomx_program_read_dataset_ryzen();
|
||||
void randomx_program_read_dataset_sshash_init();
|
||||
void randomx_program_read_dataset_sshash_fin();
|
||||
void randomx_program_loop_store();
|
||||
|
|
|
@ -191,27 +191,17 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase()
|
|||
const uint8_t* b = addr(randomx_sshash_end);
|
||||
memcpy(codeShhPrefetchTweaked, a, b - a);
|
||||
}
|
||||
{
|
||||
const uint8_t* a = addr(randomx_program_read_dataset);
|
||||
const uint8_t* b = addr(randomx_program_read_dataset_ryzen);
|
||||
memcpy(codeReadDatasetTweaked, a, b - a);
|
||||
codeReadDatasetTweakedSize = b - a;
|
||||
}
|
||||
{
|
||||
const uint8_t* a = addr(randomx_program_read_dataset_ryzen);
|
||||
const uint8_t* b = addr(randomx_program_read_dataset_sshash_init);
|
||||
memcpy(codeReadDatasetRyzenTweaked, a, b - a);
|
||||
codeReadDatasetRyzenTweakedSize = b - a;
|
||||
}
|
||||
{
|
||||
const uint8_t* a = addr(randomx_program_read_dataset_sshash_init);
|
||||
const uint8_t* b = addr(randomx_program_read_dataset_sshash_fin);
|
||||
memcpy(codeReadDatasetLightSshInitTweaked, a, b - a);
|
||||
}
|
||||
{
|
||||
const uint8_t* a = addr(randomx_prefetch_scratchpad);
|
||||
if (xmrig::Cpu::info()->hasBMI2()) {
|
||||
const uint8_t* a = addr(randomx_prefetch_scratchpad_bmi2);
|
||||
const uint8_t* b = addr(randomx_prefetch_scratchpad_end);
|
||||
memcpy(codePrefetchScratchpadTweaked, a, b - a);
|
||||
codePrefetchScratchpadTweakedSize = b - a;
|
||||
}
|
||||
else {
|
||||
const uint8_t* a = addr(randomx_prefetch_scratchpad);
|
||||
const uint8_t* b = addr(randomx_prefetch_scratchpad_bmi2);
|
||||
memcpy(codePrefetchScratchpadTweaked, a, b - a);
|
||||
codePrefetchScratchpadTweakedSize = b - a;
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
@ -250,13 +240,15 @@ void RandomX_ConfigurationBase::Apply()
|
|||
*(uint32_t*)(codeReadDatasetTweaked + 23) = DatasetBaseMask;
|
||||
*(uint32_t*)(codeReadDatasetLightSshInitTweaked + 59) = DatasetBaseMask;
|
||||
|
||||
*(uint32_t*)(codePrefetchScratchpadTweaked + 4) = ScratchpadL3Mask64_Calculated;
|
||||
*(uint32_t*)(codePrefetchScratchpadTweaked + 18) = ScratchpadL3Mask64_Calculated;
|
||||
const bool hasBMI2 = xmrig::Cpu::info()->hasBMI2();
|
||||
|
||||
*(uint32_t*)(codePrefetchScratchpadTweaked + (hasBMI2 ? 7 : 4)) = ScratchpadL3Mask64_Calculated;
|
||||
*(uint32_t*)(codePrefetchScratchpadTweaked + (hasBMI2 ? 17 : 18)) = ScratchpadL3Mask64_Calculated;
|
||||
|
||||
// Apply scratchpad prefetch mode
|
||||
{
|
||||
uint32_t* a = (uint32_t*)(codePrefetchScratchpadTweaked + 8);
|
||||
uint32_t* b = (uint32_t*)(codePrefetchScratchpadTweaked + 22);
|
||||
uint32_t* a = (uint32_t*)(codePrefetchScratchpadTweaked + (hasBMI2 ? 11 : 8));
|
||||
uint32_t* b = (uint32_t*)(codePrefetchScratchpadTweaked + (hasBMI2 ? 21 : 22));
|
||||
|
||||
switch (scratchpadPrefetchMode)
|
||||
{
|
||||
|
@ -323,7 +315,7 @@ typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx
|
|||
INST_HANDLE(IMUL_M, IMUL_R);
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
if (xmrig::Cpu::info()->hasBMI2()) {
|
||||
if (hasBMI2) {
|
||||
INST_HANDLE2(IMULH_R, IMULH_R_BMI2, IMUL_M);
|
||||
INST_HANDLE2(IMULH_M, IMULH_M_BMI2, IMULH_R);
|
||||
}
|
||||
|
@ -365,7 +357,7 @@ typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx
|
|||
#endif
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
if (xmrig::Cpu::info()->hasBMI2()) {
|
||||
if (hasBMI2) {
|
||||
INST_HANDLE2(CFROUND, CFROUND_BMI2, CBRANCH);
|
||||
}
|
||||
else
|
||||
|
|
|
@ -126,12 +126,8 @@ struct RandomX_ConfigurationBase
|
|||
rx_vec_i128 fillAes4Rx4_Key[8];
|
||||
|
||||
uint8_t codeShhPrefetchTweaked[20];
|
||||
uint8_t codeReadDatasetTweaked[64];
|
||||
uint32_t codeReadDatasetTweakedSize;
|
||||
uint8_t codeReadDatasetRyzenTweaked[72];
|
||||
uint32_t codeReadDatasetRyzenTweakedSize;
|
||||
uint8_t codeReadDatasetLightSshInitTweaked[68];
|
||||
uint8_t codePrefetchScratchpadTweaked[32];
|
||||
uint8_t codePrefetchScratchpadTweaked[28];
|
||||
uint32_t codePrefetchScratchpadTweakedSize;
|
||||
|
||||
uint32_t CacheLineAlignMask_Calculated;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue