This commit is contained in:
MoneroOcean 2020-02-25 06:20:29 -10:00
commit b6674abb41
59 changed files with 2180 additions and 1534 deletions

View file

@ -234,7 +234,7 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
rx_vec_i128 fill_state2 = rx_load_vec_i128((rx_vec_i128*)fill_state + 2);
rx_vec_i128 fill_state3 = rx_load_vec_i128((rx_vec_i128*)fill_state + 3);
constexpr int PREFETCH_DISTANCE = 4096;
constexpr int PREFETCH_DISTANCE = 7168;
const char* prefetchPtr = ((const char*)scratchpad) + PREFETCH_DISTANCE;
scratchpadEnd -= PREFETCH_DISTANCE;
@ -258,8 +258,25 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
rx_prefetch_t0(prefetchPtr);
scratchpadPtr += 64;
prefetchPtr += 64;
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 4));
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 5));
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 6));
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 7));
fill_state0 = aesdec<softAes>(fill_state0, key0);
fill_state1 = aesenc<softAes>(fill_state1, key1);
fill_state2 = aesdec<softAes>(fill_state2, key2);
fill_state3 = aesenc<softAes>(fill_state3, key3);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 4, fill_state0);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 5, fill_state1);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 6, fill_state2);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 7, fill_state3);
rx_prefetch_t0(prefetchPtr + 64);
scratchpadPtr += 128;
prefetchPtr += 128;
}
prefetchPtr = (const char*) scratchpad;
scratchpadEnd += PREFETCH_DISTANCE;

View file

@ -49,6 +49,7 @@ namespace randomx {
JitCompilerA64();
~JitCompilerA64();
void prepare() {}
void generateProgram(Program&, ProgramConfiguration&, uint32_t);
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);

View file

@ -44,6 +44,7 @@ namespace randomx {
JitCompilerFallback() {
throw std::runtime_error("JIT compilation is not supported on this platform");
}
void prepare() {}
void generateProgram(Program&, ProgramConfiguration&, uint32_t) {
}

View file

@ -325,6 +325,13 @@ namespace randomx {
freePagedMemory(allocatedCode, CodeSize);
}
// Issues rx_prefetch_nta over the static `engine` dispatch table and over
// RandomX_CurrentConfig, one call per 64-byte step. Both objects are declared
// alignas(64), so each step starts on a 64-byte boundary.
// CompiledVm::run() calls this immediately before generating the program.
// NOTE(review): rx_prefetch_nta is presumably a non-temporal prefetch hint
// with no architectural side effects - confirm against its definition.
void JitCompilerX86::prepare() {
	// size_t offsets avoid the signed/unsigned comparison against sizeof().
	constexpr size_t step = 64;

	const char* const engineBytes = (const char*)(&engine);
	for (size_t offset = 0; offset < sizeof(engine); offset += step)
		rx_prefetch_nta(engineBytes + offset);

	const char* const configBytes = (const char*)(&RandomX_CurrentConfig);
	for (size_t offset = 0; offset < sizeof(RandomX_CurrentConfig); offset += step)
		rx_prefetch_nta(configBytes + offset);
}
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) {
vm_flags = flags;
@ -419,11 +426,29 @@ namespace randomx {
r[j] = k;
}
for (int i = 0, n = static_cast<int>(RandomX_CurrentConfig.ProgramSize); i < n; ++i) {
Instruction& instr = prog(i);
const uint8_t opcode = instr.opcode;
*((uint64_t*)&instr) &= (uint64_t(-1) - (0xFFFF << 8)) | ((RegistersCount - 1) << 8) | ((RegistersCount - 1) << 16);
(this->*(engine[opcode]))(instr);
constexpr uint64_t instr_mask = (uint64_t(-1) - (0xFFFF << 8)) | ((RegistersCount - 1) << 8) | ((RegistersCount - 1) << 16);
for (int i = 0, n = static_cast<int>(RandomX_CurrentConfig.ProgramSize); i < n; i += 4) {
Instruction& instr1 = prog(i);
Instruction& instr2 = prog(i + 1);
Instruction& instr3 = prog(i + 2);
Instruction& instr4 = prog(i + 3);
InstructionGeneratorX86 gen1 = engine[instr1.opcode];
InstructionGeneratorX86 gen2 = engine[instr2.opcode];
InstructionGeneratorX86 gen3 = engine[instr3.opcode];
InstructionGeneratorX86 gen4 = engine[instr4.opcode];
*((uint64_t*)&instr1) &= instr_mask;
(this->*gen1)(instr1);
*((uint64_t*)&instr2) &= instr_mask;
(this->*gen2)(instr2);
*((uint64_t*)&instr3) &= instr_mask;
(this->*gen3)(instr3);
*((uint64_t*)&instr4) &= instr_mask;
(this->*gen4)(instr4);
}
emit(REX_MOV_RR, code, codePos);
@ -609,13 +634,14 @@ namespace randomx {
int pos = codePos;
uint8_t* const p = code + pos;
const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | instr.dst;
*(uint32_t*)(p) = template_IADD_RS[instr.dst] | (sib << 24);
const uint32_t dst = instr.dst;
const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | dst;
*(uint32_t*)(p) = template_IADD_RS[dst] | (sib << 24);
*(uint32_t*)(p + 4) = instr.getImm32();
pos += ((instr.dst == RegisterNeedsDisplacement) ? 8 : 4);
pos += ((dst == RegisterNeedsDisplacement) ? 8 : 4);
registerUsage[instr.dst] = pos;
registerUsage[dst] = pos;
codePos = pos;
}
@ -1092,6 +1118,29 @@ namespace randomx {
codePos = pos;
}
// BMI2 variant of the CFROUND code generator. Appends raw x86-64 machine code
// at code + codePos and advances codePos by the number of bytes emitted
// (23 when RANDOMX_FLAG_AMD is set in vm_flags, 13 otherwise).
//
// Each 8-byte store below is wider than the instruction bytes it carries;
// the surplus bytes are overwritten by the following store or lie beyond the
// new codePos, so the stores must stay in this order.
void JitCompilerX86::h_CFROUND_BMI2(const Instruction& instr) {
	const int start = codePos;
	uint8_t* const out = code + start;

	const uint64_t srcReg = instr.src;
	// Rotate count is (imm32 mod 64) - 2, wrapped back into 0..63.
	// NOTE(review): the "- 2" presumably leaves the two selected bits at
	// positions 2..3 so the later AND with 0x0C yields a 4-byte-scaled
	// offset - confirm against the non-BMI2 h_CFROUND.
	const uint64_t rorCount = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;

	// Bytes C4 C3 FB F0 (C0|src) imm8: a 6-byte VEX-encoded instruction that
	// decodes as "rorx rax, r(8+src), imm8". srcReg lands in the low bits of
	// the ModRM byte, rorCount in the immediate byte.
	*(uint64_t*)(out) = 0xC0F0FBC3C4ULL | (srcReg << 32) | (rorCount << 40);

	int emitted;
	if (vm_flags & RANDOMX_FLAG_AMD) {
		// 83 E0 0C      and  eax, 12
		// 3B 44 24 20   cmp  eax, [rsp+32]
		// 74 ..         je   (displacement byte written separately below)
		*(uint64_t*)(out + 6) = 0x742024443B0CE083ULL;
		// je +8: jumps over the 8 bytes emitted next.
		*(uint8_t*)(out + 14) = 8;
		// 0F AE 14 04   ldmxcsr [rsp+rax]
		// 89 44 24 20   mov  [rsp+32], eax
		*(uint64_t*)(out + 15) = 0x202444890414AE0FULL;
		emitted = 23;
	}
	else {
		// 83 E0 0C      and  eax, 12
		// 0F AE 14 04   ldmxcsr [rsp+rax]
		// Only 7 of the 8 stored bytes are code; the trailing zero byte sits
		// at the new codePos.
		*(uint64_t*)(out + 6) = 0x0414AE0F0CE083ULL;
		emitted = 13;
	}

	codePos = start + emitted;
}
void JitCompilerX86::h_CBRANCH(const Instruction& instr) {
uint8_t* const p = code;
int pos = codePos;
@ -1152,6 +1201,6 @@ namespace randomx {
emit(NOP1, code, codePos);
}
InstructionGeneratorX86 JitCompilerX86::engine[256] = {};
alignas(64) InstructionGeneratorX86 JitCompilerX86::engine[256] = {};
}

View file

@ -49,6 +49,7 @@ namespace randomx {
public:
JitCompilerX86();
~JitCompilerX86();
void prepare();
void generateProgram(Program&, ProgramConfiguration&, uint32_t);
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
template<size_t N>
@ -65,7 +66,7 @@ namespace randomx {
}
size_t getCodeSize();
static InstructionGeneratorX86 engine[256];
alignas(64) static InstructionGeneratorX86 engine[256];
int registerUsage[RegistersCount];
uint8_t* allocatedCode;
uint8_t* code;
@ -146,6 +147,7 @@ namespace randomx {
void h_FSQRT_R(const Instruction&);
void h_CBRANCH(const Instruction&);
void h_CFROUND(const Instruction&);
void h_CFROUND_BMI2(const Instruction&);
void h_ISTORE(const Instruction&);
void h_NOP(const Instruction&);
};

View file

@ -280,7 +280,17 @@ void RandomX_ConfigurationBase::Apply()
INST_HANDLE(FDIV_M, FMUL_R);
INST_HANDLE(FSQRT_R, FDIV_M);
INST_HANDLE(CBRANCH, FSQRT_R);
INST_HANDLE(CFROUND, CBRANCH);
#if defined(_M_X64) || defined(__x86_64__)
if (xmrig::Cpu::info()->hasBMI2()) {
INST_HANDLE2(CFROUND, CFROUND_BMI2, CBRANCH);
}
else
#endif
{
INST_HANDLE(CFROUND, CBRANCH);
}
INST_HANDLE(ISTORE, CFROUND);
INST_HANDLE(NOP, ISTORE);
#undef INST_HANDLE
@ -292,7 +302,7 @@ RandomX_ConfigurationLoki RandomX_LokiConfig;
RandomX_ConfigurationArqma RandomX_ArqmaConfig;
RandomX_ConfigurationSafex RandomX_SafexConfig;
RandomX_ConfigurationBase RandomX_CurrentConfig;
alignas(64) RandomX_ConfigurationBase RandomX_CurrentConfig;
extern "C" {

View file

@ -41,6 +41,7 @@ namespace randomx {
template<bool softAes>
void CompiledVm<softAes>::run(void* seed) {
compiler.prepare();
VmBase<softAes>::generateProgram(seed);
randomx_vm::initialize();
compiler.generateProgram(program, config, randomx_vm::getFlags());