RandomX: optimized program generation

This commit is contained in:
SChernykh 2023-05-21 00:39:32 +02:00
parent 7b51e23aa0
commit 5dcbab7e3a
4 changed files with 113 additions and 73 deletions

View file

@ -312,11 +312,19 @@ namespace randomx {
freePagedMemory(allocatedCode, allocatedSize);
}
template<size_t N>
static FORCE_INLINE void prefetch_data(const void* data) {
rx_prefetch_nta(data);
prefetch_data<N - 1>(reinterpret_cast<const char*>(data) + 64);
}
template<> FORCE_INLINE void prefetch_data<0>(const void*) {}
template<typename T> static FORCE_INLINE void prefetch_data(const T& data) { prefetch_data<(sizeof(T) + 63) / 64>(&data); }
void JitCompilerX86::prepare() {
for (size_t i = 0; i < sizeof(engine); i += 64)
rx_prefetch_nta((const char*)(&engine) + i);
for (size_t i = 0; i < sizeof(RandomX_CurrentConfig); i += 64)
rx_prefetch_nta((const char*)(&RandomX_CurrentConfig) + i);
prefetch_data(engine);
prefetch_data(RandomX_CurrentConfig);
}
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) {
@ -748,7 +756,7 @@ namespace randomx {
template void JitCompilerX86::genAddressReg<true>(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos);
FORCE_INLINE void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, uint32_t& codePos) {
const uint32_t dst = static_cast<uint32_t>(instr.dst % RegistersCount) << 16;
const uint32_t dst = static_cast<uint32_t>(instr.dst) << 16;
*(uint32_t*)(code + codePos) = 0x24808d41 + dst;
codePos += (dst == (RegisterNeedsSib << 16)) ? 4 : 3;
@ -768,8 +776,8 @@ namespace randomx {
uint32_t pos = codePos;
uint8_t* const p = code + pos;
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t sib = (instr.getModShift() << 6) | ((instr.src % RegistersCount) << 3) | dst;
const uint32_t dst = instr.dst;
const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | dst;
uint32_t k = 0x048d4f + (dst << 19);
if (dst == RegisterNeedsDisplacement)
@ -788,8 +796,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
if (src != dst) {
genAddressReg<true>(instr, src, p, pos);
@ -809,8 +817,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
if (src != dst) {
*(uint32_t*)(p + pos) = 0xc02b4d + (dst << 19) + (src << 16);
@ -830,8 +838,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
if (src != dst) {
genAddressReg<true>(instr, src, p, pos);
@ -851,8 +859,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
if (src != dst) {
emit32(0xc0af0f4d + ((dst * 8 + src) << 24), p, pos);
@ -871,8 +879,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
if (src != dst) {
genAddressReg<true>(instr, src, p, pos);
@ -892,8 +900,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
*(uint32_t*)(p + pos) = 0xc08b49 + (dst << 16);
*(uint32_t*)(p + pos + 3) = 0xe0f749 + (src << 16);
@ -908,8 +916,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
*(uint32_t*)(p + pos) = 0xC4D08B49 + (dst << 16);
*(uint32_t*)(p + pos + 4) = 0xC0F6FB42 + (dst << 27) + (src << 24);
@ -923,8 +931,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
if (src != dst) {
genAddressReg<false>(instr, src, p, pos);
@ -947,8 +955,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
if (src != dst) {
genAddressReg<false>(instr, src, p, pos);
@ -970,8 +978,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
*(uint64_t*)(p + pos) = 0x8b4ce8f749c08b49ull + (dst << 16) + (src << 40);
pos += 8;
@ -985,8 +993,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
if (src != dst) {
genAddressReg<false>(instr, src, p, pos);
@ -1011,7 +1019,7 @@ namespace randomx {
uint64_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t dst = instr.dst;
const uint64_t reciprocal = randomx_reciprocal_fast(divisor);
if (imul_rcp_storage_used < 16) {
@ -1040,7 +1048,7 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t dst = instr.dst;
*(uint32_t*)(p + pos) = 0xd8f749 + (dst << 16);
pos += 3;
@ -1052,8 +1060,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
if (src != dst) {
*(uint32_t*)(p + pos) = 0xc0334d + (((dst << 3) + src) << 16);
@ -1073,8 +1081,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
if (src != dst) {
genAddressReg<true>(instr, src, p, pos);
@ -1094,8 +1102,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
if (src != dst) {
*(uint64_t*)(p + pos) = 0xc8d349c88b41ull + (src << 16) + (dst << 40);
@ -1115,8 +1123,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t src = instr.src;
const uint64_t dst = instr.dst;
if (src != dst) {
*(uint64_t*)(p + pos) = 0xc0d349c88b41ull + (src << 16) + (dst << 40);
@ -1136,8 +1144,8 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t dst = instr.dst % RegistersCount;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst;
if (src != dst) {
*(uint32_t*)(p + pos) = 0xc0874d + (((dst << 3) + src) << 16);
@ -1153,7 +1161,7 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const uint64_t dst = instr.dst % RegistersCount;
const uint64_t dst = instr.dst;
*(uint64_t*)(p + pos) = 0x01c0c60f66ull + (((dst << 3) + dst) << 24);
pos += 5;
@ -1182,7 +1190,7 @@ namespace randomx {
prevFPOperation = pos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst % RegisterCountFlt;
genAddressReg<true>(instr, src, p, pos);
@ -1214,7 +1222,7 @@ namespace randomx {
prevFPOperation = pos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t src = instr.src;
const uint32_t dst = instr.dst % RegisterCountFlt;
genAddressReg<true>(instr, src, p, pos);
@ -1257,7 +1265,7 @@ namespace randomx {
prevFPOperation = pos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t src = instr.src;
const uint64_t dst = instr.dst % RegisterCountFlt;
genAddressReg<true>(instr, src, p, pos);
@ -1307,7 +1315,7 @@ namespace randomx {
uint32_t pos = codePos;
prevCFROUND = pos;
const uint32_t src = instr.src % RegistersCount;
const uint32_t src = instr.src;
*(uint32_t*)(p + pos) = 0x00C08B49 + (src << 16);
const int rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
@ -1343,7 +1351,7 @@ namespace randomx {
uint32_t pos = codePos;
prevCFROUND = pos;
const uint64_t src = instr.src % RegistersCount;
const uint64_t src = instr.src;
const uint64_t rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
*(uint64_t*)(p + pos) = 0xC0F0FBC3C4ULL | (src << 32) | (rotate << 40);
@ -1367,7 +1375,7 @@ namespace randomx {
uint8_t* const p = code;
uint32_t pos = codePos;
const int reg = instr.dst % RegistersCount;
const int reg = instr.dst;
int32_t jmp_offset = registerUsage[reg];
// if it jumps over the previous FP instruction that uses rounding, treat it as if FP instruction happened now
@ -1426,7 +1434,7 @@ namespace randomx {
uint32_t pos = codePos;
genAddressRegDst(instr, p, pos);
emit32(0x0604894c + (static_cast<uint32_t>(instr.src % RegistersCount) << 19), p, pos);
emit32(0x0604894c + (static_cast<uint32_t>(instr.src) << 19), p, pos);
codePos = pos;
}