This commit is contained in:
commit
14568121e2
61 changed files with 658 additions and 158 deletions
|
@ -109,4 +109,26 @@ extern "C" {
|
|||
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
|
||||
}
|
||||
|
||||
void defyx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize) {
|
||||
//rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
|
||||
sipesh(tempHash, sizeof(tempHash), input, inputSize, input, inputSize, 0, 0);
|
||||
k12(input, inputSize, tempHash);
|
||||
machine->initScratchpad(tempHash);
|
||||
}
|
||||
|
||||
void defyx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output) {
|
||||
machine->resetRoundingMode();
|
||||
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
|
||||
machine->run(&tempHash);
|
||||
rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
|
||||
}
|
||||
machine->run(&tempHash);
|
||||
|
||||
// Finish current hash and fill the scratchpad for the next hash at the same time
|
||||
//rx_blake2b(tempHash, sizeof(tempHash), nextInput, nextInputSize, nullptr, 0);
|
||||
sipesh(tempHash, sizeof(tempHash), input, inputSize, input, inputSize, 0, 0);
|
||||
k12(input, inputSize, tempHash);
|
||||
machine->hashAndFill(output, RANDOMX_HASH_SIZE, tempHash);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -50,6 +50,9 @@ extern "C" {
|
|||
*/
|
||||
RANDOMX_EXPORT void defyx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output);
|
||||
|
||||
RANDOMX_EXPORT void defyx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize);
|
||||
RANDOMX_EXPORT void defyx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -212,3 +212,84 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
|||
|
||||
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
template<bool softAes>
|
||||
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
|
||||
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
|
||||
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
|
||||
|
||||
// initial state
|
||||
rx_vec_i128 hash_state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0);
|
||||
rx_vec_i128 hash_state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1);
|
||||
rx_vec_i128 hash_state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2);
|
||||
rx_vec_i128 hash_state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3);
|
||||
|
||||
const rx_vec_i128 key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0);
|
||||
const rx_vec_i128 key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1);
|
||||
const rx_vec_i128 key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2);
|
||||
const rx_vec_i128 key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3);
|
||||
|
||||
rx_vec_i128 fill_state0 = rx_load_vec_i128((rx_vec_i128*)fill_state + 0);
|
||||
rx_vec_i128 fill_state1 = rx_load_vec_i128((rx_vec_i128*)fill_state + 1);
|
||||
rx_vec_i128 fill_state2 = rx_load_vec_i128((rx_vec_i128*)fill_state + 2);
|
||||
rx_vec_i128 fill_state3 = rx_load_vec_i128((rx_vec_i128*)fill_state + 3);
|
||||
|
||||
constexpr int PREFETCH_DISTANCE = 4096;
|
||||
const char* prefetchPtr = ((const char*)scratchpad) + PREFETCH_DISTANCE;
|
||||
scratchpadEnd -= PREFETCH_DISTANCE;
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
//process 64 bytes at a time in 4 lanes
|
||||
while (scratchpadPtr < scratchpadEnd) {
|
||||
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 0));
|
||||
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 1));
|
||||
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 2));
|
||||
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 3));
|
||||
|
||||
fill_state0 = aesdec<softAes>(fill_state0, key0);
|
||||
fill_state1 = aesenc<softAes>(fill_state1, key1);
|
||||
fill_state2 = aesdec<softAes>(fill_state2, key2);
|
||||
fill_state3 = aesenc<softAes>(fill_state3, key3);
|
||||
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 0, fill_state0);
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 1, fill_state1);
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 2, fill_state2);
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 3, fill_state3);
|
||||
|
||||
rx_prefetch_t0(prefetchPtr);
|
||||
|
||||
scratchpadPtr += 64;
|
||||
prefetchPtr += 64;
|
||||
}
|
||||
prefetchPtr = (const char*) scratchpad;
|
||||
scratchpadEnd += PREFETCH_DISTANCE;
|
||||
}
|
||||
|
||||
rx_store_vec_i128((rx_vec_i128*)fill_state + 0, fill_state0);
|
||||
rx_store_vec_i128((rx_vec_i128*)fill_state + 1, fill_state1);
|
||||
rx_store_vec_i128((rx_vec_i128*)fill_state + 2, fill_state2);
|
||||
rx_store_vec_i128((rx_vec_i128*)fill_state + 3, fill_state3);
|
||||
|
||||
//two extra rounds to achieve full diffusion
|
||||
rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0);
|
||||
rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1);
|
||||
|
||||
hash_state0 = aesenc<softAes>(hash_state0, xkey0);
|
||||
hash_state1 = aesdec<softAes>(hash_state1, xkey0);
|
||||
hash_state2 = aesenc<softAes>(hash_state2, xkey0);
|
||||
hash_state3 = aesdec<softAes>(hash_state3, xkey0);
|
||||
|
||||
hash_state0 = aesenc<softAes>(hash_state0, xkey1);
|
||||
hash_state1 = aesdec<softAes>(hash_state1, xkey1);
|
||||
hash_state2 = aesenc<softAes>(hash_state2, xkey1);
|
||||
hash_state3 = aesdec<softAes>(hash_state3, xkey1);
|
||||
|
||||
//output hash
|
||||
rx_store_vec_i128((rx_vec_i128*)hash + 0, hash_state0);
|
||||
rx_store_vec_i128((rx_vec_i128*)hash + 1, hash_state1);
|
||||
rx_store_vec_i128((rx_vec_i128*)hash + 2, hash_state2);
|
||||
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
|
||||
}
|
||||
|
||||
template void hashAndFillAes1Rx4<false>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
template void hashAndFillAes1Rx4<true>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
|
|
|
@ -38,3 +38,6 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
|
|||
|
||||
template<bool softAes>
|
||||
void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
template<bool softAes>
|
||||
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
|
|
|
@ -102,6 +102,7 @@ typedef __m128d rx_vec_f128;
|
|||
#define rx_aligned_alloc(a, b) _mm_malloc(a,b)
|
||||
#define rx_aligned_free(a) _mm_free(a)
|
||||
#define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
|
||||
#define rx_prefetch_t0(x) _mm_prefetch((const char *)(x), _MM_HINT_T0)
|
||||
|
||||
#define rx_load_vec_f128 _mm_load_pd
|
||||
#define rx_store_vec_f128 _mm_store_pd
|
||||
|
@ -201,6 +202,7 @@ typedef union{
|
|||
#define rx_aligned_alloc(a, b) malloc(a)
|
||||
#define rx_aligned_free(a) free(a)
|
||||
#define rx_prefetch_nta(x)
|
||||
#define rx_prefetch_t0(x)
|
||||
|
||||
/* Splat 64-bit long long to 2 64-bit long longs */
|
||||
FORCE_INLINE __m128i vec_splat2sd (int64_t scalar)
|
||||
|
@ -399,6 +401,10 @@ inline void rx_prefetch_nta(void* ptr) {
|
|||
asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
|
||||
}
|
||||
|
||||
inline void rx_prefetch_t0(const void* ptr) {
|
||||
asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
|
||||
return vld1q_f64((const float64_t*)pd);
|
||||
}
|
||||
|
@ -532,6 +538,7 @@ typedef union {
|
|||
#define rx_aligned_alloc(a, b) malloc(a)
|
||||
#define rx_aligned_free(a) free(a)
|
||||
#define rx_prefetch_nta(x)
|
||||
#define rx_prefetch_t0(x)
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
|
||||
rx_vec_f128 x;
|
||||
|
|
|
@ -29,6 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include <stdexcept>
|
||||
#include <cstring>
|
||||
#include <climits>
|
||||
#include <atomic>
|
||||
#include "crypto/randomx/jit_compiler_x86.hpp"
|
||||
#include "crypto/randomx/jit_compiler_x86_static.hpp"
|
||||
#include "crypto/randomx/superscalar.hpp"
|
||||
|
@ -36,6 +37,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "crypto/randomx/reciprocal.h"
|
||||
#include "crypto/randomx/virtual_memory.hpp"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# include <intrin.h>
|
||||
#else
|
||||
# include <cpuid.h>
|
||||
#endif
|
||||
|
||||
namespace randomx {
|
||||
/*
|
||||
|
||||
|
@ -108,7 +115,7 @@ namespace randomx {
|
|||
const int32_t codeSshPrefetchSize = codeShhEnd - codeShhPrefetch;
|
||||
const int32_t codeSshInitSize = codeProgramEnd - codeShhInit;
|
||||
|
||||
const int32_t epilogueOffset = CodeSize - epilogueSize;
|
||||
const int32_t epilogueOffset = (CodeSize - epilogueSize) & ~63;
|
||||
constexpr int32_t superScalarHashOffset = 32768;
|
||||
|
||||
static const uint8_t REX_ADD_RR[] = { 0x4d, 0x03 };
|
||||
|
@ -183,6 +190,7 @@ namespace randomx {
|
|||
static const uint8_t REX_ADD_I[] = { 0x49, 0x81 };
|
||||
static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
|
||||
static const uint8_t JZ[] = { 0x0f, 0x84 };
|
||||
static const uint8_t JZ_SHORT = 0x74;
|
||||
static const uint8_t RET = 0xc3;
|
||||
static const uint8_t LEA_32[] = { 0x41, 0x8d };
|
||||
static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 };
|
||||
|
@ -197,20 +205,100 @@ namespace randomx {
|
|||
static const uint8_t NOP7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 };
|
||||
static const uint8_t NOP8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
|
||||
|
||||
// static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 };
|
||||
static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 };
|
||||
|
||||
static const uint8_t JMP_ALIGN_PREFIX[14][16] = {
|
||||
{},
|
||||
{0x2E},
|
||||
{0x2E, 0x2E},
|
||||
{0x2E, 0x2E, 0x2E},
|
||||
{0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x66, 0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x0F, 0x1F, 0x40, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x0F, 0x1F, 0x44, 0x00, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
};
|
||||
|
||||
bool JitCompilerX86::BranchesWithin32B = false;
|
||||
|
||||
size_t JitCompilerX86::getCodeSize() {
|
||||
return codePos < prologueSize ? 0 : codePos - prologueSize;
|
||||
}
|
||||
|
||||
static inline void cpuid(uint32_t level, int32_t output[4])
|
||||
{
|
||||
memset(output, 0, sizeof(int32_t) * 4);
|
||||
|
||||
# ifdef _MSC_VER
|
||||
__cpuid(output, static_cast<int>(level));
|
||||
# else
|
||||
__cpuid_count(level, 0, output[0], output[1], output[2], output[3]);
|
||||
# endif
|
||||
}
|
||||
|
||||
// CPU-specific tweaks
|
||||
void JitCompilerX86::applyTweaks() {
|
||||
int32_t info[4];
|
||||
cpuid(0, info);
|
||||
|
||||
int32_t manufacturer[4];
|
||||
manufacturer[0] = info[1];
|
||||
manufacturer[1] = info[3];
|
||||
manufacturer[2] = info[2];
|
||||
manufacturer[3] = 0;
|
||||
|
||||
if (strcmp((const char*)manufacturer, "GenuineIntel") == 0) {
|
||||
struct
|
||||
{
|
||||
unsigned int stepping : 4;
|
||||
unsigned int model : 4;
|
||||
unsigned int family : 4;
|
||||
unsigned int processor_type : 2;
|
||||
unsigned int reserved1 : 2;
|
||||
unsigned int ext_model : 4;
|
||||
unsigned int ext_family : 8;
|
||||
unsigned int reserved2 : 4;
|
||||
} processor_info;
|
||||
|
||||
cpuid(1, info);
|
||||
memcpy(&processor_info, info, sizeof(processor_info));
|
||||
|
||||
// Intel JCC erratum mitigation
|
||||
if (processor_info.family == 6) {
|
||||
const uint32_t model = processor_info.model | (processor_info.ext_model << 4);
|
||||
const uint32_t stepping = processor_info.stepping;
|
||||
|
||||
// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
|
||||
BranchesWithin32B =
|
||||
((model == 0x4E) && (stepping == 0x3)) ||
|
||||
((model == 0x55) && (stepping == 0x4)) ||
|
||||
((model == 0x5E) && (stepping == 0x3)) ||
|
||||
((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
|
||||
((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
|
||||
((model == 0xA6) && (stepping == 0x0)) ||
|
||||
((model == 0xAE) && (stepping == 0xA));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static std::atomic<size_t> codeOffset;
|
||||
|
||||
JitCompilerX86::JitCompilerX86() {
|
||||
code = (uint8_t*)allocExecutableMemory(CodeSize);
|
||||
applyTweaks();
|
||||
allocatedCode = (uint8_t*)allocExecutableMemory(CodeSize * 2);
|
||||
// Shift code base address to improve caching - all threads will use different L2/L3 cache sets
|
||||
code = allocatedCode + (codeOffset.fetch_add(59 * 64) % CodeSize);
|
||||
memcpy(code, codePrologue, prologueSize);
|
||||
memcpy(code + epilogueOffset, codeEpilogue, epilogueSize);
|
||||
}
|
||||
|
||||
JitCompilerX86::~JitCompilerX86() {
|
||||
freePagedMemory(code, CodeSize);
|
||||
freePagedMemory(allocatedCode, CodeSize);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) {
|
||||
|
@ -307,6 +395,22 @@ namespace randomx {
|
|||
emit(RandomX_CurrentConfig.codePrefetchScratchpadTweaked, prefetchScratchpadSize, code, codePos);
|
||||
memcpy(code + codePos, codeLoopStore, loopStoreSize);
|
||||
codePos += loopStoreSize;
|
||||
|
||||
if (BranchesWithin32B) {
|
||||
const uint32_t branch_begin = static_cast<uint32_t>(codePos);
|
||||
const uint32_t branch_end = static_cast<uint32_t>(branch_begin + 9);
|
||||
|
||||
// If the jump crosses or touches 32-byte boundary, align it
|
||||
if ((branch_begin ^ branch_end) >= 32) {
|
||||
uint32_t alignment_size = 32 - (branch_begin & 31);
|
||||
if (alignment_size > 8) {
|
||||
emit(NOPX[alignment_size - 9], alignment_size - 8, code, codePos);
|
||||
alignment_size = 8;
|
||||
}
|
||||
emit(NOPX[alignment_size - 1], alignment_size, code, codePos);
|
||||
}
|
||||
}
|
||||
|
||||
emit(SUB_EBX, code, codePos);
|
||||
emit(JNZ, code, codePos);
|
||||
emit32(prologueSize - codePos - 4, code, codePos);
|
||||
|
@ -408,12 +512,13 @@ namespace randomx {
|
|||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::genAddressReg(const Instruction& instr, uint8_t* code, int& codePos, bool rax) {
|
||||
emit(LEA_32, code, codePos);
|
||||
emitByte(0x80 + instr.src + (rax ? 0 : 8), code, codePos);
|
||||
if (instr.src == RegisterNeedsSib) {
|
||||
emitByte(0x24, code, codePos);
|
||||
}
|
||||
template<bool rax>
|
||||
FORCE_INLINE void JitCompilerX86::genAddressReg(const Instruction& instr, uint8_t* code, int& codePos) {
|
||||
const uint32_t src = *((uint32_t*)&instr) & 0xFF0000;
|
||||
|
||||
*(uint32_t*)(code + codePos) = (rax ? 0x24808d41 : 0x24888d41) + src;
|
||||
codePos += (src == (RegisterNeedsSib << 16)) ? 4 : 3;
|
||||
|
||||
emit32(instr.getImm32(), code, codePos);
|
||||
if (rax)
|
||||
emitByte(AND_EAX_I, code, codePos);
|
||||
|
@ -422,12 +527,14 @@ namespace randomx {
|
|||
emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask, code, codePos);
|
||||
}
|
||||
|
||||
void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, int& codePos) {
|
||||
emit(LEA_32, code, codePos);
|
||||
emitByte(0x80 + instr.dst, code, codePos);
|
||||
if (instr.dst == RegisterNeedsSib) {
|
||||
emitByte(0x24, code, codePos);
|
||||
}
|
||||
template void JitCompilerX86::genAddressReg<false>(const Instruction& instr, uint8_t* code, int& codePos);
|
||||
template void JitCompilerX86::genAddressReg<true>(const Instruction& instr, uint8_t* code, int& codePos);
|
||||
|
||||
FORCE_INLINE void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, int& codePos) {
|
||||
const uint32_t dst = static_cast<uint32_t>(instr.dst) << 16;
|
||||
*(uint32_t*)(code + codePos) = 0x24808d41 + dst;
|
||||
codePos += (dst == (RegisterNeedsSib << 16)) ? 4 : 3;
|
||||
|
||||
emit32(instr.getImm32(), code, codePos);
|
||||
emitByte(AND_EAX_I, code, codePos);
|
||||
if (instr.getModCond() < StoreL3Condition) {
|
||||
|
@ -438,7 +545,7 @@ namespace randomx {
|
|||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::genAddressImm(const Instruction& instr, uint8_t* code, int& codePos) {
|
||||
FORCE_INLINE void JitCompilerX86::genAddressImm(const Instruction& instr, uint8_t* code, int& codePos) {
|
||||
emit32(instr.getImm32() & ScratchpadL3Mask, code, codePos);
|
||||
}
|
||||
|
||||
|
@ -483,7 +590,7 @@ namespace randomx {
|
|||
int pos = codePos;
|
||||
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr, p, pos);
|
||||
genAddressReg<true>(instr, p, pos);
|
||||
emit32(template_IADD_M[instr.dst], p, pos);
|
||||
}
|
||||
else {
|
||||
|
@ -523,7 +630,7 @@ namespace randomx {
|
|||
int pos = codePos;
|
||||
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr, p, pos);
|
||||
genAddressReg<true>(instr, p, pos);
|
||||
emit(REX_SUB_RM, p, pos);
|
||||
emitByte(0x04 + 8 * instr.dst, p, pos);
|
||||
emitByte(0x06, p, pos);
|
||||
|
@ -561,7 +668,7 @@ namespace randomx {
|
|||
int pos = codePos;
|
||||
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr, p, pos);
|
||||
genAddressReg<true>(instr, p, pos);
|
||||
emit(REX_IMUL_RM, p, pos);
|
||||
emitByte(0x04 + 8 * instr.dst, p, pos);
|
||||
emitByte(0x06, p, pos);
|
||||
|
@ -596,7 +703,7 @@ namespace randomx {
|
|||
int pos = codePos;
|
||||
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr, p, pos, false);
|
||||
genAddressReg<false>(instr, p, pos);
|
||||
emit(REX_MOV_RR64, p, pos);
|
||||
emitByte(0xc0 + instr.dst, p, pos);
|
||||
emit(REX_MUL_MEM, p, pos);
|
||||
|
@ -635,7 +742,7 @@ namespace randomx {
|
|||
int pos = codePos;
|
||||
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr, p, pos, false);
|
||||
genAddressReg<false>(instr, p, pos);
|
||||
emit(REX_MOV_RR64, p, pos);
|
||||
emitByte(0xc0 + instr.dst, p, pos);
|
||||
emit(REX_IMUL_MEM, p, pos);
|
||||
|
@ -704,7 +811,7 @@ namespace randomx {
|
|||
int pos = codePos;
|
||||
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr, p, pos);
|
||||
genAddressReg<true>(instr, p, pos);
|
||||
emit(REX_XOR_RM, p, pos);
|
||||
emitByte(0x04 + 8 * instr.dst, p, pos);
|
||||
emitByte(0x06, p, pos);
|
||||
|
@ -801,7 +908,7 @@ namespace randomx {
|
|||
int pos = codePos;
|
||||
|
||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||
genAddressReg(instr, p, pos);
|
||||
genAddressReg<true>(instr, p, pos);
|
||||
emit(REX_CVTDQ2PD_XMM12, p, pos);
|
||||
emit(REX_ADDPD, p, pos);
|
||||
emitByte(0xc4 + 8 * dst, p, pos);
|
||||
|
@ -826,7 +933,7 @@ namespace randomx {
|
|||
int pos = codePos;
|
||||
|
||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||
genAddressReg(instr, p, pos);
|
||||
genAddressReg<true>(instr, p, pos);
|
||||
emit(REX_CVTDQ2PD_XMM12, p, pos);
|
||||
emit(REX_SUBPD, p, pos);
|
||||
emitByte(0xc4 + 8 * dst, p, pos);
|
||||
|
@ -862,7 +969,7 @@ namespace randomx {
|
|||
int pos = codePos;
|
||||
|
||||
const uint32_t dst = instr.dst % RegisterCountFlt;
|
||||
genAddressReg(instr, p, pos);
|
||||
genAddressReg<true>(instr, p, pos);
|
||||
emit(REX_CVTDQ2PD_XMM12, p, pos);
|
||||
emit(REX_ANDPS_XMM12, p, pos);
|
||||
emit(REX_DIVPD, p, pos);
|
||||
|
@ -902,19 +1009,39 @@ namespace randomx {
|
|||
uint8_t* const p = code;
|
||||
int pos = codePos;
|
||||
|
||||
int reg = instr.dst;
|
||||
const int reg = instr.dst;
|
||||
int32_t jmp_offset = registerUsage[reg] - (pos + 16);
|
||||
|
||||
if (BranchesWithin32B) {
|
||||
const uint32_t branch_begin = static_cast<uint32_t>(pos + 7);
|
||||
const uint32_t branch_end = static_cast<uint32_t>(branch_begin + ((jmp_offset >= -128) ? 9 : 13));
|
||||
|
||||
// If the jump crosses or touches 32-byte boundary, align it
|
||||
if ((branch_begin ^ branch_end) >= 32) {
|
||||
const uint32_t alignment_size = 32 - (branch_begin & 31);
|
||||
jmp_offset -= alignment_size;
|
||||
emit(JMP_ALIGN_PREFIX[alignment_size], alignment_size, p, pos);
|
||||
}
|
||||
}
|
||||
|
||||
emit(REX_ADD_I, p, pos);
|
||||
emitByte(0xc0 + reg, p, pos);
|
||||
int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset;
|
||||
uint32_t imm = instr.getImm32() | (1UL << shift);
|
||||
if (RandomX_CurrentConfig.JumpOffset > 0 || shift > 0)
|
||||
imm &= ~(1UL << (shift - 1));
|
||||
const int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset;
|
||||
const uint32_t imm = (instr.getImm32() | (1UL << shift)) & ~(1UL << (shift - 1));
|
||||
emit32(imm, p, pos);
|
||||
emit(REX_TEST, p, pos);
|
||||
emitByte(0xc0 + reg, p, pos);
|
||||
emit32(RandomX_CurrentConfig.ConditionMask_Calculated << shift, p, pos);
|
||||
emit(JZ, p, pos);
|
||||
emit32(registerUsage[reg] - (pos + 4), p, pos);
|
||||
|
||||
if (jmp_offset >= -128) {
|
||||
emitByte(JZ_SHORT, p, pos);
|
||||
emitByte(jmp_offset, p, pos);
|
||||
}
|
||||
else {
|
||||
emit(JZ, p, pos);
|
||||
emit32(jmp_offset - 4, p, pos);
|
||||
}
|
||||
|
||||
//mark all registers as used
|
||||
uint64_t* r = (uint64_t*) registerUsage;
|
||||
uint64_t k = pos;
|
||||
|
|
|
@ -67,12 +67,17 @@ namespace randomx {
|
|||
|
||||
static InstructionGeneratorX86 engine[256];
|
||||
int registerUsage[RegistersCount];
|
||||
uint8_t* allocatedCode;
|
||||
uint8_t* code;
|
||||
int32_t codePos;
|
||||
|
||||
static bool BranchesWithin32B;
|
||||
|
||||
static void applyTweaks();
|
||||
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
||||
void generateProgramEpilogue(Program&, ProgramConfiguration&);
|
||||
static void genAddressReg(const Instruction&, uint8_t* code, int& codePos, bool rax = true);
|
||||
template<bool rax>
|
||||
static void genAddressReg(const Instruction&, uint8_t* code, int& codePos);
|
||||
static void genAddressRegDst(const Instruction&, uint8_t* code, int& codePos);
|
||||
static void genAddressImm(const Instruction&, uint8_t* code, int& codePos);
|
||||
static void genSIB(int scale, int index, int base, uint8_t* code, int& codePos);
|
||||
|
|
|
@ -473,4 +473,22 @@ extern "C" {
|
|||
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
|
||||
}
|
||||
|
||||
void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize) {
|
||||
rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
|
||||
machine->initScratchpad(tempHash);
|
||||
}
|
||||
|
||||
void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output) {
|
||||
machine->resetRoundingMode();
|
||||
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
|
||||
machine->run(&tempHash);
|
||||
rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
|
||||
}
|
||||
machine->run(&tempHash);
|
||||
|
||||
// Finish current hash and fill the scratchpad for the next hash at the same time
|
||||
rx_blake2b(tempHash, sizeof(tempHash), nextInput, nextInputSize, nullptr, 0);
|
||||
machine->hashAndFill(output, RANDOMX_HASH_SIZE, tempHash);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -338,6 +338,9 @@ RANDOMX_EXPORT void randomx_destroy_vm(randomx_vm *machine);
|
|||
*/
|
||||
RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output);
|
||||
|
||||
RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize);
|
||||
RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -114,6 +114,12 @@ namespace randomx {
|
|||
rx_blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0);
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
void VmBase<softAes>::hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) {
|
||||
hashAndFillAes1Rx4<softAes>(scratchpad, ScratchpadSize, ®.a, fill_state);
|
||||
rx_blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0);
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
void VmBase<softAes>::initScratchpad(void* seed) {
|
||||
fillAes1Rx4<softAes>(seed, ScratchpadSize, scratchpad);
|
||||
|
|
|
@ -39,6 +39,7 @@ public:
|
|||
virtual ~randomx_vm() = 0;
|
||||
virtual void setScratchpad(uint8_t *scratchpad) = 0;
|
||||
virtual void getFinalResult(void* out, size_t outSize) = 0;
|
||||
virtual void hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) = 0;
|
||||
virtual void setDataset(randomx_dataset* dataset) { }
|
||||
virtual void setCache(randomx_cache* cache) { }
|
||||
virtual void initScratchpad(void* seed) = 0;
|
||||
|
@ -82,6 +83,7 @@ namespace randomx {
|
|||
void setScratchpad(uint8_t *scratchpad) override;
|
||||
void initScratchpad(void* seed) override;
|
||||
void getFinalResult(void* out, size_t outSize) override;
|
||||
void hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) override;
|
||||
|
||||
protected:
|
||||
void generateProgram(void* seed);
|
||||
|
|
|
@ -70,7 +70,7 @@ bool xmrig::Rx::init(const Job &job, const RxConfig &config, bool hugePages)
|
|||
return true;
|
||||
}
|
||||
|
||||
d_ptr->queue.enqueue(job, config.nodeset(), config.threads(), hugePages);
|
||||
d_ptr->queue.enqueue(job, config.nodeset(), config.threads(), hugePages, config.mode());
|
||||
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -69,11 +69,11 @@ public:
|
|||
}
|
||||
|
||||
|
||||
inline void createDataset(bool hugePages)
|
||||
inline void createDataset(bool hugePages, RxConfig::Mode mode)
|
||||
{
|
||||
const uint64_t ts = Chrono::steadyMSecs();
|
||||
|
||||
m_dataset = new RxDataset(hugePages, true);
|
||||
m_dataset = new RxDataset(hugePages, true, mode);
|
||||
printAllocStatus(ts);
|
||||
}
|
||||
|
||||
|
@ -150,19 +150,19 @@ xmrig::RxDataset *xmrig::RxBasicStorage::dataset(const Job &job, uint32_t) const
|
|||
std::pair<uint32_t, uint32_t> xmrig::RxBasicStorage::hugePages() const
|
||||
{
|
||||
if (!d_ptr->dataset()) {
|
||||
return { 0u, 0u };
|
||||
return { 0U, 0U };
|
||||
}
|
||||
|
||||
return d_ptr->dataset()->hugePages();
|
||||
}
|
||||
|
||||
|
||||
void xmrig::RxBasicStorage::init(const RxSeed &seed, uint32_t threads, bool hugePages)
|
||||
void xmrig::RxBasicStorage::init(const RxSeed &seed, uint32_t threads, bool hugePages, RxConfig::Mode mode)
|
||||
{
|
||||
d_ptr->setSeed(seed);
|
||||
|
||||
if (!d_ptr->dataset()) {
|
||||
d_ptr->createDataset(hugePages);
|
||||
d_ptr->createDataset(hugePages, mode);
|
||||
}
|
||||
|
||||
d_ptr->initDataset(threads);
|
||||
|
|
|
@ -50,7 +50,7 @@ public:
|
|||
protected:
|
||||
RxDataset *dataset(const Job &job, uint32_t nodeId) const override;
|
||||
std::pair<uint32_t, uint32_t> hugePages() const override;
|
||||
void init(const RxSeed &seed, uint32_t threads, bool hugePages) override;
|
||||
void init(const RxSeed &seed, uint32_t threads, bool hugePages, RxConfig::Mode mode) override;
|
||||
|
||||
private:
|
||||
RxBasicStoragePrivate *d_ptr;
|
||||
|
|
|
@ -25,9 +25,54 @@
|
|||
|
||||
#include "crypto/rx/RxConfig.h"
|
||||
#include "backend/cpu/Cpu.h"
|
||||
#include "rapidjson/document.h"
|
||||
|
||||
|
||||
#include <array>
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# define strcasecmp _stricmp
|
||||
#endif
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
static const std::array<const char *, RxConfig::ModeMax> modeNames = { "auto", "fast", "light" };
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
const char *xmrig::RxConfig::modeName() const
|
||||
{
|
||||
return modeNames[m_mode];
|
||||
}
|
||||
|
||||
|
||||
uint32_t xmrig::RxConfig::threads() const
|
||||
{
|
||||
return m_threads < 1 ? static_cast<uint32_t>(Cpu::info()->threads()) : static_cast<uint32_t>(m_threads);
|
||||
}
|
||||
|
||||
|
||||
xmrig::RxConfig::Mode xmrig::RxConfig::readMode(const rapidjson::Value &value) const
|
||||
{
|
||||
if (value.IsUint()) {
|
||||
return static_cast<Mode>(std::min(value.GetUint(), ModeMax - 1));
|
||||
}
|
||||
|
||||
if (value.IsString()) {
|
||||
auto mode = value.GetString();
|
||||
|
||||
for (size_t i = 0; i < modeNames.size(); i++) {
|
||||
if (strcasecmp(mode, modeNames[i]) == 0) {
|
||||
return static_cast<Mode>(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return AutoMode;
|
||||
}
|
||||
|
|
|
@ -38,6 +38,13 @@ namespace xmrig {
|
|||
class RxConfig
|
||||
{
|
||||
public:
|
||||
enum Mode : uint32_t {
|
||||
AutoMode,
|
||||
FastMode,
|
||||
LightMode,
|
||||
ModeMax
|
||||
};
|
||||
|
||||
bool read(const rapidjson::Value &value);
|
||||
rapidjson::Value toJSON(rapidjson::Document &doc) const;
|
||||
|
||||
|
@ -47,11 +54,17 @@ public:
|
|||
inline std::vector<uint32_t> nodeset() const { return std::vector<uint32_t>(); }
|
||||
# endif
|
||||
|
||||
const char *modeName() const;
|
||||
uint32_t threads() const;
|
||||
|
||||
inline Mode mode() const { return m_mode; }
|
||||
|
||||
private:
|
||||
Mode readMode(const rapidjson::Value &value) const;
|
||||
|
||||
bool m_numa = true;
|
||||
int m_threads = -1;
|
||||
Mode m_mode = AutoMode;
|
||||
|
||||
# ifdef XMRIG_FEATURE_HWLOC
|
||||
std::vector<uint32_t> m_nodeset;
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
namespace xmrig {
|
||||
|
||||
static const char *kInit = "init";
|
||||
static const char *kMode = "mode";
|
||||
|
||||
}
|
||||
|
||||
|
@ -42,6 +43,7 @@ rapidjson::Value xmrig::RxConfig::toJSON(rapidjson::Document &doc) const
|
|||
|
||||
Value obj(kObjectType);
|
||||
obj.AddMember(StringRef(kInit), m_threads, allocator);
|
||||
obj.AddMember(StringRef(kMode), StringRef(modeName()), allocator);
|
||||
|
||||
return obj;
|
||||
}
|
||||
|
@ -51,6 +53,7 @@ bool xmrig::RxConfig::read(const rapidjson::Value &value)
|
|||
{
|
||||
if (value.IsObject()) {
|
||||
m_threads = Json::getInt(value, kInit, m_threads);
|
||||
m_mode = readMode(Json::getValue(value, kMode));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
namespace xmrig {
|
||||
|
||||
static const char *kInit = "init";
|
||||
static const char *kMode = "mode";
|
||||
static const char *kNUMA = "numa";
|
||||
|
||||
}
|
||||
|
@ -46,6 +47,7 @@ rapidjson::Value xmrig::RxConfig::toJSON(rapidjson::Document &doc) const
|
|||
Value obj(kObjectType);
|
||||
|
||||
obj.AddMember(StringRef(kInit), m_threads, allocator);
|
||||
obj.AddMember(StringRef(kMode), StringRef(modeName()), allocator);
|
||||
|
||||
if (!m_nodeset.empty()) {
|
||||
Value numa(kArrayType);
|
||||
|
@ -68,6 +70,13 @@ bool xmrig::RxConfig::read(const rapidjson::Value &value)
|
|||
{
|
||||
if (value.IsObject()) {
|
||||
m_threads = Json::getInt(value, kInit, m_threads);
|
||||
m_mode = readMode(Json::getValue(value, kMode));
|
||||
|
||||
if (m_mode == LightMode) {
|
||||
m_numa = false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
const auto &numa = Json::getValue(value, kNUMA);
|
||||
if (numa.IsArray()) {
|
||||
|
|
|
@ -26,6 +26,8 @@
|
|||
|
||||
|
||||
#include "crypto/rx/RxDataset.h"
|
||||
#include "backend/common/Tags.h"
|
||||
#include "base/io/log/Log.h"
|
||||
#include "crypto/common/VirtualMemory.h"
|
||||
#include "crypto/randomx/randomx.h"
|
||||
#include "crypto/rx/RxAlgo.h"
|
||||
|
@ -33,12 +35,14 @@
|
|||
|
||||
|
||||
#include <thread>
|
||||
#include <uv.h>
|
||||
|
||||
|
||||
static_assert(RANDOMX_FLAG_LARGE_PAGES == 1, "RANDOMX_FLAG_LARGE_PAGES flag mismatch");
|
||||
|
||||
|
||||
xmrig::RxDataset::RxDataset(bool hugePages, bool cache)
|
||||
xmrig::RxDataset::RxDataset(bool hugePages, bool cache, RxConfig::Mode mode) :
|
||||
m_mode(mode)
|
||||
{
|
||||
allocate(hugePages);
|
||||
|
||||
|
@ -118,7 +122,7 @@ size_t xmrig::RxDataset::size(bool cache) const
|
|||
|
||||
std::pair<uint32_t, uint32_t> xmrig::RxDataset::hugePages(bool cache) const
|
||||
{
|
||||
constexpr size_t twoMiB = 2u * 1024u * 1024u;
|
||||
constexpr size_t twoMiB = 2U * 1024U * 1024U;
|
||||
constexpr size_t cacheSize = VirtualMemory::align(RxCache::maxSize(), twoMiB) / twoMiB;
|
||||
size_t total = VirtualMemory::align(maxSize(), twoMiB) / twoMiB;
|
||||
|
||||
|
@ -157,6 +161,18 @@ void xmrig::RxDataset::setRaw(const void *raw)
|
|||
|
||||
void xmrig::RxDataset::allocate(bool hugePages)
|
||||
{
|
||||
if (m_mode == RxConfig::LightMode) {
|
||||
LOG_ERR(CLEAR "%s" RED_BOLD_S "fast RandomX mode disabled by config", rx_tag());
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (m_mode == RxConfig::AutoMode && uv_get_total_memory() < (maxSize() + RxCache::maxSize())) {
|
||||
LOG_ERR(CLEAR "%s" RED_BOLD_S "not enough memory for RandomX dataset", rx_tag());
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (hugePages) {
|
||||
m_flags = RANDOMX_FLAG_LARGE_PAGES;
|
||||
m_dataset = randomx_alloc_dataset(static_cast<randomx_flags>(m_flags));
|
||||
|
|
|
@ -28,9 +28,10 @@
|
|||
#define XMRIG_RX_DATASET_H
|
||||
|
||||
|
||||
#include "base/tools/Object.h"
|
||||
#include "crypto/common/Algorithm.h"
|
||||
#include "crypto/randomx/configuration.h"
|
||||
#include "base/tools/Object.h"
|
||||
#include "crypto/rx/RxConfig.h"
|
||||
|
||||
|
||||
struct randomx_dataset;
|
||||
|
@ -49,7 +50,7 @@ class RxDataset
|
|||
public:
|
||||
XMRIG_DISABLE_COPY_MOVE_DEFAULT(RxDataset)
|
||||
|
||||
RxDataset(bool hugePages, bool cache);
|
||||
RxDataset(bool hugePages, bool cache, RxConfig::Mode mode);
|
||||
RxDataset(RxCache *cache);
|
||||
~RxDataset();
|
||||
|
||||
|
@ -69,9 +70,10 @@ public:
|
|||
private:
|
||||
void allocate(bool hugePages);
|
||||
|
||||
int m_flags = 0;
|
||||
randomx_dataset *m_dataset = nullptr;
|
||||
RxCache *m_cache = nullptr;
|
||||
const RxConfig::Mode m_mode = RxConfig::FastMode;
|
||||
int m_flags = 0;
|
||||
randomx_dataset *m_dataset = nullptr;
|
||||
RxCache *m_cache = nullptr;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -198,7 +198,7 @@ private:
|
|||
return;
|
||||
}
|
||||
|
||||
auto dataset = new RxDataset(hugePages, false);
|
||||
auto dataset = new RxDataset(hugePages, false, RxConfig::FastMode);
|
||||
if (!dataset->get()) {
|
||||
printSkipped(nodeId, "failed to allocate dataset");
|
||||
|
||||
|
@ -339,14 +339,14 @@ xmrig::RxDataset *xmrig::RxNUMAStorage::dataset(const Job &job, uint32_t nodeId)
|
|||
std::pair<uint32_t, uint32_t> xmrig::RxNUMAStorage::hugePages() const
|
||||
{
|
||||
if (!d_ptr->isAllocated()) {
|
||||
return { 0u, 0u };
|
||||
return { 0U, 0U };
|
||||
}
|
||||
|
||||
return d_ptr->hugePages();
|
||||
}
|
||||
|
||||
|
||||
void xmrig::RxNUMAStorage::init(const RxSeed &seed, uint32_t threads, bool hugePages)
|
||||
void xmrig::RxNUMAStorage::init(const RxSeed &seed, uint32_t threads, bool hugePages, RxConfig::Mode)
|
||||
{
|
||||
d_ptr->setSeed(seed);
|
||||
|
||||
|
|
|
@ -53,7 +53,7 @@ public:
|
|||
protected:
|
||||
RxDataset *dataset(const Job &job, uint32_t nodeId) const override;
|
||||
std::pair<uint32_t, uint32_t> hugePages() const override;
|
||||
void init(const RxSeed &seed, uint32_t threads, bool hugePages) override;
|
||||
void init(const RxSeed &seed, uint32_t threads, bool hugePages, RxConfig::Mode mode) override;
|
||||
|
||||
private:
|
||||
RxNUMAStoragePrivate *d_ptr;
|
||||
|
|
|
@ -90,11 +90,11 @@ std::pair<uint32_t, uint32_t> xmrig::RxQueue::hugePages()
|
|||
{
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
|
||||
return m_storage && m_state == STATE_IDLE ? m_storage->hugePages() : std::pair<uint32_t, uint32_t>(0u, 0u);
|
||||
return m_storage && m_state == STATE_IDLE ? m_storage->hugePages() : std::pair<uint32_t, uint32_t>(0U, 0U);
|
||||
}
|
||||
|
||||
|
||||
void xmrig::RxQueue::enqueue(const RxSeed &seed, const std::vector<uint32_t> &nodeset, uint32_t threads, bool hugePages)
|
||||
void xmrig::RxQueue::enqueue(const RxSeed &seed, const std::vector<uint32_t> &nodeset, uint32_t threads, bool hugePages, RxConfig::Mode mode)
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(m_mutex);
|
||||
|
||||
|
@ -114,7 +114,7 @@ void xmrig::RxQueue::enqueue(const RxSeed &seed, const std::vector<uint32_t> &no
|
|||
return;
|
||||
}
|
||||
|
||||
m_queue.emplace_back(seed, nodeset, threads, hugePages);
|
||||
m_queue.emplace_back(seed, nodeset, threads, hugePages, mode);
|
||||
m_seed = seed;
|
||||
m_state = STATE_PENDING;
|
||||
|
||||
|
@ -156,7 +156,7 @@ void xmrig::RxQueue::backgroundInit()
|
|||
Buffer::toHex(item.seed.data().data(), 8).data()
|
||||
);
|
||||
|
||||
m_storage->init(item.seed, item.threads, item.hugePages);
|
||||
m_storage->init(item.seed, item.threads, item.hugePages, item.mode);
|
||||
|
||||
lock = std::unique_lock<std::mutex>(m_mutex);
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
|
||||
|
||||
#include "base/tools/Object.h"
|
||||
#include "crypto/rx/RxConfig.h"
|
||||
#include "crypto/rx/RxSeed.h"
|
||||
|
||||
|
||||
|
@ -52,14 +53,16 @@ class RxDataset;
|
|||
class RxQueueItem
|
||||
{
|
||||
public:
|
||||
RxQueueItem(const RxSeed &seed, const std::vector<uint32_t> &nodeset, uint32_t threads, bool hugePages) :
|
||||
RxQueueItem(const RxSeed &seed, const std::vector<uint32_t> &nodeset, uint32_t threads, bool hugePages, RxConfig::Mode mode) :
|
||||
hugePages(hugePages),
|
||||
mode(mode),
|
||||
seed(seed),
|
||||
nodeset(nodeset),
|
||||
threads(threads)
|
||||
{}
|
||||
|
||||
const bool hugePages;
|
||||
const RxConfig::Mode mode;
|
||||
const RxSeed seed;
|
||||
const std::vector<uint32_t> nodeset;
|
||||
const uint32_t threads;
|
||||
|
@ -77,7 +80,7 @@ public:
|
|||
bool isReady(const Job &job);
|
||||
RxDataset *dataset(const Job &job, uint32_t nodeId);
|
||||
std::pair<uint32_t, uint32_t> hugePages();
|
||||
void enqueue(const RxSeed &seed, const std::vector<uint32_t> &nodeset, uint32_t threads, bool hugePages);
|
||||
void enqueue(const RxSeed &seed, const std::vector<uint32_t> &nodeset, uint32_t threads, bool hugePages, RxConfig::Mode mode);
|
||||
|
||||
private:
|
||||
enum State {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue