Merge xmrig v6.7.0 into master

This commit is contained in:
MoneroOcean 2020-12-23 06:03:02 +00:00
commit 1719879f7e
249 changed files with 6814 additions and 6134 deletions

View file

@ -28,12 +28,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <thread>
#include <vector>
#include <array>
#include "crypto/randomx/aes_hash.hpp"
#include "crypto/randomx/soft_aes.h"
#include "crypto/randomx/randomx.h"
#include "base/tools/Chrono.h"
#include "base/tools/Profiler.h"
#include "crypto/randomx/randomx.h"
#include "crypto/randomx/soft_aes.h"
#include "crypto/rx/Profiler.h"
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
#define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e
@ -371,7 +372,7 @@ hashAndFillAes1Rx4_impl* softAESImpl = &hashAndFillAes1Rx4<1,1>;
void SelectSoftAESImpl(size_t threadsCount)
{
constexpr int test_length_ms = 100;
const std::vector<hashAndFillAes1Rx4_impl *> impl = {
const std::array<hashAndFillAes1Rx4_impl *, 4> impl = {
&hashAndFillAes1Rx4<1,1>,
&hashAndFillAes1Rx4<2,1>,
&hashAndFillAes1Rx4<2,2>,

View file

@ -0,0 +1,28 @@
;# Constant tables for the AVX2 code path. Every value is spelled out byte by
;# byte with db, least significant byte first (x86 little-endian layout).
;# NOTE(review): how each label is consumed is not visible in this fragment;
;# the names suggest per-register (r0..r7) constants - confirm against the users.

;# Four consecutive 64-bit values: 2, 3, 4, 5.
r0_avx2_increments:
db 2,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0
;# Eight bytes; read as one 64-bit little-endian value this is 1 << 32.
mul_hi_avx2_data:
db 0,0,0,0,1,0,0,0
;# 64-bit constant; its decimal value is given in the comment that follows.
r0_avx2_mul:
;#/ 6364136223846793005
db 45, 127, 149, 76, 45, 244, 81, 88
;# The remaining labels are 64-bit constants, one per register r1..r7; each is
;# preceded by a comment giving its decimal value.
r1_avx2_add:
;#/ 9298411001130361340
db 252, 161, 245, 89, 138, 151, 10, 129
r2_avx2_add:
;#/ 12065312585734608966
db 70, 216, 194, 56, 223, 153, 112, 167
r3_avx2_add:
;#/ 9306329213124626780
db 92, 73, 34, 191, 28, 185, 38, 129
r4_avx2_add:
;#/ 5281919268842080866
db 98, 138, 159, 23, 151, 37, 77, 73
r5_avx2_add:
;#/ 10536153434571861004
db 12, 236, 170, 206, 185, 239, 55, 146
r6_avx2_add:
;#/ 3398623926847679864
db 120, 45, 230, 108, 116, 86, 42, 47
r7_avx2_add:
;#/ 9549104520008361294
db 78, 229, 44, 182, 247, 59, 133, 132

View file

@ -0,0 +1,31 @@
;# Epilogue fragment: releases the stack scratch area, restores every register
;# that was saved on entry and returns to the caller.

;# Drop 40 bytes of scratch space and pop the value stored just above it into r9.
;# NOTE(review): the matching allocation/push is not part of this fragment.
add rsp, 40
pop r9
;# Reload xmm0-xmm15 from the 256-byte save area (16 bytes per register).
movdqu xmm0, xmmword ptr [rsp]
movdqu xmm1, xmmword ptr [rsp + 16]
movdqu xmm2, xmmword ptr [rsp + 32]
movdqu xmm3, xmmword ptr [rsp + 48]
movdqu xmm4, xmmword ptr [rsp + 64]
movdqu xmm5, xmmword ptr [rsp + 80]
movdqu xmm6, xmmword ptr [rsp + 96]
movdqu xmm7, xmmword ptr [rsp + 112]
movdqu xmm8, xmmword ptr [rsp + 128]
movdqu xmm9, xmmword ptr [rsp + 144]
movdqu xmm10, xmmword ptr [rsp + 160]
movdqu xmm11, xmmword ptr [rsp + 176]
movdqu xmm12, xmmword ptr [rsp + 192]
movdqu xmm13, xmmword ptr [rsp + 208]
movdqu xmm14, xmmword ptr [rsp + 224]
movdqu xmm15, xmmword ptr [rsp + 240]
;# Zero the upper halves of all ymm registers before returning.
vzeroupper
;# Release the XMM save area.
add rsp, 256
;# Restore the general-purpose registers, then return.
pop r15
pop r14
pop r13
pop r12
pop rsi
pop rdi
pop rbp
pop rbx
ret

View file

@ -0,0 +1,37 @@
;# Prefetch fragment. Issues non-temporal prefetches for five 64-byte lines
;# starting at rsi, then computes five addresses of the form
;#   rdi + ((rbp + k) AND RANDOMX_CACHE_MASK) * 64,  k = 0..4
;# and prefetches each of them. The address for k = 0 is left in rbx; the
;# addresses for k = 1..4 are stored at [rsp], [rsp+8], [rsp+16], [rsp+24].
;# RANDOMX_CACHE_MASK is defined outside this fragment.
;# Clobbers rax and rbx.

;# prefetch RandomX dataset lines
prefetchnta byte ptr [rsi]
prefetchnta byte ptr [rsi+64]
prefetchnta byte ptr [rsi+128]
prefetchnta byte ptr [rsi+192]
prefetchnta byte ptr [rsi+256]
;# prefetch RandomX cache lines
;# k = 0: the address is kept in rbx only
mov rbx, rbp
and rbx, RANDOMX_CACHE_MASK
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
;# k = 1: address saved at [rsp]
lea rax, [rbp+1]
and rax, RANDOMX_CACHE_MASK
shl rax, 6
add rax, rdi
prefetchnta byte ptr [rax]
mov [rsp], rax
;# k = 2: address saved at [rsp+8]
lea rax, [rbp+2]
and rax, RANDOMX_CACHE_MASK
shl rax, 6
add rax, rdi
prefetchnta byte ptr [rax]
mov [rsp+8], rax
;# k = 3: address saved at [rsp+16]
lea rax, [rbp+3]
and rax, RANDOMX_CACHE_MASK
shl rax, 6
add rax, rdi
prefetchnta byte ptr [rax]
mov [rsp+16], rax
;# k = 4: address saved at [rsp+24]
lea rax, [rbp+4]
and rax, RANDOMX_CACHE_MASK
shl rax, 6
add rax, rdi
prefetchnta byte ptr [rax]
mov [rsp+24], rax

View file

@ -0,0 +1,38 @@
;# Loop-end fragment. Writes 320 bytes (five 64-byte records) at rsi, advances
;# the counters and branches back while rbp is below the bound at [rsp+40].
;#   - r8..r15 are stored as eight qwords at [rsi+0..63].
;#   - ymm0..ymm7 are transposed and stored at [rsi+64..319].
;# NOTE(review): the transpose reads as "ymmN holds register N for four items,
;# one item per 64-bit lane", so each 64 bytes stored holds one item's eight
;# registers - confirm against the code that fills ymm0..ymm7.
;# The scalar stores are interleaved with the vector shuffles.
;# Clobbers ymm0..ymm15.
mov qword ptr [rsi+0], r8
;# unpack-low pairs: lanes 0 and 2 of two adjacent source registers
vpunpcklqdq ymm8, ymm0, ymm1
mov qword ptr [rsi+8], r9
vpunpcklqdq ymm9, ymm2, ymm3
mov qword ptr [rsi+16], r10
vpunpcklqdq ymm10, ymm4, ymm5
mov qword ptr [rsi+24], r11
vpunpcklqdq ymm11, ymm6, ymm7
mov qword ptr [rsi+32], r12
;# unpack-high pairs: lanes 1 and 3 of two adjacent source registers
;# (ymm0..ymm7 are still intact here; they are overwritten only further down)
vpunpckhqdq ymm12, ymm0, ymm1
mov qword ptr [rsi+40], r13
vpunpckhqdq ymm13, ymm2, ymm3
mov qword ptr [rsi+48], r14
vpunpckhqdq ymm14, ymm4, ymm5
mov qword ptr [rsi+56], r15
vpunpckhqdq ymm15, ymm6, ymm7
;# imm 32 joins the low 128-bit halves of both sources: lane 0 of ymm0..ymm7
vperm2i128 ymm0, ymm8, ymm9, 32
vperm2i128 ymm1, ymm10, ymm11, 32
vmovdqu ymmword ptr [rsi+64], ymm0
vmovdqu ymmword ptr [rsi+96], ymm1
;# lane 1 of the original ymm0..ymm7
vperm2i128 ymm2, ymm12, ymm13, 32
vperm2i128 ymm3, ymm14, ymm15, 32
vmovdqu ymmword ptr [rsi+128], ymm2
vmovdqu ymmword ptr [rsi+160], ymm3
;# imm 49 joins the high 128-bit halves: lane 2 of the original ymm0..ymm7
vperm2i128 ymm4, ymm8, ymm9, 49
vperm2i128 ymm5, ymm10, ymm11, 49
vmovdqu ymmword ptr [rsi+192], ymm4
vmovdqu ymmword ptr [rsi+224], ymm5
;# lane 3 of the original ymm0..ymm7
vperm2i128 ymm6, ymm12, ymm13, 49
vperm2i128 ymm7, ymm14, ymm15, 49
vmovdqu ymmword ptr [rsi+256], ymm6
vmovdqu ymmword ptr [rsi+288], ymm7
;# five records were produced: advance the counter by 5 and the output by 5 * 64
add rbp, 5
add rsi, 320
;# loop bound is read from the stack slot at [rsp+40]
cmp rbp, qword ptr [rsp+40]
;# Hand-encoded jb with a zero 32-bit displacement; the code generator is
;# expected to overwrite the last four bytes with the real branch offset.
db 15, 130, 0, 0, 0, 0 ;# jb rel32

View file

@ -0,0 +1,27 @@
;# Prologue fragment: saves eight general-purpose registers on the stack and
;# then spills xmm0-xmm15 into a freshly reserved 256-byte area at [rsp].
push rbx
push rbp
push rdi
push rsi
push r12
push r13
push r14
push r15
;# save all XMM registers just to be safe for all calling conventions
;# (16 registers * 16 bytes = 256 bytes; unaligned stores, so rsp alignment does not matter)
sub rsp, 256
movdqu xmmword ptr [rsp], xmm0
movdqu xmmword ptr [rsp + 16], xmm1
movdqu xmmword ptr [rsp + 32], xmm2
movdqu xmmword ptr [rsp + 48], xmm3
movdqu xmmword ptr [rsp + 64], xmm4
movdqu xmmword ptr [rsp + 80], xmm5
movdqu xmmword ptr [rsp + 96], xmm6
movdqu xmmword ptr [rsp + 112], xmm7
movdqu xmmword ptr [rsp + 128], xmm8
movdqu xmmword ptr [rsp + 144], xmm9
movdqu xmmword ptr [rsp + 160], xmm10
movdqu xmmword ptr [rsp + 176], xmm11
movdqu xmmword ptr [rsp + 192], xmm12
movdqu xmmword ptr [rsp + 208], xmm13
movdqu xmmword ptr [rsp + 224], xmm14
movdqu xmmword ptr [rsp + 240], xmm15

View file

@ -0,0 +1,50 @@
;# Load fragment. Reads four 64-byte items through the four pointers found in
;# the stack slots that were at [rsp], [rsp+8], [rsp+16], [rsp+24] on entry,
;# transposes them and XORs them into ymm0..ymm7 (ymmN ^= register N of items
;# 1..4, one item per 64-bit lane, as the per-line comments below describe).
;# rbx and ymm14 are preserved via a 40-byte scratch area; rax, rcx, rdx and
;# ymm8..ymm13 are overwritten without being saved.

;# 40 bytes of scratch: 8 for rbx + 32 for ymm14
sub rsp, 40
mov [rsp], rbx
vmovdqu ymmword ptr [rsp+8], ymm14
;# the four pointers now sit 40 bytes higher because of the sub above
mov rax, [rsp+40]
mov rbx, [rsp+48]
mov rcx, [rsp+56]
mov rdx, [rsp+64]
;# first 32 bytes of each item: registers r0..r3
vmovdqu ymm8, ymmword ptr [rax] ;# ymm8 = r0[1], r1[1], r2[1], r3[1]
vmovdqu ymm9, ymmword ptr [rbx] ;# ymm9 = r0[2], r1[2], r2[2], r3[2]
vmovdqu ymm10, ymmword ptr [rcx] ;# ymm10 = r0[3], r1[3], r2[3], r3[3]
vmovdqu ymm11, ymmword ptr [rdx] ;# ymm11 = r0[4], r1[4], r2[4], r3[4]
vpunpcklqdq ymm12, ymm8, ymm9 ;# ymm12 = r0[1], r0[2], r2[1], r2[2]
vpunpcklqdq ymm13, ymm10, ymm11 ;# ymm13 = r0[3], r0[4], r2[3], r2[4]
vperm2i128 ymm14, ymm12, ymm13, 32 ;# ymm14 = r0[1], r0[2], r0[3], r0[4]
vpxor ymm0, ymm0, ymm14
vperm2i128 ymm14, ymm12, ymm13, 49 ;# ymm14 = r2[1], r2[2], r2[3], r2[4]
vpxor ymm2, ymm2, ymm14
vpunpckhqdq ymm12, ymm8, ymm9 ;# ymm12 = r1[1], r1[2], r3[1], r3[2]
vpunpckhqdq ymm13, ymm10, ymm11 ;# ymm13 = r1[3], r1[4], r3[3], r3[4]
vperm2i128 ymm14, ymm12, ymm13, 32 ;# ymm14 = r1[1], r1[2], r1[3], r1[4]
vpxor ymm1, ymm1, ymm14
vperm2i128 ymm14, ymm12, ymm13, 49 ;# ymm14 = r3[1], r3[2], r3[3], r3[4]
vpxor ymm3, ymm3, ymm14
;# second 32 bytes of each item: registers r4..r7
vmovdqu ymm8, ymmword ptr [rax+32] ;# ymm8 = r4[1], r5[1], r6[1], r7[1]
vmovdqu ymm9, ymmword ptr [rbx+32] ;# ymm9 = r4[2], r5[2], r6[2], r7[2]
vmovdqu ymm10, ymmword ptr [rcx+32] ;# ymm10 = r4[3], r5[3], r6[3], r7[3]
vmovdqu ymm11, ymmword ptr [rdx+32] ;# ymm11 = r4[4], r5[4], r6[4], r7[4]
vpunpcklqdq ymm12, ymm8, ymm9 ;# ymm12 = r4[1], r4[2], r6[1], r6[2]
vpunpcklqdq ymm13, ymm10, ymm11 ;# ymm13 = r4[3], r4[4], r6[3], r6[4]
vperm2i128 ymm14, ymm12, ymm13, 32 ;# ymm14 = r4[1], r4[2], r4[3], r4[4]
vpxor ymm4, ymm4, ymm14
vperm2i128 ymm14, ymm12, ymm13, 49 ;# ymm14 = r6[1], r6[2], r6[3], r6[4]
vpxor ymm6, ymm6, ymm14
vpunpckhqdq ymm12, ymm8, ymm9 ;# ymm12 = r5[1], r5[2], r7[1], r7[2]
vpunpckhqdq ymm13, ymm10, ymm11 ;# ymm13 = r5[3], r5[4], r7[3], r7[4]
vperm2i128 ymm14, ymm12, ymm13, 32 ;# ymm14 = r5[1], r5[2], r5[3], r5[4]
vpxor ymm5, ymm5, ymm14
vperm2i128 ymm14, ymm12, ymm13, 49 ;# ymm14 = r7[1], r7[2], r7[3], r7[4]
vpxor ymm7, ymm7, ymm14
;# restore the two saved registers and release the scratch area
mov rbx, [rsp]
vmovdqu ymm14, ymmword ptr [rsp+8]
add rsp, 40

View file

@ -0,0 +1,29 @@
;# Prefetch fragment. Spills the four 64-bit lanes of a ymm register to
;# [rsp..rsp+31], then turns each lane in place into the address
;#   rdi + (lane AND RANDOMX_CACHE_MASK) * 64
;# and issues a non-temporal prefetch for it. On exit the four addresses are
;# left at [rsp], [rsp+8], [rsp+16], [rsp+24]. Clobbers rax.
;# RANDOMX_CACHE_MASK is defined outside this fragment.
;# NOTE(review): the source register is written as ymm0 here; the JIT appears
;# to patch this first instruction after copying so that another register can
;# be selected - confirm against the code generator.
vmovdqu ymmword ptr [rsp], ymm0
;# lane 0
mov rax, [rsp]
and rax, RANDOMX_CACHE_MASK
shl rax, 6
add rax, rdi
mov [rsp], rax
prefetchnta byte ptr [rax]
;# lane 1
mov rax, [rsp+8]
and rax, RANDOMX_CACHE_MASK
shl rax, 6
add rax, rdi
mov [rsp+8], rax
prefetchnta byte ptr [rax]
;# lane 2
mov rax, [rsp+16]
and rax, RANDOMX_CACHE_MASK
shl rax, 6
add rax, rdi
mov [rsp+16], rax
prefetchnta byte ptr [rax]
;# lane 3
mov rax, [rsp+24]
and rax, RANDOMX_CACHE_MASK
shl rax, 6
add rax, rdi
mov [rsp+24], rax
prefetchnta byte ptr [rax]

View file

@ -1,5 +1,7 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
Copyright (c) 2019-2020, SChernykh <https://github.com/SChernykh>
Copyright (c) 2019-2020, XMRig <https://github.com/xmrig>, <support@xmrig.com>
All rights reserved.
@ -59,10 +61,11 @@ namespace randomx {
template<class Allocator>
void deallocCache(randomx_cache* cache) {
if (cache->memory != nullptr)
if (cache->memory != nullptr) {
Allocator::freeMemory(cache->memory, RANDOMX_CACHE_MAX_SIZE);
if (cache->jit != nullptr)
delete cache->jit;
}
delete cache->jit;
}
template void deallocCache<DefaultAllocator>(randomx_cache* cache);
@ -77,16 +80,16 @@ namespace randomx {
context.pwdlen = (uint32_t)keySize;
context.salt = CONST_CAST(uint8_t *)RandomX_CurrentConfig.ArgonSalt;
context.saltlen = (uint32_t)strlen(RandomX_CurrentConfig.ArgonSalt);
context.secret = NULL;
context.secret = nullptr;
context.secretlen = 0;
context.ad = NULL;
context.ad = nullptr;
context.adlen = 0;
context.t_cost = RandomX_CurrentConfig.ArgonIterations;
context.m_cost = RandomX_CurrentConfig.ArgonMemory;
context.lanes = RandomX_CurrentConfig.ArgonLanes;
context.threads = 1;
context.allocate_cbk = NULL;
context.free_cbk = NULL;
context.allocate_cbk = nullptr;
context.free_cbk = nullptr;
context.flags = ARGON2_DEFAULT_FLAGS;
context.version = ARGON2_VERSION_NUMBER;
@ -100,8 +103,18 @@ namespace randomx {
// Initializes the cache from the given key via initCache(), then has the
// cache's JIT compiler generate the superscalar hash code and the dataset
// initialization code, and stores the resulting dataset-init entry point in
// cache->datasetInit.
// When XMRIG_SECURE_JIT is defined, enableWriting() is called on the JIT
// before any code is generated and enableExecution() after generation is done.
void initCacheCompile(randomx_cache* cache, const void* key, size_t keySize) {
initCache(cache, key, keySize);
# ifdef XMRIG_SECURE_JIT
// Must precede the generate*() calls below, which write into the JIT buffer.
cache->jit->enableWriting();
# endif
cache->jit->generateSuperscalarHash(cache->programs);
cache->jit->generateDatasetInitCode();
cache->datasetInit = cache->jit->getDatasetInitFunc();
# ifdef XMRIG_SECURE_JIT
// Code generation is finished; switch the buffer back before it is run.
cache->jit->enableExecution();
# endif
}
constexpr uint64_t superscalarMul0 = 6364136223846793005ULL;

View file

@ -48,7 +48,7 @@ struct randomx_cache {
randomx::DatasetInitFunc* datasetInit;
randomx::SuperscalarProgram programs[RANDOMX_CACHE_MAX_ACCESSES];
bool isInitialized() {
bool isInitialized() const {
return programs[0].getSize() != 0;
}
};

View file

@ -1,6 +1,7 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
Copyright (c) 2019, SChernykh <https://github.com/SChernykh>
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
Copyright (c) 2019-2020, SChernykh <https://github.com/SChernykh>
Copyright (c) 2019-2020, XMRig <https://github.com/xmrig>, <support@xmrig.com>
All rights reserved.
@ -28,18 +29,25 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "crypto/randomx/jit_compiler_a64.hpp"
#include "crypto/randomx/superscalar.hpp"
#include "crypto/common/VirtualMemory.h"
#include "crypto/randomx/program.hpp"
#include "crypto/randomx/reciprocal.h"
#include "crypto/randomx/superscalar.hpp"
#include "crypto/randomx/virtual_memory.hpp"
// File-local switch: the JitCompilerA64 constructor combines it with its own
// hugePagesEnable argument to decide whether huge pages are requested.
static bool hugePagesJIT = false;
// File-local setting written by randomx_set_optimized_dataset_init().
// NOTE(review): no reader of this value is visible in this part of the file.
static int optimizedDatasetInit = -1;
// Records whether huge pages may be used for JIT code buffers.
void randomx_set_huge_pages_jit(bool hugePages)
{
hugePagesJIT = hugePages;
}
// Records the requested optimized-dataset-init mode.
void randomx_set_optimized_dataset_init(int value)
{
optimizedDatasetInit = value;
}
namespace ARMV8A {
constexpr uint32_t B = 0x14000000;
@ -96,37 +104,28 @@ static size_t CalcDatasetItemSize()
constexpr uint32_t IntRegMap[8] = { 4, 5, 6, 7, 12, 13, 14, 15 };
JitCompilerA64::JitCompilerA64(bool hugePagesEnable)
: code((uint8_t*) allocExecutableMemory(CodeSize + CalcDatasetItemSize(), hugePagesJIT && hugePagesEnable))
, literalPos(ImulRcpLiteralsEnd)
, num32bitLiterals(0)
JitCompilerA64::JitCompilerA64(bool hugePagesEnable, bool) :
hugePages(hugePagesJIT && hugePagesEnable),
literalPos(ImulRcpLiteralsEnd)
{
memset(reg_changed_offset, 0, sizeof(reg_changed_offset));
memcpy(code, (void*) randomx_program_aarch64, CodeSize);
}
JitCompilerA64::~JitCompilerA64()
{
freePagedMemory(code, CodeSize + CalcDatasetItemSize());
}
#if defined(ios_HOST_OS) || defined (darwin_HOST_OS)
void sys_icache_invalidate(void *start, size_t len);
#endif
static void clear_code_cache(char* p1, char* p2)
{
# if defined(ios_HOST_OS) || defined (darwin_HOST_OS)
sys_icache_invalidate(p1, static_cast<size_t>(p2 - p1));
# elif defined (HAVE_BUILTIN_CLEAR_CACHE) || defined (__GNUC__)
__builtin___clear_cache(p1, p2);
# else
# error "No clear code cache function found"
# endif
freePagedMemory(code, allocatedSize);
}
void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& config, uint32_t)
{
if (!allocatedSize) {
allocate(CodeSize);
}
#ifdef XMRIG_SECURE_JIT
else {
enableWriting();
}
#endif
uint32_t codePos = MainLoopBegin + 4;
// and w16, w10, ScratchpadL3Mask64
@ -171,11 +170,22 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
codePos = ((uint8_t*)randomx_program_aarch64_update_spMix1) - ((uint8_t*)randomx_program_aarch64);
emit32(ARMV8A::EOR | 10 | (IntRegMap[config.readReg0] << 5) | (IntRegMap[config.readReg1] << 16), code, codePos);
clear_code_cache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
# ifndef XMRIG_OS_APPLE
xmrig::VirtualMemory::flushInstructionCache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
# endif
}
void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration& config, uint32_t datasetOffset)
{
if (!allocatedSize) {
allocate(CodeSize);
}
#ifdef XMRIG_SECURE_JIT
else {
enableWriting();
}
#endif
uint32_t codePos = MainLoopBegin + 4;
// and w16, w10, ScratchpadL3Mask64
@ -226,12 +236,23 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
emit32(ARMV8A::ADD_IMM_LO | 2 | (2 << 5) | (imm_lo << 10), code, codePos);
emit32(ARMV8A::ADD_IMM_HI | 2 | (2 << 5) | (imm_hi << 10), code, codePos);
clear_code_cache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
# ifndef XMRIG_OS_APPLE
xmrig::VirtualMemory::flushInstructionCache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
# endif
}
template<size_t N>
void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N])
{
if (!allocatedSize) {
allocate(CodeSize + CalcDatasetItemSize());
}
#ifdef XMRIG_SECURE_JIT
else {
enableWriting();
}
#endif
uint32_t codePos = CodeSize;
uint8_t* p1 = (uint8_t*)randomx_calc_dataset_item_aarch64;
@ -342,13 +363,19 @@ void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N])
memcpy(code + codePos, p1, p2 - p1);
codePos += p2 - p1;
clear_code_cache(reinterpret_cast<char*>(code + CodeSize), reinterpret_cast<char*>(code + codePos));
# ifndef XMRIG_OS_APPLE
xmrig::VirtualMemory::flushInstructionCache(reinterpret_cast<char*>(code + CodeSize), reinterpret_cast<char*>(code + codePos));
# endif
}
template void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_MAX_ACCESSES]);
DatasetInitFunc* JitCompilerA64::getDatasetInitFunc()
DatasetInitFunc* JitCompilerA64::getDatasetInitFunc() const
{
# ifdef XMRIG_SECURE_JIT
enableExecution();
# endif
return (DatasetInitFunc*)(code + (((uint8_t*)randomx_init_dataset_aarch64) - ((uint8_t*)randomx_program_aarch64)));
}
@ -357,6 +384,26 @@ size_t JitCompilerA64::getCodeSize()
return CodeSize;
}
// Applies VirtualMemory::protectRW to the whole allocated code buffer
// (allocatedSize bytes starting at code).
void JitCompilerA64::enableWriting() const
{
xmrig::VirtualMemory::protectRW(code, allocatedSize);
}
// Applies VirtualMemory::protectRX to the whole allocated code buffer
// (allocatedSize bytes starting at code).
void JitCompilerA64::enableExecution() const
{
xmrig::VirtualMemory::protectRX(code, allocatedSize);
}
// Allocates the executable code buffer and seeds it with the program template.
//   size - number of bytes to allocate; remembered in allocatedSize.
// The first CodeSize bytes of the new buffer are filled from
// randomx_program_aarch64; any bytes beyond CodeSize are left as allocated.
// NOTE(review): the result of allocExecutableMemory() is used without a check
// here - confirm how that function reports failure.
void JitCompilerA64::allocate(size_t size)
{
allocatedSize = size;
code = static_cast<uint8_t*>(allocExecutableMemory(allocatedSize, hugePages));
memcpy(code, reinterpret_cast<const void *>(randomx_program_aarch64), CodeSize);
}
void JitCompilerA64::emitMovImmediate(uint32_t dst, uint32_t imm, uint8_t* code, uint32_t& codePos)
{
uint32_t k = codePos;

View file

@ -1,6 +1,7 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
Copyright (c) 2019, SChernykh <https://github.com/SChernykh>
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
Copyright (c) 2019-2020, SChernykh <https://github.com/SChernykh>
Copyright (c) 2019-2020, XMRig <https://github.com/xmrig>, <support@xmrig.com>
All rights reserved.
@ -46,7 +47,7 @@ namespace randomx {
class JitCompilerA64 {
public:
explicit JitCompilerA64(bool hugePagesEnable);
explicit JitCompilerA64(bool hugePagesEnable, bool optimizedInitDatasetEnable);
~JitCompilerA64();
void prepare() {}
@ -58,16 +59,32 @@ namespace randomx {
void generateDatasetInitCode() {}
ProgramFunc* getProgramFunc() { return reinterpret_cast<ProgramFunc*>(code); }
DatasetInitFunc* getDatasetInitFunc();
inline ProgramFunc *getProgramFunc() const {
# ifdef XMRIG_SECURE_JIT
enableExecution();
# endif
return reinterpret_cast<ProgramFunc*>(code);
}
DatasetInitFunc* getDatasetInitFunc() const;
uint8_t* getCode() { return code; }
size_t getCodeSize();
void enableWriting() const;
void enableExecution() const;
static InstructionGeneratorA64 engine[256];
uint32_t reg_changed_offset[8];
uint8_t* code;
private:
const bool hugePages;
uint32_t reg_changed_offset[8]{};
uint8_t* code = nullptr;
uint32_t literalPos;
uint32_t num32bitLiterals;
uint32_t num32bitLiterals = 0;
size_t allocatedSize = 0;
void allocate(size_t size);
static void emit32(uint32_t val, uint8_t* code, uint32_t& codePos)
{
@ -90,6 +107,7 @@ namespace randomx {
template<uint32_t tmp_reg_fp>
void emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* code, uint32_t& codePos);
public:
void h_IADD_RS(Instruction&, uint32_t&);
void h_IADD_M(Instruction&, uint32_t&);
void h_ISUB_R(Instruction&, uint32_t&);

View file

@ -35,3 +35,6 @@ void randomx_set_huge_pages_jit(bool)
{
}
// Stub with an empty body: the argument is accepted and ignored.
void randomx_set_optimized_dataset_init(int)
{
}

View file

@ -1,5 +1,7 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
Copyright (c) 2019-2020, SChernykh <https://github.com/SChernykh>
Copyright (c) 2019-2020, XMRig <https://github.com/xmrig>, <support@xmrig.com>
All rights reserved.
@ -41,7 +43,7 @@ namespace randomx {
class JitCompilerFallback {
public:
explicit JitCompilerFallback(bool) {
explicit JitCompilerFallback(bool, bool) {
throw std::runtime_error("JIT compilation is not supported on this platform");
}
void prepare() {}
@ -70,5 +72,7 @@ namespace randomx {
size_t getCodeSize() {
return 0;
}
void enableWriting() {}
void enableExecution() {}
};
}

View file

@ -1,5 +1,7 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
Copyright (c) 2019-2020, SChernykh <https://github.com/SChernykh>
Copyright (c) 2019-2020, XMRig <https://github.com/xmrig>, <support@xmrig.com>
All rights reserved.
@ -30,14 +32,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cstring>
#include <climits>
#include <atomic>
#include "crypto/randomx/jit_compiler_x86.hpp"
#include "backend/cpu/Cpu.h"
#include "crypto/common/VirtualMemory.h"
#include "crypto/randomx/jit_compiler_x86_static.hpp"
#include "crypto/randomx/superscalar.hpp"
#include "crypto/randomx/program.hpp"
#include "crypto/randomx/reciprocal.h"
#include "crypto/randomx/superscalar.hpp"
#include "crypto/randomx/virtual_memory.hpp"
#include "base/tools/Profiler.h"
#include "backend/cpu/Cpu.h"
#include "crypto/rx/Profiler.h"
#ifdef XMRIG_FIX_RYZEN
# include "crypto/rx/Rx.h"
@ -45,17 +49,21 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef _MSC_VER
# include <intrin.h>
#else
# include <cpuid.h>
#endif
// File-local switch: combined with the constructor's hugePagesEnable argument
// when the JIT code buffer is allocated.
static bool hugePagesJIT = false;
// Mode for the AVX2 dataset initialization, as interpreted by the
// JitCompilerX86 constructor:
//   negative = auto detect, 0 = always disabled, positive = always enabled.
static int optimizedDatasetInit = -1;
// Records whether huge pages may be used for JIT code buffers.
void randomx_set_huge_pages_jit(bool hugePages)
{
hugePagesJIT = hugePages;
}
// Records the requested AVX2 dataset-init mode (see optimizedDatasetInit).
void randomx_set_optimized_dataset_init(int value)
{
optimizedDatasetInit = value;
}
namespace randomx {
/*
@ -112,6 +120,11 @@ namespace randomx {
#define codeReadDatasetLightSshInit ADDR(randomx_program_read_dataset_sshash_init)
#define codeReadDatasetLightSshFin ADDR(randomx_program_read_dataset_sshash_fin)
#define codeDatasetInit ADDR(randomx_dataset_init)
#define codeDatasetInitAVX2_prologue ADDR(randomx_dataset_init_avx2_prologue)
#define codeDatasetInitAVX2_loop_end ADDR(randomx_dataset_init_avx2_loop_end)
#define codeDatasetInitAVX2_loop_epilogue ADDR(randomx_dataset_init_avx2_epilogue)
#define codeDatasetInitAVX2_ssh_load ADDR(randomx_dataset_init_avx2_ssh_load)
#define codeDatasetInitAVX2_ssh_prefetch ADDR(randomx_dataset_init_avx2_ssh_prefetch)
#define codeLoopStore ADDR(randomx_program_loop_store)
#define codeLoopEnd ADDR(randomx_program_loop_end)
#define codeEpilogue ADDR(randomx_program_epilogue)
@ -128,7 +141,12 @@ namespace randomx {
#define readDatasetLightInitSize (codeReadDatasetLightSshFin - codeReadDatasetLightSshInit)
#define readDatasetLightFinSize (codeLoopStore - codeReadDatasetLightSshFin)
#define loopStoreSize (codeLoopEnd - codeLoopStore)
#define datasetInitSize (codeEpilogue - codeDatasetInit)
#define datasetInitSize (codeDatasetInitAVX2_prologue - codeDatasetInit)
#define datasetInitAVX2_prologue_size (codeDatasetInitAVX2_loop_end - codeDatasetInitAVX2_prologue)
#define datasetInitAVX2_loop_end_size (codeDatasetInitAVX2_loop_epilogue - codeDatasetInitAVX2_loop_end)
#define datasetInitAVX2_epilogue_size (codeDatasetInitAVX2_ssh_load - codeDatasetInitAVX2_loop_epilogue)
#define datasetInitAVX2_ssh_load_size (codeDatasetInitAVX2_ssh_prefetch - codeDatasetInitAVX2_ssh_load)
#define datasetInitAVX2_ssh_prefetch_size (codeEpilogue - codeDatasetInitAVX2_ssh_prefetch)
#define epilogueSize (codeShhLoad - codeEpilogue)
#define codeSshLoadSize (codeShhPrefetch - codeShhLoad)
#define codeSshPrefetchSize (codeShhEnd - codeShhPrefetch)
@ -166,20 +184,27 @@ namespace randomx {
{0x0F, 0x1F, 0x44, 0x00, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
};
// Rounds the address p down to the nearest multiple of pageSize and returns
// it as a pointer. An address that is already a multiple is returned unchanged.
static inline uint8_t* alignToPage(uint8_t* p, size_t pageSize) {
    const size_t address = reinterpret_cast<size_t>(p);
    const size_t offsetInPage = address % pageSize;
    return reinterpret_cast<uint8_t*>(address - offsetInPage);
}
// Returns how far codePos has advanced past the prologue, or 0 while codePos
// is still below prologueSize.
size_t JitCompilerX86::getCodeSize() {
    if (codePos < prologueSize) {
        return 0;
    }
    return codePos - prologueSize;
}
static inline void cpuid(uint32_t level, int32_t output[4])
{
memset(output, 0, sizeof(int32_t) * 4);
// Applies VirtualMemory::protectRW to the region that starts at the 4096-byte
// boundary at or below code and ends at code + CodeSize.
void JitCompilerX86::enableWriting() const {
    uint8_t* const regionBegin = alignToPage(code, 4096);
    uint8_t* const regionEnd = code + CodeSize;
    xmrig::VirtualMemory::protectRW(regionBegin, regionEnd - regionBegin);
}
# ifdef _MSC_VER
__cpuid(output, static_cast<int>(level));
# else
__cpuid_count(level, 0, output[0], output[1], output[2], output[3]);
# endif
}
// Applies VirtualMemory::protectRX to the region that starts at the 4096-byte
// boundary at or below code and ends at code + CodeSize.
void JitCompilerX86::enableExecution() const {
    uint8_t* const regionBegin = alignToPage(code, 4096);
    uint8_t* const regionEnd = code + CodeSize;
    xmrig::VirtualMemory::protectRX(regionBegin, regionEnd - regionBegin);
}
# ifdef _MSC_VER
static FORCE_INLINE uint32_t rotl32(uint32_t a, int shift) { return _rotl(a, shift); }
@ -190,17 +215,68 @@ namespace randomx {
static std::atomic<size_t> codeOffset;
constexpr size_t codeOffsetIncrement = 59 * 64;
JitCompilerX86::JitCompilerX86(bool hugePagesEnable) {
JitCompilerX86::JitCompilerX86(bool hugePagesEnable, bool optimizedInitDatasetEnable) {
BranchesWithin32B = xmrig::Cpu::info()->jccErratum();
int32_t info[4];
cpuid(1, info);
hasAVX = ((info[2] & (1 << 27)) != 0) && ((info[2] & (1 << 28)) != 0);
hasAVX = xmrig::Cpu::info()->hasAVX();
hasAVX2 = xmrig::Cpu::info()->hasAVX2();
cpuid(0x80000001, info);
hasXOP = ((info[2] & (1 << 11)) != 0);
// Disable by default
initDatasetAVX2 = false;
allocatedCode = (uint8_t*)allocExecutableMemory(CodeSize * 2, hugePagesJIT && hugePagesEnable);
if (optimizedInitDatasetEnable) {
// Dataset init using AVX2:
// -1 = Auto detect
// 0 = Always disabled
// +1 = Always enabled
if (optimizedDatasetInit > 0) {
initDatasetAVX2 = true;
}
else if (optimizedDatasetInit < 0) {
xmrig::ICpuInfo::Vendor vendor = xmrig::Cpu::info()->vendor();
xmrig::ICpuInfo::Arch arch = xmrig::Cpu::info()->arch();
if (vendor == xmrig::ICpuInfo::VENDOR_INTEL) {
// AVX2 init is faster on Intel CPUs without HT
initDatasetAVX2 = (xmrig::Cpu::info()->cores() == xmrig::Cpu::info()->threads());
}
else if (vendor == xmrig::ICpuInfo::VENDOR_AMD) {
switch (arch) {
case xmrig::ICpuInfo::ARCH_ZEN:
case xmrig::ICpuInfo::ARCH_ZEN_PLUS:
default:
// AVX2 init is slower on Zen/Zen+
// Also disable it for other unknown architectures
initDatasetAVX2 = false;
break;
case xmrig::ICpuInfo::ARCH_ZEN2:
// AVX2 init is faster on Zen2 without SMT (mobile CPUs)
initDatasetAVX2 = (xmrig::Cpu::info()->cores() == xmrig::Cpu::info()->threads());
break;
case xmrig::ICpuInfo::ARCH_ZEN3:
// AVX2 init is faster on Zen3
initDatasetAVX2 = true;
break;
}
}
}
}
// Sorry, low-end Intel CPUs
if (!hasAVX2) {
initDatasetAVX2 = false;
}
hasXOP = xmrig::Cpu::info()->hasXOP();
allocatedSize = initDatasetAVX2 ? (CodeSize * 4) : (CodeSize * 2);
allocatedCode = static_cast<uint8_t*>(allocExecutableMemory(allocatedSize,
# ifdef XMRIG_SECURE_JIT
false
# else
hugePagesJIT && hugePagesEnable
# endif
));
// Shift code base address to improve caching - all threads will use different L2/L3 cache sets
code = allocatedCode + (codeOffset.fetch_add(codeOffsetIncrement) % CodeSize);
@ -224,7 +300,7 @@ namespace randomx {
JitCompilerX86::~JitCompilerX86() {
codeOffset.fetch_sub(codeOffsetIncrement);
freePagedMemory(allocatedCode, CodeSize);
freePagedMemory(allocatedCode, allocatedSize);
}
void JitCompilerX86::prepare() {
@ -237,6 +313,10 @@ namespace randomx {
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) {
PROFILE_SCOPE(RandomX_JIT_compile);
# ifdef XMRIG_SECURE_JIT
enableWriting();
# endif
vm_flags = flags;
generateProgramPrologue(prog, pcfg);
@ -271,14 +351,49 @@ namespace randomx {
template<size_t N>
void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[N]) {
uint8_t* p = code;
if (initDatasetAVX2) {
codePos = 0;
emit(codeDatasetInitAVX2_prologue, datasetInitAVX2_prologue_size, code, codePos);
for (unsigned j = 0; j < RandomX_CurrentConfig.CacheAccesses; ++j) {
SuperscalarProgram& prog = programs[j];
uint32_t pos = codePos;
for (uint32_t i = 0, n = prog.getSize(); i < n; ++i) {
generateSuperscalarCode<true>(prog(i), p, pos);
}
codePos = pos;
emit(codeShhLoad, codeSshLoadSize, code, codePos);
emit(codeDatasetInitAVX2_ssh_load, datasetInitAVX2_ssh_load_size, code, codePos);
if (j < RandomX_CurrentConfig.CacheAccesses - 1) {
*(uint32_t*)(code + codePos) = 0xd88b49 + (static_cast<uint32_t>(prog.getAddressRegister()) << 16);
codePos += 3;
emit(RandomX_CurrentConfig.codeShhPrefetchTweaked, codeSshPrefetchSize, code, codePos);
uint8_t* p = code + codePos;
emit(codeDatasetInitAVX2_ssh_prefetch, datasetInitAVX2_ssh_prefetch_size, code, codePos);
p[3] += prog.getAddressRegister() << 3;
}
}
emit(codeDatasetInitAVX2_loop_end, datasetInitAVX2_loop_end_size, code, codePos);
// Number of bytes from the start of randomx_dataset_init_avx2_prologue to loop_begin label
constexpr int32_t prologue_size = 320;
*(int32_t*)(code + codePos - 4) = prologue_size - codePos;
emit(codeDatasetInitAVX2_loop_epilogue, datasetInitAVX2_epilogue_size, code, codePos);
return;
}
memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize);
codePos = superScalarHashOffset + codeSshInitSize;
for (unsigned j = 0; j < RandomX_CurrentConfig.CacheAccesses; ++j) {
SuperscalarProgram& prog = programs[j];
for (unsigned i = 0; i < prog.getSize(); ++i) {
Instruction& instr = prog(i);
generateSuperscalarCode(instr);
uint32_t pos = codePos;
for (uint32_t i = 0, n = prog.getSize(); i < n; ++i) {
generateSuperscalarCode<false>(prog(i), p, pos);
}
codePos = pos;
emit(codeShhLoad, codeSshLoadSize, code, codePos);
if (j < RandomX_CurrentConfig.CacheAccesses - 1) {
*(uint32_t*)(code + codePos) = 0xd88b49 + (static_cast<uint32_t>(prog.getAddressRegister()) << 16);
@ -293,7 +408,10 @@ namespace randomx {
void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_MAX_ACCESSES]);
void JitCompilerX86::generateDatasetInitCode() {
memcpy(code, codeDatasetInit, datasetInitSize);
// AVX2 code is generated in generateSuperscalarHash()
if (!initDatasetAVX2) {
memcpy(code, codeDatasetInit, datasetInitSize);
}
}
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
@ -372,101 +490,243 @@ namespace randomx {
emit32(epilogueOffset - codePos - 4, code, codePos);
}
void JitCompilerX86::generateSuperscalarCode(Instruction& instr) {
static constexpr uint8_t REX_SUB_RR[] = { 0x4d, 0x2b };
static constexpr uint8_t REX_MOV_RR64[] = { 0x49, 0x8b };
static constexpr uint8_t REX_MOV_R64R[] = { 0x4c, 0x8b };
static constexpr uint8_t REX_IMUL_RR[] = { 0x4d, 0x0f, 0xaf };
static constexpr uint8_t REX_IMUL_RM[] = { 0x4c, 0x0f, 0xaf };
static constexpr uint8_t REX_MUL_R[] = { 0x49, 0xf7 };
static constexpr uint8_t REX_81[] = { 0x49, 0x81 };
static constexpr uint8_t MOV_RAX_I[] = { 0x48, 0xb8 };
static constexpr uint8_t REX_LEA[] = { 0x4f, 0x8d };
static constexpr uint8_t REX_XOR_RR[] = { 0x4D, 0x33 };
static constexpr uint8_t REX_XOR_RI[] = { 0x49, 0x81 };
static constexpr uint8_t REX_ROT_I8[] = { 0x49, 0xc1 };
template<bool AVX2>
FORCE_INLINE void JitCompilerX86::generateSuperscalarCode(Instruction& instr, uint8_t* code, uint32_t& codePos) {
switch ((SuperscalarInstructionType)instr.opcode)
{
case randomx::SuperscalarInstructionType::ISUB_R:
emit(REX_SUB_RR, code, codePos);
emitByte(0xc0 + 8 * instr.dst + instr.src, code, codePos);
*(uint32_t*)(code + codePos) = 0x00C02B4DUL + (instr.dst << 19) + (instr.src << 16);
codePos += 3;
if (AVX2) {
emit32(0xC0FBFDC5UL + (instr.src << 24) + (instr.dst << 27) - (instr.dst << 11), code, codePos);
}
break;
case randomx::SuperscalarInstructionType::IXOR_R:
emit(REX_XOR_RR, code, codePos);
emitByte(0xc0 + 8 * instr.dst + instr.src, code, codePos);
*(uint32_t*)(code + codePos) = 0x00C0334DUL + (instr.dst << 19) + (instr.src << 16);
codePos += 3;
if (AVX2) {
emit32(0xC0EFFDC5UL + (instr.src << 24) + (instr.dst << 27) - (instr.dst << 11), code, codePos);
}
break;
case randomx::SuperscalarInstructionType::IADD_RS:
emit(REX_LEA, code, codePos);
emitByte(0x04 + 8 * instr.dst, code, codePos);
genSIB(instr.getModShift(), instr.src, instr.dst, code, codePos);
emit32(0x00048D4F + (instr.dst << 19) + (genSIB(instr.getModShift(), instr.src, instr.dst) << 24), code, codePos);
if (AVX2) {
if (instr.getModShift()) {
static const uint8_t t[] = { 0xC5, 0xBD, 0x73, 0xF0, 0x00, 0xC5, 0xBD, 0xD4, 0xC0 };
uint8_t* p = code + codePos;
emit(t, code, codePos);
p[3] += instr.src;
p[4] = instr.getModShift();
p[8] += instr.dst * 9;
}
else {
emit32(0xC0D4FDC5UL + (instr.src << 24) + (instr.dst << 27) - (instr.dst << 11), code, codePos);
}
}
break;
case randomx::SuperscalarInstructionType::IMUL_R:
emit(REX_IMUL_RR, code, codePos);
emitByte(0xc0 + 8 * instr.dst + instr.src, code, codePos);
emit32(0xC0AF0F4DUL + (instr.dst << 27) + (instr.src << 24), code, codePos);
if (AVX2) {
static const uint8_t t[] = {
0xC5, 0xBD, 0x73, 0xD0, 0x20,
0xC5, 0xB5, 0x73, 0xD0, 0x20,
0xC5, 0x7D, 0xF4, 0xD0,
0xC5, 0x35, 0xF4, 0xD8,
0xC5, 0xBD, 0xF4, 0xC0,
0xC4, 0xC1, 0x25, 0x73, 0xF3, 0x20,
0xC5, 0xFD, 0x73, 0xF0, 0x20,
0xC4, 0x41, 0x2D, 0xD4, 0xD3,
0xC5, 0xAD, 0xD4, 0xC0
};
uint8_t* p = code + codePos;
emit(t, code, codePos);
p[3] += instr.dst;
p[8] += instr.src;
p[11] -= instr.dst * 8;
p[13] += instr.src;
p[17] += instr.dst;
p[21] += instr.dst * 8 + instr.src;
p[29] -= instr.dst * 8;
p[31] += instr.dst;
p[41] += instr.dst * 9;
}
break;
case randomx::SuperscalarInstructionType::IROR_C:
emit(REX_ROT_I8, code, codePos);
emitByte(0xc8 + instr.dst, code, codePos);
emitByte(instr.getImm32() & 63, code, codePos);
{
const uint32_t shift = instr.getImm32() & 63;
emit32(0x00C8C149UL + (instr.dst << 16) + (shift << 24), code, codePos);
if (AVX2) {
static const uint8_t t[] = { 0xC5, 0xBD, 0x73, 0xD0, 0x00, 0xC5, 0xB5, 0x73, 0xF0, 0x00, 0xC4, 0xC1, 0x3D, 0xEB, 0xC1 };
uint8_t* p = code + codePos;
emit(t, code, codePos);
p[3] += instr.dst;
p[4] = shift;
p[8] += instr.dst;
p[9] = 64 - shift;
p[14] += instr.dst * 8;
}
}
break;
case randomx::SuperscalarInstructionType::IADD_C7:
emit(REX_81, code, codePos);
emitByte(0xc0 + instr.dst, code, codePos);
emit32(instr.getImm32(), code, codePos);
case randomx::SuperscalarInstructionType::IADD_C8:
case randomx::SuperscalarInstructionType::IADD_C9:
if (AVX2) {
static const uint8_t t[] = { 0x48, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4C, 0x03, 0xC0, 0xC4, 0x62, 0x7D, 0x19, 0x05, 0xEC, 0xFF, 0xFF, 0xFF, 0xC4, 0xC1, 0x7D, 0xD4, 0xC0 };
uint8_t* p = code + codePos;
emit(t, code, codePos);
*(uint64_t*)(p + 2) = signExtend2sCompl(instr.getImm32());
p[12] += instr.dst * 8;
p[24] -= instr.dst * 8;
p[26] += instr.dst * 8;
}
else {
*(uint32_t*)(code + codePos) = 0x00C08149UL + (instr.dst << 16);
codePos += 3;
emit32(instr.getImm32(), code, codePos);
}
break;
case randomx::SuperscalarInstructionType::IXOR_C7:
emit(REX_XOR_RI, code, codePos);
emitByte(0xf0 + instr.dst, code, codePos);
emit32(instr.getImm32(), code, codePos);
break;
case randomx::SuperscalarInstructionType::IADD_C8:
emit(REX_81, code, codePos);
emitByte(0xc0 + instr.dst, code, codePos);
emit32(instr.getImm32(), code, codePos);
break;
case randomx::SuperscalarInstructionType::IXOR_C8:
emit(REX_XOR_RI, code, codePos);
emitByte(0xf0 + instr.dst, code, codePos);
emit32(instr.getImm32(), code, codePos);
break;
case randomx::SuperscalarInstructionType::IADD_C9:
emit(REX_81, code, codePos);
emitByte(0xc0 + instr.dst, code, codePos);
emit32(instr.getImm32(), code, codePos);
break;
case randomx::SuperscalarInstructionType::IXOR_C9:
emit(REX_XOR_RI, code, codePos);
emitByte(0xf0 + instr.dst, code, codePos);
emit32(instr.getImm32(), code, codePos);
if (AVX2) {
static const uint8_t t[] = { 0x48, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4C, 0x33, 0xC0, 0xC4, 0x62, 0x7D, 0x19, 0x05, 0xEC, 0xFF, 0xFF, 0xFF, 0xC4, 0xC1, 0x7D, 0xEF, 0xC0 };
uint8_t* p = code + codePos;
emit(t, code, codePos);
*(uint64_t*)(p + 2) = signExtend2sCompl(instr.getImm32());
p[12] += instr.dst * 8;
p[24] -= instr.dst * 8;
p[26] += instr.dst * 8;
}
else {
*(uint32_t*)(code + codePos) = 0x00F08149UL + (instr.dst << 16);
codePos += 3;
emit32(instr.getImm32(), code, codePos);
}
break;
case randomx::SuperscalarInstructionType::IMULH_R:
emit(REX_MOV_RR64, code, codePos);
emitByte(0xc0 + instr.dst, code, codePos);
emit(REX_MUL_R, code, codePos);
emitByte(0xe0 + instr.src, code, codePos);
emit(REX_MOV_R64R, code, codePos);
emitByte(0xc2 + 8 * instr.dst, code, codePos);
*(uint32_t*)(code + codePos) = 0x00C08B49UL + (instr.dst << 16);
codePos += 3;
*(uint32_t*)(code + codePos) = 0x00E0F749UL + (instr.src << 16);
codePos += 3;
*(uint32_t*)(code + codePos) = 0x00C28B4CUL + (instr.dst << 19);
codePos += 3;
if (AVX2) {
static const uint8_t t[] = {
0xC5, 0xBD, 0x73, 0xD0, 0x20,
0xC5, 0xB5, 0x73, 0xD0, 0x20,
0xC5, 0x7D, 0xF4, 0xD0,
0xC5, 0x3D, 0xF4, 0xD8,
0xC4, 0x41, 0x7D, 0xF4, 0xE1,
0xC4, 0xC1, 0x3D, 0xF4, 0xC1,
0xC4, 0xC1, 0x2D, 0x73, 0xD2, 0x20,
0xC4, 0x41, 0x25, 0xEF, 0xC6,
0xC4, 0x41, 0x25, 0xD4, 0xDC,
0xC4, 0x41, 0x25, 0xD4, 0xDA,
0xC4, 0x41, 0x25, 0xEF, 0xCE,
0xC4, 0x42, 0x3D, 0x37, 0xC1,
0xC4, 0x41, 0x3D, 0xDB, 0xC7,
0xC5, 0xBD, 0xD4, 0xC0,
0xC4, 0xC1, 0x25, 0x73, 0xD3, 0x20,
0xC5, 0xA5, 0xD4, 0xC0
};
uint8_t* p = code + codePos;
emit(t, code, codePos);
p[3] += instr.dst;
p[8] += instr.src;
p[11] -= instr.dst * 8;
p[13] += instr.src;
p[17] += instr.src;
p[20] -= instr.dst * 8;
p[27] += instr.dst * 8;
p[67] += instr.dst * 9;
p[77] += instr.dst * 9;
}
break;
case randomx::SuperscalarInstructionType::ISMULH_R:
emit(REX_MOV_RR64, code, codePos);
emitByte(0xc0 + instr.dst, code, codePos);
emit(REX_MUL_R, code, codePos);
emitByte(0xe8 + instr.src, code, codePos);
emit(REX_MOV_R64R, code, codePos);
emitByte(0xc2 + 8 * instr.dst, code, codePos);
*(uint32_t*)(code + codePos) = 0x00C08B49UL + (instr.dst << 16);
codePos += 3;
*(uint32_t*)(code + codePos) = 0x00E8F749UL + (instr.src << 16);
codePos += 3;
*(uint32_t*)(code + codePos) = 0x00C28B4CUL + (instr.dst << 19);
codePos += 3;
if (AVX2) {
static const uint8_t t[] = {
0xC5, 0xBD, 0x73, 0xD0, 0x20,
0xC5, 0xB5, 0x73, 0xD0, 0x20,
0xC5, 0x7D, 0xF4, 0xD0,
0xC5, 0x3D, 0xF4, 0xD8,
0xC4, 0x41, 0x7D, 0xF4, 0xE1,
0xC4, 0x41, 0x3D, 0xF4, 0xE9,
0xC4, 0xC1, 0x2D, 0x73, 0xD2, 0x20,
0xC4, 0x41, 0x25, 0xEF, 0xC6,
0xC4, 0x41, 0x25, 0xD4, 0xDC,
0xC4, 0x41, 0x25, 0xD4, 0xDA,
0xC4, 0x41, 0x25, 0xEF, 0xCE,
0xC4, 0x42, 0x3D, 0x37, 0xC1,
0xC4, 0x41, 0x3D, 0xDB, 0xC7,
0xC4, 0x41, 0x15, 0xD4, 0xE8,
0xC4, 0xC1, 0x25, 0x73, 0xD3, 0x20,
0xC4, 0x41, 0x15, 0xD4, 0xC3,
0xC4, 0x41, 0x35, 0xEF, 0xC9,
0xC4, 0x62, 0x35, 0x37, 0xD0,
0xC4, 0x62, 0x35, 0x37, 0xD8,
0xC5, 0x2D, 0xDB, 0xD0,
0xC5, 0x25, 0xDB, 0xD8,
0xC4, 0x41, 0x3D, 0xFB, 0xC2,
0xC4, 0xC1, 0x3D, 0xFB, 0xC3
};
uint8_t* p = code + codePos;
emit(t, code, codePos);
p[3] += instr.dst;
p[8] += instr.src;
p[11] -= instr.dst * 8;
p[13] += instr.src;
p[17] += instr.src;
p[20] -= instr.dst * 8;
p[89] += instr.dst;
p[94] += instr.src;
p[98] += instr.src;
p[102] += instr.dst;
p[112] += instr.dst * 8;
}
break;
case randomx::SuperscalarInstructionType::IMUL_RCP:
emit(MOV_RAX_I, code, codePos);
*(uint32_t*)(code + codePos) = 0x0000B848UL;
codePos += 2;
emit64(randomx_reciprocal_fast(instr.getImm32()), code, codePos);
emit(REX_IMUL_RM, code, codePos);
emitByte(0xc0 + 8 * instr.dst, code, codePos);
emit32(0xC0AF0F4CUL + (instr.dst << 27), code, codePos);
if (AVX2) {
static const uint8_t t[] = {
0xC4, 0x62, 0x7D, 0x19, 0x25, 0xEB, 0xFF, 0xFF, 0xFF,
0xC5, 0xBD, 0x73, 0xD0, 0x20,
0xC4, 0xC1, 0x35, 0x73, 0xD4, 0x20,
0xC4, 0x41, 0x7D, 0xF4, 0xD4,
0xC5, 0x35, 0xF4, 0xD8,
0xC4, 0xC1, 0x3D, 0xF4, 0xC4,
0xC4, 0xC1, 0x25, 0x73, 0xF3, 0x20,
0xC5, 0xFD, 0x73, 0xF0, 0x20,
0xC4, 0x41, 0x2D, 0xD4, 0xD3,
0xC5, 0xAD, 0xD4, 0xC0
};
uint8_t* p = code + codePos;
emit(t, code, codePos);
p[12] += instr.dst;
p[22] -= instr.dst * 8;
p[28] += instr.dst;
p[33] += instr.dst * 8;
p[41] -= instr.dst * 8;
p[43] += instr.dst;
p[53] += instr.dst * 9;
}
break;
default:
UNREACHABLE;
}
}
// Explicit instantiations of generateSuperscalarCode for both values of its
// bool template parameter (false and true).
template void JitCompilerX86::generateSuperscalarCode<false>(Instruction&, uint8_t*, uint32_t&);
template void JitCompilerX86::generateSuperscalarCode<true>(Instruction&, uint8_t*, uint32_t&);
template<bool rax>
FORCE_INLINE void JitCompilerX86::genAddressReg(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos) {
*(uint32_t*)(code + codePos) = (rax ? 0x24808d41 : 0x24888d41) + (src << 16);
@ -546,10 +806,6 @@ namespace randomx {
codePos = pos;
}
// Writes a single byte at code[codePos] through emitByte. The byte is
// (scale << 6) | (index << 3) | base, narrowed to uint8_t by emitByte's
// parameter type.
void JitCompilerX86::genSIB(int scale, int index, int base, uint8_t* code, uint32_t& codePos) {
	const int packed = (scale << 6) | (index << 3) | base;
	emitByte(packed, code, codePos);
}
void JitCompilerX86::h_ISUB_R(const Instruction& instr) {
uint8_t* const p = code;
uint32_t pos = codePos;
@ -1088,11 +1344,11 @@ namespace randomx {
pos += 14;
if (jmp_offset >= -128) {
*(uint32_t*)(p + pos) = 0x74 + (jmp_offset << 8);
*(uint32_t*)(p + pos) = 0x74 + (static_cast<uint32_t>(jmp_offset) << 8);
pos += 2;
}
else {
*(uint64_t*)(p + pos) = 0x840f + ((static_cast<int64_t>(jmp_offset) - 4) << 16);
*(uint64_t*)(p + pos) = 0x840f + (static_cast<uint64_t>(jmp_offset - 4) << 16);
pos += 6;
}

View file

@ -1,5 +1,7 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
Copyright (c) 2019-2020, SChernykh <https://github.com/SChernykh>
Copyright (c) 2019-2020, XMRig <https://github.com/xmrig>, <support@xmrig.com>
All rights reserved.
@ -47,7 +49,7 @@ namespace randomx {
class JitCompilerX86 {
public:
explicit JitCompilerX86(bool hugePagesEnable);
explicit JitCompilerX86(bool hugePagesEnable, bool optimizedInitDatasetEnable);
~JitCompilerX86();
void prepare();
void generateProgram(Program&, ProgramConfiguration&, uint32_t);
@ -55,24 +57,38 @@ namespace randomx {
template<size_t N>
void generateSuperscalarHash(SuperscalarProgram (&programs)[N]);
void generateDatasetInitCode();
ProgramFunc* getProgramFunc() {
return (ProgramFunc*)code;
inline ProgramFunc *getProgramFunc() const {
# ifdef XMRIG_SECURE_JIT
enableExecution();
# endif
return reinterpret_cast<ProgramFunc*>(code);
}
DatasetInitFunc* getDatasetInitFunc() {
inline DatasetInitFunc *getDatasetInitFunc() const {
# ifdef XMRIG_SECURE_JIT
enableExecution();
# endif
return (DatasetInitFunc*)code;
}
// Returns the pointer held in the `code` member.
uint8_t* getCode() {
return code;
}
size_t getCodeSize();
void enableWriting() const;
void enableExecution() const;
alignas(64) static InstructionGeneratorX86 engine[256];
int registerUsage[RegistersCount];
uint8_t* code;
uint32_t codePos;
uint32_t codePosFirst;
uint32_t vm_flags;
private:
int registerUsage[RegistersCount] = {};
uint8_t* code = nullptr;
uint32_t codePos = 0;
uint32_t codePosFirst = 0;
uint32_t vm_flags = 0;
# ifdef XMRIG_FIX_RYZEN
std::pair<const void*, const void*> mainLoopBounds;
@ -80,9 +96,12 @@ namespace randomx {
bool BranchesWithin32B = false;
bool hasAVX;
bool hasAVX2;
bool initDatasetAVX2;
bool hasXOP;
uint8_t* allocatedCode;
uint8_t* allocatedCode = nullptr;
size_t allocatedSize = 0;
void generateProgramPrologue(Program&, ProgramConfiguration&);
void generateProgramEpilogue(Program&, ProgramConfiguration&);
@ -90,9 +109,10 @@ namespace randomx {
static void genAddressReg(const Instruction&, const uint32_t src, uint8_t* code, uint32_t& codePos);
static void genAddressRegDst(const Instruction&, uint8_t* code, uint32_t& codePos);
static void genAddressImm(const Instruction&, uint8_t* code, uint32_t& codePos);
static void genSIB(int scale, int index, int base, uint8_t* code, uint32_t& codePos);
// Combines the three arguments into one value: scale shifted left by 6,
// index shifted left by 3, and base in the low bits, OR-ed together.
static uint32_t genSIB(int scale, int index, int base) {
	const int scaleBits = scale << 6;
	const int indexBits = index << 3;
	return scaleBits | indexBits | base;
}
void generateSuperscalarCode(Instruction &);
template<bool AVX2>
void generateSuperscalarCode(Instruction& inst, uint8_t* code, uint32_t& codePos);
static void emitByte(uint8_t val, uint8_t* code, uint32_t& codePos) {
code[codePos] = val;
@ -119,6 +139,7 @@ namespace randomx {
codePos += count;
}
public:
void h_IADD_RS(const Instruction&);
void h_IADD_M(const Instruction&);
void h_ISUB_R(const Instruction&);

View file

@ -52,6 +52,11 @@
.global DECL(randomx_program_loop_store)
.global DECL(randomx_program_loop_end)
.global DECL(randomx_dataset_init)
.global DECL(randomx_dataset_init_avx2_prologue)
.global DECL(randomx_dataset_init_avx2_loop_end)
.global DECL(randomx_dataset_init_avx2_epilogue)
.global DECL(randomx_dataset_init_avx2_ssh_load)
.global DECL(randomx_dataset_init_avx2_ssh_prefetch)
.global DECL(randomx_program_epilogue)
.global DECL(randomx_sshash_load)
.global DECL(randomx_sshash_prefetch)
@ -192,6 +197,97 @@ call_offset:
pop rbx
ret
.balign 64
;# Entry code of the AVX2 dataset-init routine.
;# It runs the included register-save code, moves the arguments into rdi/rsi/rbp,
;# pushes the max. block index, reserves stack space and then initializes
;# r8-r15 (lane 0) and ymm0-ymm7 (lanes 1-4) from the block index in rbp and
;# the *_avx2_* constants.
DECL(randomx_dataset_init_avx2_prologue):
#include "asm/program_sshash_avx2_save_registers.inc"
;# the arguments arrive in different registers in the two branches below
#if defined(WINABI)
mov rdi, qword ptr [rcx] ;# cache->memory
mov rsi, rdx ;# dataset
mov rbp, r8 ;# block index
push r9 ;# max. block index
#else
mov rdi, qword ptr [rdi] ;# cache->memory
;# dataset in rsi
mov rbp, rdx ;# block index
push rcx ;# max. block index
#endif
;# reserve 40 bytes of stack; [rsp+32] is used below as a scratch slot
sub rsp, 40
;# jump over the constant data included right after this instruction
jmp randomx_dataset_init_avx2_prologue_loop_begin
#include "asm/program_sshash_avx2_constants.inc"
.balign 64
randomx_dataset_init_avx2_prologue_loop_begin:
#include "asm/program_sshash_avx2_loop_begin.inc"
;# init integer registers (lane 0)
;# r8 = (block index + 1) * r0_avx2_mul
lea r8, [rbp+1]
imul r8, qword ptr [r0_avx2_mul+rip]
;# r9-r15 = matching rN_avx2_add constant XOR r8
mov r9, qword ptr [r1_avx2_add+rip]
xor r9, r8
mov r10, qword ptr [r2_avx2_add+rip]
xor r10, r8
mov r11, qword ptr [r3_avx2_add+rip]
xor r11, r8
mov r12, qword ptr [r4_avx2_add+rip]
xor r12, r8
mov r13, qword ptr [r5_avx2_add+rip]
xor r13, r8
mov r14, qword ptr [r6_avx2_add+rip]
xor r14, r8
mov r15, qword ptr [r7_avx2_add+rip]
xor r15, r8
;# init AVX registers (lanes 1-4)
;# store the block index to the stack slot so it can be broadcast to all four
;# 64-bit lanes, then add the per-lane values from r0_avx2_increments
mov qword ptr [rsp+32], rbp
vbroadcastsd ymm0, qword ptr [rsp+32]
vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments+rip]
;# ymm0 *= r0_avx2_mul
;# 64-bit multiply per lane built from 32-bit vpmuludq products:
;# lo(a)*lo(b) + ((hi(b)*lo(a)) << 32) + ((hi(a)*lo(b)) << 32)
vbroadcastsd ymm1, qword ptr [r0_avx2_mul+rip]
vpsrlq ymm8, ymm0, 32
vpsrlq ymm9, ymm1, 32
vpmuludq ymm10, ymm0, ymm1
vpmuludq ymm11, ymm9, ymm0
vpmuludq ymm0, ymm8, ymm1
vpsllq ymm11, ymm11, 32
vpsllq ymm0, ymm0, 32
vpaddq ymm10, ymm10, ymm11
vpaddq ymm0, ymm10, ymm0
;# ymm1-ymm7 = matching rN_avx2_add constant (broadcast) XOR ymm0
vbroadcastsd ymm1, qword ptr [r1_avx2_add+rip]
vpxor ymm1, ymm0, ymm1
vbroadcastsd ymm2, qword ptr [r2_avx2_add+rip]
vpxor ymm2, ymm0, ymm2
vbroadcastsd ymm3, qword ptr [r3_avx2_add+rip]
vpxor ymm3, ymm0, ymm3
vbroadcastsd ymm4, qword ptr [r4_avx2_add+rip]
vpxor ymm4, ymm0, ymm4
vbroadcastsd ymm5, qword ptr [r5_avx2_add+rip]
vpxor ymm5, ymm0, ymm5
vbroadcastsd ymm6, qword ptr [r6_avx2_add+rip]
vpxor ymm6, ymm0, ymm6
vbroadcastsd ymm7, qword ptr [r7_avx2_add+rip]
vpxor ymm7, ymm0, ymm7
vbroadcastsd ymm15, qword ptr [mul_hi_avx2_data+rip] ;# carry_bit (bit 32)
vpsllq ymm14, ymm15, 31 ;# sign64 (bit 63)
;# generated SuperscalarHash code goes here
;# Four exported labels, each followed by one included code fragment.
;# NOTE(review): presumably the JIT compiler uses these labels to locate the
;# fragments it places around the generated SuperscalarHash code - confirm
;# against the JIT compiler sources.
DECL(randomx_dataset_init_avx2_loop_end):
#include "asm/program_sshash_avx2_loop_end.inc"
DECL(randomx_dataset_init_avx2_epilogue):
#include "asm/program_sshash_avx2_epilogue.inc"
DECL(randomx_dataset_init_avx2_ssh_load):
#include "asm/program_sshash_avx2_ssh_load.inc"
DECL(randomx_dataset_init_avx2_ssh_prefetch):
#include "asm/program_sshash_avx2_ssh_prefetch.inc"
.balign 64
DECL(randomx_program_epilogue):
#include "asm/program_epilogue_store.inc"

View file

@ -41,6 +41,11 @@ PUBLIC randomx_program_read_dataset_ryzen
PUBLIC randomx_program_read_dataset_sshash_init
PUBLIC randomx_program_read_dataset_sshash_fin
PUBLIC randomx_dataset_init
PUBLIC randomx_dataset_init_avx2_prologue
PUBLIC randomx_dataset_init_avx2_loop_end
PUBLIC randomx_dataset_init_avx2_epilogue
PUBLIC randomx_dataset_init_avx2_ssh_load
PUBLIC randomx_dataset_init_avx2_ssh_prefetch
PUBLIC randomx_program_loop_store
PUBLIC randomx_program_loop_end
PUBLIC randomx_program_epilogue
@ -183,6 +188,94 @@ init_block_loop:
randomx_dataset_init ENDP
ALIGN 64
;# Entry code of the AVX2 dataset-init routine.
;# It runs the included register-save code, moves the arguments from
;# rcx/rdx/r8 into rdi/rsi/rbp, pushes r9 (max. block index), reserves stack
;# space and then initializes r8-r15 (lane 0) and ymm0-ymm7 (lanes 1-4) from
;# the block index in rbp and the *_avx2_* constants.
randomx_dataset_init_avx2_prologue PROC
include asm/program_sshash_avx2_save_registers.inc
mov rdi, qword ptr [rcx] ;# cache->memory
mov rsi, rdx ;# dataset
mov rbp, r8 ;# block index
push r9 ;# max. block index
;# reserve 40 bytes of stack; [rsp+32] is used below as a scratch slot
sub rsp, 40
;# jump over the constant data included right after this instruction
jmp loop_begin
include asm/program_sshash_avx2_constants.inc
ALIGN 64
loop_begin:
include asm/program_sshash_avx2_loop_begin.inc
;# init integer registers (lane 0)
;# r8 = (block index + 1) * r0_avx2_mul
lea r8, [rbp+1]
imul r8, qword ptr [r0_avx2_mul]
;# r9-r15 = matching rN_avx2_add constant XOR r8
mov r9, qword ptr [r1_avx2_add]
xor r9, r8
mov r10, qword ptr [r2_avx2_add]
xor r10, r8
mov r11, qword ptr [r3_avx2_add]
xor r11, r8
mov r12, qword ptr [r4_avx2_add]
xor r12, r8
mov r13, qword ptr [r5_avx2_add]
xor r13, r8
mov r14, qword ptr [r6_avx2_add]
xor r14, r8
mov r15, qword ptr [r7_avx2_add]
xor r15, r8
;# init AVX registers (lanes 1-4)
;# store the block index to the stack slot so it can be broadcast to all four
;# 64-bit lanes, then add the per-lane values from r0_avx2_increments
mov qword ptr [rsp+32], rbp
vbroadcastsd ymm0, qword ptr [rsp+32]
vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments]
;# ymm0 *= r0_avx2_mul
;# 64-bit multiply per lane built from 32-bit vpmuludq products:
;# lo(a)*lo(b) + ((hi(b)*lo(a)) << 32) + ((hi(a)*lo(b)) << 32)
vbroadcastsd ymm1, qword ptr [r0_avx2_mul]
vpsrlq ymm8, ymm0, 32
vpsrlq ymm9, ymm1, 32
vpmuludq ymm10, ymm0, ymm1
vpmuludq ymm11, ymm9, ymm0
vpmuludq ymm0, ymm8, ymm1
vpsllq ymm11, ymm11, 32
vpsllq ymm0, ymm0, 32
vpaddq ymm10, ymm10, ymm11
vpaddq ymm0, ymm10, ymm0
;# ymm1-ymm7 = matching rN_avx2_add constant (broadcast) XOR ymm0
vbroadcastsd ymm1, qword ptr [r1_avx2_add]
vpxor ymm1, ymm0, ymm1
vbroadcastsd ymm2, qword ptr [r2_avx2_add]
vpxor ymm2, ymm0, ymm2
vbroadcastsd ymm3, qword ptr [r3_avx2_add]
vpxor ymm3, ymm0, ymm3
vbroadcastsd ymm4, qword ptr [r4_avx2_add]
vpxor ymm4, ymm0, ymm4
vbroadcastsd ymm5, qword ptr [r5_avx2_add]
vpxor ymm5, ymm0, ymm5
vbroadcastsd ymm6, qword ptr [r6_avx2_add]
vpxor ymm6, ymm0, ymm6
vbroadcastsd ymm7, qword ptr [r7_avx2_add]
vpxor ymm7, ymm0, ymm7
vbroadcastsd ymm15, qword ptr [mul_hi_avx2_data] ;# carry_bit (bit 32)
vpsllq ymm14, ymm15, 31 ;# sign64 (bit 63)
randomx_dataset_init_avx2_prologue ENDP
;# generated SuperscalarHash code goes here
;# Four PROCs, each consisting only of one included code fragment.
;# NOTE(review): presumably the JIT compiler uses these symbols to locate the
;# fragments it places around the generated SuperscalarHash code - confirm
;# against the JIT compiler sources.
randomx_dataset_init_avx2_loop_end PROC
include asm/program_sshash_avx2_loop_end.inc
randomx_dataset_init_avx2_loop_end ENDP
randomx_dataset_init_avx2_epilogue PROC
include asm/program_sshash_avx2_epilogue.inc
randomx_dataset_init_avx2_epilogue ENDP
randomx_dataset_init_avx2_ssh_load PROC
include asm/program_sshash_avx2_ssh_load.inc
randomx_dataset_init_avx2_ssh_load ENDP
randomx_dataset_init_avx2_ssh_prefetch PROC
include asm/program_sshash_avx2_ssh_prefetch.inc
randomx_dataset_init_avx2_ssh_prefetch ENDP
randomx_program_epilogue PROC
include asm/program_epilogue_store.inc
include asm/program_epilogue_win64.inc

View file

@ -44,6 +44,11 @@ extern "C" {
void randomx_program_loop_store();
void randomx_program_loop_end();
void randomx_dataset_init();
void randomx_dataset_init_avx2_prologue();
void randomx_dataset_init_avx2_loop_end();
void randomx_dataset_init_avx2_epilogue();
void randomx_dataset_init_avx2_ssh_load();
void randomx_dataset_init_avx2_ssh_prefetch();
void randomx_program_epilogue();
void randomx_sshash_load();
void randomx_sshash_prefetch();

View file

@ -53,7 +53,7 @@ extern "C" {
#include "crypto/randomx/defyx/KangarooTwelve.h"
}
#include "base/tools/Profiler.h"
#include "crypto/rx/Profiler.h"
RandomX_ConfigurationWownero::RandomX_ConfigurationWownero()
{
@ -444,9 +444,9 @@ extern "C" {
break;
case RANDOMX_FLAG_JIT:
cache->jit = new randomx::JitCompiler(false);
cache->jit = new randomx::JitCompiler(false, true);
cache->initialize = &randomx::initCacheCompile;
cache->datasetInit = cache->jit->getDatasetInitFunc();
cache->datasetInit = nullptr;
cache->memory = memory;
break;

View file

@ -177,6 +177,7 @@ void randomx_apply_config(const T& config)
void randomx_set_scratchpad_prefetch_mode(int mode);
void randomx_set_huge_pages_jit(bool hugePages);
void randomx_set_optimized_dataset_init(int value);
#if defined(__cplusplus)
extern "C" {

View file

@ -196,7 +196,7 @@ namespace randomx {
int latency_;
int resultOp_ = 0;
int dstOp_ = 0;
int srcOp_;
int srcOp_ = 0;
SuperscalarInstructionInfo(const char* name)
: name_(name), type_(SuperscalarInstructionType::INVALID), latency_(0) {}
@ -282,11 +282,11 @@ namespace randomx {
return fetchNextDefault(gen);
}
private:
const char* name_;
int index_;
const int* counts_;
int opsCount_;
DecoderBuffer() : index_(-1) {}
const char* name_ = nullptr;
int index_ = -1;
const int* counts_ = nullptr;
int opsCount_ = 0;
DecoderBuffer() = default;
static const DecoderBuffer decodeBuffer484;
static const DecoderBuffer decodeBuffer7333;
static const DecoderBuffer decodeBuffer3733;
@ -555,10 +555,10 @@ namespace randomx {
const SuperscalarInstructionInfo* info_;
int src_ = -1;
int dst_ = -1;
int mod_;
uint32_t imm32_;
SuperscalarInstructionType opGroup_;
int opGroupPar_;
int mod_ = 0;
uint32_t imm32_ = 0;
SuperscalarInstructionType opGroup_ = SuperscalarInstructionType::INVALID;
int opGroupPar_ = 0;
bool canReuse_ = false;
bool groupParIsSource_ = false;

View file

@ -39,13 +39,13 @@ namespace randomx {
// Returns a mutable reference to programBuffer[pc]. No range check on pc is
// done here.
Instruction& operator()(int pc) {
return programBuffer[pc];
}
uint32_t getSize() {
uint32_t getSize() const {
return size;
}
// Stores val in the size member.
void setSize(uint32_t val) {
size = val;
}
int getAddressRegister() {
int getAddressRegister() const {
return addrReg;
}
void setAddressRegister(int val) {

View file

@ -30,13 +30,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <iomanip>
#include <stdexcept>
#include "crypto/randomx/virtual_machine.hpp"
#include "crypto/randomx/common.hpp"
#include "crypto/randomx/aes_hash.hpp"
#include "crypto/randomx/blake2/blake2.h"
#include "crypto/randomx/intrin_portable.h"
#include "crypto/randomx/allocator.hpp"
#include "crypto/randomx/blake2/blake2.h"
#include "crypto/randomx/common.hpp"
#include "crypto/randomx/intrin_portable.h"
#include "crypto/randomx/soft_aes.h"
#include "base/tools/Profiler.h"
#include "crypto/rx/Profiler.h"
randomx_vm::~randomx_vm() {

View file

@ -1,5 +1,7 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
Copyright (c) 2019-2020, SChernykh <https://github.com/SChernykh>
Copyright (c) 2019-2020, XMRig <https://github.com/xmrig>, <support@xmrig.com>
All rights reserved.
@ -28,7 +30,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/randomx/vm_compiled.hpp"
#include "crypto/randomx/common.hpp"
#include "base/tools/Profiler.h"
#include "crypto/rx/Profiler.h"
namespace randomx {
@ -56,9 +58,9 @@ namespace randomx {
void CompiledVm<softAes>::execute() {
PROFILE_SCOPE(RandomX_JIT_execute);
#ifdef XMRIG_ARM
# ifdef XMRIG_ARM
memcpy(reg.f, config.eMask, sizeof(config.eMask));
#endif
# endif
compiler.getProgramFunc()(reg, mem, scratchpad, RandomX_CurrentConfig.ProgramIterations);
}

View file

@ -59,7 +59,7 @@ namespace randomx {
protected:
void execute();
JitCompiler compiler{ true };
JitCompiler compiler{ true, false };
};
using CompiledVmDefault = CompiledVm<1>;

View file

@ -1,5 +1,7 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
Copyright (c) 2019-2020, SChernykh <https://github.com/SChernykh>
Copyright (c) 2019-2020, XMRig <https://github.com/xmrig>, <support@xmrig.com>
All rights reserved.
@ -36,6 +38,11 @@ namespace randomx {
// Attaches `cache` to this VM: remembers the pointer, points mem.memory at
// cache->memory and generates SuperscalarHash code from cache->programs.
void CompiledLightVm<softAes>::setCache(randomx_cache* cache) {
cachePtr = cache;
mem.memory = cache->memory;
# ifdef XMRIG_SECURE_JIT
// Called before any code generation when XMRIG_SECURE_JIT is defined.
// NOTE(review): presumably switches the JIT code buffer to writable - confirm
// against enableWriting().
compiler.enableWriting();
# endif
compiler.generateSuperscalarHash(cache->programs);
}
@ -43,7 +50,13 @@ namespace randomx {
// Runs one program for `seed`: generates the program, calls
// randomx_vm::initialize(), compiles the program with generateProgramLight
// and then executes it through CompiledVm<softAes>::execute().
void CompiledLightVm<softAes>::run(void* seed) {
VmBase<softAes>::generateProgram(seed);
randomx_vm::initialize();
# ifdef XMRIG_SECURE_JIT
// Called before code generation when XMRIG_SECURE_JIT is defined.
// NOTE(review): presumably switches the JIT code buffer to writable - confirm
// against enableWriting().
compiler.enableWriting();
# endif
compiler.generateProgramLight(program, config, datasetOffset);
CompiledVm<softAes>::execute();
}