Merge xmrig v6.7.0 into master
This commit is contained in:
commit
1719879f7e
249 changed files with 6814 additions and 6134 deletions
|
@ -28,12 +28,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include <array>
|
||||
|
||||
#include "crypto/randomx/aes_hash.hpp"
|
||||
#include "crypto/randomx/soft_aes.h"
|
||||
#include "crypto/randomx/randomx.h"
|
||||
#include "base/tools/Chrono.h"
|
||||
#include "base/tools/Profiler.h"
|
||||
#include "crypto/randomx/randomx.h"
|
||||
#include "crypto/randomx/soft_aes.h"
|
||||
#include "crypto/rx/Profiler.h"
|
||||
|
||||
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
|
||||
#define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e
|
||||
|
@ -371,7 +372,7 @@ hashAndFillAes1Rx4_impl* softAESImpl = &hashAndFillAes1Rx4<1,1>;
|
|||
void SelectSoftAESImpl(size_t threadsCount)
|
||||
{
|
||||
constexpr int test_length_ms = 100;
|
||||
const std::vector<hashAndFillAes1Rx4_impl *> impl = {
|
||||
const std::array<hashAndFillAes1Rx4_impl *, 4> impl = {
|
||||
&hashAndFillAes1Rx4<1,1>,
|
||||
&hashAndFillAes1Rx4<2,1>,
|
||||
&hashAndFillAes1Rx4<2,2>,
|
||||
|
|
28
src/crypto/randomx/asm/program_sshash_avx2_constants.inc
Normal file
28
src/crypto/randomx/asm/program_sshash_avx2_constants.inc
Normal file
|
@ -0,0 +1,28 @@
|
|||
;# Constant tables (file name: program_sshash_avx2_constants.inc).
;# r0_avx2_increments: four little-endian 64-bit values 2, 3, 4, 5.
r0_avx2_increments:
|
||||
db 2,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0
|
||||
;# mul_hi_avx2_data: two little-endian 32-bit values 0 and 1 (read as one 64-bit value: 2^32).
mul_hi_avx2_data:
|
||||
db 0,0,0,0,1,0,0,0
|
||||
;# r0_avx2_mul: the eight bytes are the little-endian encoding of the decimal
;# value given in the comment below (0x5851F42D4C957F2D).
r0_avx2_mul:
|
||||
;#/ 6364136223846793005
|
||||
db 45, 127, 149, 76, 45, 244, 81, 88
|
||||
;# r1_avx2_add .. r7_avx2_add: same layout, a decimal value in a comment followed by
;# eight bytes (presumably its little-endian encoding as well - TODO confirm each entry).
r1_avx2_add:
|
||||
;#/ 9298411001130361340
|
||||
db 252, 161, 245, 89, 138, 151, 10, 129
|
||||
r2_avx2_add:
|
||||
;#/ 12065312585734608966
|
||||
db 70, 216, 194, 56, 223, 153, 112, 167
|
||||
r3_avx2_add:
|
||||
;#/ 9306329213124626780
|
||||
db 92, 73, 34, 191, 28, 185, 38, 129
|
||||
r4_avx2_add:
|
||||
;#/ 5281919268842080866
|
||||
db 98, 138, 159, 23, 151, 37, 77, 73
|
||||
r5_avx2_add:
|
||||
;#/ 10536153434571861004
|
||||
db 12, 236, 170, 206, 185, 239, 55, 146
|
||||
r6_avx2_add:
|
||||
;#/ 3398623926847679864
|
||||
db 120, 45, 230, 108, 116, 86, 42, 47
|
||||
r7_avx2_add:
|
||||
;#/ 9549104520008361294
|
||||
db 78, 229, 44, 182, 247, 59, 133, 132
|
31
src/crypto/randomx/asm/program_sshash_avx2_epilogue.inc
Normal file
31
src/crypto/randomx/asm/program_sshash_avx2_epilogue.inc
Normal file
|
@ -0,0 +1,31 @@
|
|||
;# Epilogue fragment: unwinds the stack, restores the saved registers and returns.
;# Release 40 bytes of stack, then pop one more qword into r9.
add rsp, 40
|
||||
pop r9
|
||||
|
||||
;# Reload xmm0-xmm15 from the 256-byte area at [rsp] .. [rsp+255] (16 bytes per register).
movdqu xmm0, xmmword ptr [rsp]
|
||||
movdqu xmm1, xmmword ptr [rsp + 16]
|
||||
movdqu xmm2, xmmword ptr [rsp + 32]
|
||||
movdqu xmm3, xmmword ptr [rsp + 48]
|
||||
movdqu xmm4, xmmword ptr [rsp + 64]
|
||||
movdqu xmm5, xmmword ptr [rsp + 80]
|
||||
movdqu xmm6, xmmword ptr [rsp + 96]
|
||||
movdqu xmm7, xmmword ptr [rsp + 112]
|
||||
movdqu xmm8, xmmword ptr [rsp + 128]
|
||||
movdqu xmm9, xmmword ptr [rsp + 144]
|
||||
movdqu xmm10, xmmword ptr [rsp + 160]
|
||||
movdqu xmm11, xmmword ptr [rsp + 176]
|
||||
movdqu xmm12, xmmword ptr [rsp + 192]
|
||||
movdqu xmm13, xmmword ptr [rsp + 208]
|
||||
movdqu xmm14, xmmword ptr [rsp + 224]
|
||||
movdqu xmm15, xmmword ptr [rsp + 240]
|
||||
;# Zero the upper halves of the ymm registers, then release the 256-byte save area.
vzeroupper
|
||||
add rsp, 256
|
||||
|
||||
;# Pop r15, r14, r13, r12, rsi, rdi, rbp, rbx (in that order) and return.
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rsi
|
||||
pop rdi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
37
src/crypto/randomx/asm/program_sshash_avx2_loop_begin.inc
Normal file
37
src/crypto/randomx/asm/program_sshash_avx2_loop_begin.inc
Normal file
|
@ -0,0 +1,37 @@
|
|||
;# Loop-head fragment: issues prefetches and stores four computed addresses on the stack.
;# prefetch RandomX dataset lines
;# Five prefetches spaced 64 bytes apart, covering [rsi] .. [rsi+319].
prefetchnta byte ptr [rsi]
|
||||
prefetchnta byte ptr [rsi+64]
|
||||
prefetchnta byte ptr [rsi+128]
|
||||
prefetchnta byte ptr [rsi+192]
|
||||
prefetchnta byte ptr [rsi+256]
|
||||
|
||||
;# prefetch RandomX cache lines
;# rbx = rdi + ((rbp & RANDOMX_CACHE_MASK) << 6); the shift by 6 scales the masked index by 64.
mov rbx, rbp
|
||||
and rbx, RANDOMX_CACHE_MASK
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
prefetchnta byte ptr [rbx]
|
||||
;# Same computation for rbp+1 (in rax); the address is prefetched and saved at [rsp].
lea rax, [rbp+1]
|
||||
and rax, RANDOMX_CACHE_MASK
|
||||
shl rax, 6
|
||||
add rax, rdi
|
||||
prefetchnta byte ptr [rax]
|
||||
mov [rsp], rax
|
||||
;# rbp+2: address saved at [rsp+8].
lea rax, [rbp+2]
|
||||
and rax, RANDOMX_CACHE_MASK
|
||||
shl rax, 6
|
||||
add rax, rdi
|
||||
prefetchnta byte ptr [rax]
|
||||
mov [rsp+8], rax
|
||||
;# rbp+3: address saved at [rsp+16].
lea rax, [rbp+3]
|
||||
and rax, RANDOMX_CACHE_MASK
|
||||
shl rax, 6
|
||||
add rax, rdi
|
||||
prefetchnta byte ptr [rax]
|
||||
mov [rsp+16], rax
|
||||
;# rbp+4: address saved at [rsp+24].
lea rax, [rbp+4]
|
||||
and rax, RANDOMX_CACHE_MASK
|
||||
shl rax, 6
|
||||
add rax, rdi
|
||||
prefetchnta byte ptr [rax]
|
||||
mov [rsp+24], rax
|
38
src/crypto/randomx/asm/program_sshash_avx2_loop_end.inc
Normal file
38
src/crypto/randomx/asm/program_sshash_avx2_loop_end.inc
Normal file
|
@ -0,0 +1,38 @@
|
|||
;# Loop-tail fragment: writes 320 bytes at [rsi], advances the counters and loops.
;# The scalar stores put r8-r15 into the first 64 bytes ([rsi+0] .. [rsi+63]).
;# Interleaved with them, the unpack instructions start a transpose of ymm0-ymm7:
;#   ymm8-ymm11  = even qwords (0 and 2) of the pairs (ymm0,ymm1) .. (ymm6,ymm7)
;#   ymm12-ymm15 = odd qwords  (1 and 3) of the same pairs
mov qword ptr [rsi+0], r8
|
||||
vpunpcklqdq ymm8, ymm0, ymm1
|
||||
mov qword ptr [rsi+8], r9
|
||||
vpunpcklqdq ymm9, ymm2, ymm3
|
||||
mov qword ptr [rsi+16], r10
|
||||
vpunpcklqdq ymm10, ymm4, ymm5
|
||||
mov qword ptr [rsi+24], r11
|
||||
vpunpcklqdq ymm11, ymm6, ymm7
|
||||
mov qword ptr [rsi+32], r12
|
||||
vpunpckhqdq ymm12, ymm0, ymm1
|
||||
mov qword ptr [rsi+40], r13
|
||||
vpunpckhqdq ymm13, ymm2, ymm3
|
||||
mov qword ptr [rsi+48], r14
|
||||
vpunpckhqdq ymm14, ymm4, ymm5
|
||||
mov qword ptr [rsi+56], r15
|
||||
vpunpckhqdq ymm15, ymm6, ymm7
|
||||
|
||||
;# vperm2i128 with selector 32 joins the low 128-bit halves, 49 joins the high halves.
;# Result: the 64 bytes at [rsi+64+64*k] hold qword k of the original ymm0..ymm7, k = 0..3.
vperm2i128 ymm0, ymm8, ymm9, 32
|
||||
vperm2i128 ymm1, ymm10, ymm11, 32
|
||||
vmovdqu ymmword ptr [rsi+64], ymm0
|
||||
vmovdqu ymmword ptr [rsi+96], ymm1
|
||||
vperm2i128 ymm2, ymm12, ymm13, 32
|
||||
vperm2i128 ymm3, ymm14, ymm15, 32
|
||||
vmovdqu ymmword ptr [rsi+128], ymm2
|
||||
vmovdqu ymmword ptr [rsi+160], ymm3
|
||||
vperm2i128 ymm4, ymm8, ymm9, 49
|
||||
vperm2i128 ymm5, ymm10, ymm11, 49
|
||||
vmovdqu ymmword ptr [rsi+192], ymm4
|
||||
vmovdqu ymmword ptr [rsi+224], ymm5
|
||||
vperm2i128 ymm6, ymm12, ymm13, 49
|
||||
vperm2i128 ymm7, ymm14, ymm15, 49
|
||||
vmovdqu ymmword ptr [rsi+256], ymm6
|
||||
vmovdqu ymmword ptr [rsi+288], ymm7
|
||||
|
||||
;# Advance the index by 5 and the output pointer by 320 bytes (5 * 64),
;# then compare the index with the limit stored at [rsp+40].
add rbp, 5
|
||||
add rsi, 320
|
||||
cmp rbp, qword ptr [rsp+40]
|
||||
;# Hand-encoded jb with a zero 32-bit displacement; JitCompilerX86::generateSuperscalarHash
;# overwrites the last four bytes with the real displacement after emitting this fragment.
db 15, 130, 0, 0, 0, 0 ;# jb rel32
|
|
@ -0,0 +1,27 @@
|
|||
;# Prologue fragment: saves eight general-purpose registers and xmm0-xmm15 on the stack.
;# Push order is rbx, rbp, rdi, rsi, r12, r13, r14, r15.
push rbx
|
||||
push rbp
|
||||
push rdi
|
||||
push rsi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
|
||||
;# save all XMM registers just to be safe for all calling conventions
;# A 256-byte area is reserved; xmmN is stored at [rsp + 16*N].
sub rsp, 256
|
||||
movdqu xmmword ptr [rsp], xmm0
|
||||
movdqu xmmword ptr [rsp + 16], xmm1
|
||||
movdqu xmmword ptr [rsp + 32], xmm2
|
||||
movdqu xmmword ptr [rsp + 48], xmm3
|
||||
movdqu xmmword ptr [rsp + 64], xmm4
|
||||
movdqu xmmword ptr [rsp + 80], xmm5
|
||||
movdqu xmmword ptr [rsp + 96], xmm6
|
||||
movdqu xmmword ptr [rsp + 112], xmm7
|
||||
movdqu xmmword ptr [rsp + 128], xmm8
|
||||
movdqu xmmword ptr [rsp + 144], xmm9
|
||||
movdqu xmmword ptr [rsp + 160], xmm10
|
||||
movdqu xmmword ptr [rsp + 176], xmm11
|
||||
movdqu xmmword ptr [rsp + 192], xmm12
|
||||
movdqu xmmword ptr [rsp + 208], xmm13
|
||||
movdqu xmmword ptr [rsp + 224], xmm14
|
||||
movdqu xmmword ptr [rsp + 240], xmm15
|
50
src/crypto/randomx/asm/program_sshash_avx2_ssh_load.inc
Normal file
50
src/crypto/randomx/asm/program_sshash_avx2_ssh_load.inc
Normal file
|
@ -0,0 +1,50 @@
|
|||
;# Load fragment: reads 64 bytes through each of four pointers, transposes them and
;# XORs the result into ymm0-ymm7.
;# Reserve 40 bytes of scratch: rbx is saved at [rsp] and ymm14 at [rsp+8] .. [rsp+39];
;# both are restored at the end of the fragment.
sub rsp, 40
|
||||
mov [rsp], rbx
|
||||
vmovdqu ymmword ptr [rsp+8], ymm14
|
||||
|
||||
;# Fetch four pointers from [rsp+40] .. [rsp+64], i.e. the four qword slots that were
;# at [rsp] .. [rsp+24] before the `sub rsp, 40` above.
mov rax, [rsp+40]
|
||||
mov rbx, [rsp+48]
|
||||
mov rcx, [rsp+56]
|
||||
mov rdx, [rsp+64]
|
||||
|
||||
;# First 32 bytes behind each pointer.
vmovdqu ymm8, ymmword ptr [rax] ;# ymm8 = r0[1], r1[1], r2[1], r3[1]
|
||||
vmovdqu ymm9, ymmword ptr [rbx] ;# ymm9 = r0[2], r1[2], r2[2], r3[2]
|
||||
vmovdqu ymm10, ymmword ptr [rcx] ;# ymm10 = r0[3], r1[3], r2[3], r3[3]
|
||||
vmovdqu ymm11, ymmword ptr [rdx] ;# ymm11 = r0[4], r1[4], r2[4], r3[4]
|
||||
|
||||
;# Transpose so each temporary holds one register's values for all four inputs,
;# then XOR it into the matching accumulator (ymm0 and ymm2 here).
vpunpcklqdq ymm12, ymm8, ymm9 ;# ymm12 = r0[1], r0[2], r2[1], r2[2]
|
||||
vpunpcklqdq ymm13, ymm10, ymm11 ;# ymm13 = r0[3], r0[4], r2[3], r2[4]
|
||||
vperm2i128 ymm14, ymm12, ymm13, 32 ;# ymm14 = r0[1], r0[2], r0[3], r0[4]
|
||||
vpxor ymm0, ymm0, ymm14
|
||||
vperm2i128 ymm14, ymm12, ymm13, 49 ;# ymm14 = r2[1], r2[2], r2[3], r2[4]
|
||||
vpxor ymm2, ymm2, ymm14
|
||||
|
||||
;# Odd qwords: accumulate into ymm1 and ymm3.
vpunpckhqdq ymm12, ymm8, ymm9 ;# ymm12 = r1[1], r1[2], r3[1], r3[2]
|
||||
vpunpckhqdq ymm13, ymm10, ymm11 ;# ymm13 = r1[3], r1[4], r3[3], r3[4]
|
||||
vperm2i128 ymm14, ymm12, ymm13, 32 ;# ymm14 = r1[1], r1[2], r1[3], r1[4]
|
||||
vpxor ymm1, ymm1, ymm14
|
||||
vperm2i128 ymm14, ymm12, ymm13, 49 ;# ymm14 = r3[1], r3[2], r3[3], r3[4]
|
||||
vpxor ymm3, ymm3, ymm14
|
||||
|
||||
;# Second 32 bytes behind each pointer (offset +32).
vmovdqu ymm8, ymmword ptr [rax+32] ;# ymm8 = r4[1], r5[1], r6[1], r7[1]
|
||||
vmovdqu ymm9, ymmword ptr [rbx+32] ;# ymm9 = r4[2], r5[2], r6[2], r7[2]
|
||||
vmovdqu ymm10, ymmword ptr [rcx+32] ;# ymm10 = r4[3], r5[3], r6[3], r7[3]
|
||||
vmovdqu ymm11, ymmword ptr [rdx+32] ;# ymm11 = r4[4], r5[4], r6[4], r7[4]
|
||||
|
||||
;# Same transpose for the second half: accumulate into ymm4 and ymm6.
vpunpcklqdq ymm12, ymm8, ymm9 ;# ymm12 = r4[1], r4[2], r6[1], r6[2]
|
||||
vpunpcklqdq ymm13, ymm10, ymm11 ;# ymm13 = r4[3], r4[4], r6[3], r6[4]
|
||||
vperm2i128 ymm14, ymm12, ymm13, 32 ;# ymm14 = r4[1], r4[2], r4[3], r4[4]
|
||||
vpxor ymm4, ymm4, ymm14
|
||||
vperm2i128 ymm14, ymm12, ymm13, 49 ;# ymm14 = r6[1], r6[2], r6[3], r6[4]
|
||||
vpxor ymm6, ymm6, ymm14
|
||||
|
||||
;# Odd qwords of the second half: accumulate into ymm5 and ymm7.
vpunpckhqdq ymm12, ymm8, ymm9 ;# ymm12 = r5[1], r5[2], r7[1], r7[2]
|
||||
vpunpckhqdq ymm13, ymm10, ymm11 ;# ymm13 = r5[3], r5[4], r7[3], r7[4]
|
||||
vperm2i128 ymm14, ymm12, ymm13, 32 ;# ymm14 = r5[1], r5[2], r5[3], r5[4]
|
||||
vpxor ymm5, ymm5, ymm14
|
||||
vperm2i128 ymm14, ymm12, ymm13, 49 ;# ymm14 = r7[1], r7[2], r7[3], r7[4]
|
||||
vpxor ymm7, ymm7, ymm14
|
||||
|
||||
;# Restore rbx and ymm14 and release the 40-byte scratch area.
mov rbx, [rsp]
|
||||
vmovdqu ymm14, ymmword ptr [rsp+8]
|
||||
add rsp, 40
|
29
src/crypto/randomx/asm/program_sshash_avx2_ssh_prefetch.inc
Normal file
29
src/crypto/randomx/asm/program_sshash_avx2_ssh_prefetch.inc
Normal file
|
@ -0,0 +1,29 @@
|
|||
;# Prefetch fragment: turns the four qwords of a ymm register into addresses,
;# stores them at [rsp] .. [rsp+24] and prefetches each one.
;# Spill the four qwords of ymm0 to [rsp] .. [rsp+31].
;# NOTE(review): JitCompilerX86::generateSuperscalarHash adds (address register << 3) to
;# byte 3 of this fragment after emitting it, which presumably retargets the ymm0 operand
;# of this first instruction - confirm against the instruction encoding.
vmovdqu ymmword ptr [rsp], ymm0
|
||||
|
||||
;# Slot 0: rax = rdi + (([rsp] & RANDOMX_CACHE_MASK) << 6); write back, then prefetch.
mov rax, [rsp]
|
||||
and rax, RANDOMX_CACHE_MASK
|
||||
shl rax, 6
|
||||
add rax, rdi
|
||||
mov [rsp], rax
|
||||
prefetchnta byte ptr [rax]
|
||||
|
||||
;# Slot 1 ([rsp+8]): same computation.
mov rax, [rsp+8]
|
||||
and rax, RANDOMX_CACHE_MASK
|
||||
shl rax, 6
|
||||
add rax, rdi
|
||||
mov [rsp+8], rax
|
||||
prefetchnta byte ptr [rax]
|
||||
|
||||
;# Slot 2 ([rsp+16]): same computation.
mov rax, [rsp+16]
|
||||
and rax, RANDOMX_CACHE_MASK
|
||||
shl rax, 6
|
||||
add rax, rdi
|
||||
mov [rsp+16], rax
|
||||
prefetchnta byte ptr [rax]
|
||||
|
||||
;# Slot 3 ([rsp+24]): same computation.
mov rax, [rsp+24]
|
||||
and rax, RANDOMX_CACHE_MASK
|
||||
shl rax, 6
|
||||
add rax, rdi
|
||||
mov [rsp+24], rax
|
||||
prefetchnta byte ptr [rax]
|
|
@ -1,5 +1,7 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
|
||||
Copyright (c) 2019-2020, SChernykh <https://github.com/SChernykh>
|
||||
Copyright (c) 2019-2020, XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
|
@ -59,10 +61,11 @@ namespace randomx {
|
|||
|
||||
template<class Allocator>
|
||||
void deallocCache(randomx_cache* cache) {
|
||||
if (cache->memory != nullptr)
|
||||
if (cache->memory != nullptr) {
|
||||
Allocator::freeMemory(cache->memory, RANDOMX_CACHE_MAX_SIZE);
|
||||
if (cache->jit != nullptr)
|
||||
delete cache->jit;
|
||||
}
|
||||
|
||||
delete cache->jit;
|
||||
}
|
||||
|
||||
template void deallocCache<DefaultAllocator>(randomx_cache* cache);
|
||||
|
@ -77,16 +80,16 @@ namespace randomx {
|
|||
context.pwdlen = (uint32_t)keySize;
|
||||
context.salt = CONST_CAST(uint8_t *)RandomX_CurrentConfig.ArgonSalt;
|
||||
context.saltlen = (uint32_t)strlen(RandomX_CurrentConfig.ArgonSalt);
|
||||
context.secret = NULL;
|
||||
context.secret = nullptr;
|
||||
context.secretlen = 0;
|
||||
context.ad = NULL;
|
||||
context.ad = nullptr;
|
||||
context.adlen = 0;
|
||||
context.t_cost = RandomX_CurrentConfig.ArgonIterations;
|
||||
context.m_cost = RandomX_CurrentConfig.ArgonMemory;
|
||||
context.lanes = RandomX_CurrentConfig.ArgonLanes;
|
||||
context.threads = 1;
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
context.allocate_cbk = nullptr;
|
||||
context.free_cbk = nullptr;
|
||||
context.flags = ARGON2_DEFAULT_FLAGS;
|
||||
context.version = ARGON2_VERSION_NUMBER;
|
||||
|
||||
|
@ -100,8 +103,18 @@ namespace randomx {
|
|||
|
||||
// Initializes the cache from the key via initCache(), then has the cache's JIT compiler
// generate the SuperscalarHash and dataset-init code, and stores the resulting entry
// point in cache->datasetInit.
// With XMRIG_SECURE_JIT defined, enableWriting() is called on the JIT before code
// generation and enableExecution() once generation is finished.
void initCacheCompile(randomx_cache* cache, const void* key, size_t keySize) {
|
||||
initCache(cache, key, keySize);
|
||||
|
||||
# ifdef XMRIG_SECURE_JIT
|
||||
cache->jit->enableWriting();
|
||||
# endif
|
||||
|
||||
cache->jit->generateSuperscalarHash(cache->programs);
|
||||
cache->jit->generateDatasetInitCode();
|
||||
cache->datasetInit = cache->jit->getDatasetInitFunc();
|
||||
|
||||
# ifdef XMRIG_SECURE_JIT
|
||||
cache->jit->enableExecution();
|
||||
# endif
|
||||
}
|
||||
|
||||
constexpr uint64_t superscalarMul0 = 6364136223846793005ULL;
|
||||
|
|
|
@ -48,7 +48,7 @@ struct randomx_cache {
|
|||
randomx::DatasetInitFunc* datasetInit;
|
||||
randomx::SuperscalarProgram programs[RANDOMX_CACHE_MAX_ACCESSES];
|
||||
|
||||
bool isInitialized() {
|
||||
bool isInitialized() const {
|
||||
return programs[0].getSize() != 0;
|
||||
}
|
||||
};
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
Copyright (c) 2019, SChernykh <https://github.com/SChernykh>
|
||||
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
|
||||
Copyright (c) 2019-2020, SChernykh <https://github.com/SChernykh>
|
||||
Copyright (c) 2019-2020, XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
|
@ -28,18 +29,25 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
*/
|
||||
|
||||
#include "crypto/randomx/jit_compiler_a64.hpp"
|
||||
#include "crypto/randomx/superscalar.hpp"
|
||||
#include "crypto/common/VirtualMemory.h"
|
||||
#include "crypto/randomx/program.hpp"
|
||||
#include "crypto/randomx/reciprocal.h"
|
||||
#include "crypto/randomx/superscalar.hpp"
|
||||
#include "crypto/randomx/virtual_memory.hpp"
|
||||
|
||||
static bool hugePagesJIT = false;
|
||||
static int optimizedDatasetInit = -1;
|
||||
|
||||
// Stores the process-wide huge-pages preference for JIT code buffers.
// The JitCompilerA64 constructor ANDs this flag with its own hugePagesEnable argument.
void randomx_set_huge_pages_jit(bool hugePages)
|
||||
{
|
||||
hugePagesJIT = hugePages;
|
||||
}
|
||||
|
||||
// Stores the requested optimized-dataset-init mode in the file-static optimizedDatasetInit.
// NOTE(review): no reader of this value is visible in the AArch64 code shown here.
void randomx_set_optimized_dataset_init(int value)
|
||||
{
|
||||
optimizedDatasetInit = value;
|
||||
}
|
||||
|
||||
namespace ARMV8A {
|
||||
|
||||
constexpr uint32_t B = 0x14000000;
|
||||
|
@ -96,37 +104,28 @@ static size_t CalcDatasetItemSize()
|
|||
|
||||
constexpr uint32_t IntRegMap[8] = { 4, 5, 6, 7, 12, 13, 14, 15 };
|
||||
|
||||
JitCompilerA64::JitCompilerA64(bool hugePagesEnable)
|
||||
: code((uint8_t*) allocExecutableMemory(CodeSize + CalcDatasetItemSize(), hugePagesJIT && hugePagesEnable))
|
||||
, literalPos(ImulRcpLiteralsEnd)
|
||||
, num32bitLiterals(0)
|
||||
JitCompilerA64::JitCompilerA64(bool hugePagesEnable, bool) :
|
||||
hugePages(hugePagesJIT && hugePagesEnable),
|
||||
literalPos(ImulRcpLiteralsEnd)
|
||||
{
|
||||
memset(reg_changed_offset, 0, sizeof(reg_changed_offset));
|
||||
memcpy(code, (void*) randomx_program_aarch64, CodeSize);
|
||||
}
|
||||
|
||||
JitCompilerA64::~JitCompilerA64()
|
||||
{
|
||||
freePagedMemory(code, CodeSize + CalcDatasetItemSize());
|
||||
}
|
||||
|
||||
#if defined(ios_HOST_OS) || defined (darwin_HOST_OS)
|
||||
void sys_icache_invalidate(void *start, size_t len);
|
||||
#endif
|
||||
|
||||
static void clear_code_cache(char* p1, char* p2)
|
||||
{
|
||||
# if defined(ios_HOST_OS) || defined (darwin_HOST_OS)
|
||||
sys_icache_invalidate(p1, static_cast<size_t>(p2 - p1));
|
||||
# elif defined (HAVE_BUILTIN_CLEAR_CACHE) || defined (__GNUC__)
|
||||
__builtin___clear_cache(p1, p2);
|
||||
# else
|
||||
# error "No clear code cache function found"
|
||||
# endif
|
||||
freePagedMemory(code, allocatedSize);
|
||||
}
|
||||
|
||||
void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& config, uint32_t)
|
||||
{
|
||||
if (!allocatedSize) {
|
||||
allocate(CodeSize);
|
||||
}
|
||||
#ifdef XMRIG_SECURE_JIT
|
||||
else {
|
||||
enableWriting();
|
||||
}
|
||||
#endif
|
||||
|
||||
uint32_t codePos = MainLoopBegin + 4;
|
||||
|
||||
// and w16, w10, ScratchpadL3Mask64
|
||||
|
@ -171,11 +170,22 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
|
|||
codePos = ((uint8_t*)randomx_program_aarch64_update_spMix1) - ((uint8_t*)randomx_program_aarch64);
|
||||
emit32(ARMV8A::EOR | 10 | (IntRegMap[config.readReg0] << 5) | (IntRegMap[config.readReg1] << 16), code, codePos);
|
||||
|
||||
clear_code_cache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
|
||||
# ifndef XMRIG_OS_APPLE
|
||||
xmrig::VirtualMemory::flushInstructionCache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
|
||||
# endif
|
||||
}
|
||||
|
||||
void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration& config, uint32_t datasetOffset)
|
||||
{
|
||||
if (!allocatedSize) {
|
||||
allocate(CodeSize);
|
||||
}
|
||||
#ifdef XMRIG_SECURE_JIT
|
||||
else {
|
||||
enableWriting();
|
||||
}
|
||||
#endif
|
||||
|
||||
uint32_t codePos = MainLoopBegin + 4;
|
||||
|
||||
// and w16, w10, ScratchpadL3Mask64
|
||||
|
@ -226,12 +236,23 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
|
|||
emit32(ARMV8A::ADD_IMM_LO | 2 | (2 << 5) | (imm_lo << 10), code, codePos);
|
||||
emit32(ARMV8A::ADD_IMM_HI | 2 | (2 << 5) | (imm_hi << 10), code, codePos);
|
||||
|
||||
clear_code_cache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
|
||||
# ifndef XMRIG_OS_APPLE
|
||||
xmrig::VirtualMemory::flushInstructionCache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
|
||||
# endif
|
||||
}
|
||||
|
||||
template<size_t N>
|
||||
void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N])
|
||||
{
|
||||
if (!allocatedSize) {
|
||||
allocate(CodeSize + CalcDatasetItemSize());
|
||||
}
|
||||
#ifdef XMRIG_SECURE_JIT
|
||||
else {
|
||||
enableWriting();
|
||||
}
|
||||
#endif
|
||||
|
||||
uint32_t codePos = CodeSize;
|
||||
|
||||
uint8_t* p1 = (uint8_t*)randomx_calc_dataset_item_aarch64;
|
||||
|
@ -342,13 +363,19 @@ void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N])
|
|||
memcpy(code + codePos, p1, p2 - p1);
|
||||
codePos += p2 - p1;
|
||||
|
||||
clear_code_cache(reinterpret_cast<char*>(code + CodeSize), reinterpret_cast<char*>(code + codePos));
|
||||
# ifndef XMRIG_OS_APPLE
|
||||
xmrig::VirtualMemory::flushInstructionCache(reinterpret_cast<char*>(code + CodeSize), reinterpret_cast<char*>(code + codePos));
|
||||
# endif
|
||||
}
|
||||
|
||||
template void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_MAX_ACCESSES]);
|
||||
|
||||
DatasetInitFunc* JitCompilerA64::getDatasetInitFunc()
|
||||
DatasetInitFunc* JitCompilerA64::getDatasetInitFunc() const
|
||||
{
|
||||
# ifdef XMRIG_SECURE_JIT
|
||||
enableExecution();
|
||||
# endif
|
||||
|
||||
return (DatasetInitFunc*)(code + (((uint8_t*)randomx_init_dataset_aarch64) - ((uint8_t*)randomx_program_aarch64)));
|
||||
}
|
||||
|
||||
|
@ -357,6 +384,26 @@ size_t JitCompilerA64::getCodeSize()
|
|||
return CodeSize;
|
||||
}
|
||||
|
||||
// Applies xmrig::VirtualMemory::protectRW to the whole allocated code buffer
// (read/write protection, judging by the helper's name).
void JitCompilerA64::enableWriting() const
|
||||
{
|
||||
xmrig::VirtualMemory::protectRW(code, allocatedSize);
|
||||
}
|
||||
|
||||
// Applies xmrig::VirtualMemory::protectRX to the whole allocated code buffer
// (read/execute protection, judging by the helper's name).
void JitCompilerA64::enableExecution() const
|
||||
{
|
||||
xmrig::VirtualMemory::protectRX(code, allocatedSize);
|
||||
}
|
||||
|
||||
|
||||
// Allocates the JIT code buffer and seeds it with the static AArch64 program template.
//   size - number of bytes to allocate; recorded in allocatedSize.
// Exactly CodeSize bytes are copied from randomx_program_aarch64 regardless of `size`,
// so `size` must be at least CodeSize (the visible callers pass CodeSize or
// CodeSize + CalcDatasetItemSize()).
// Any previous buffer is not released here; the visible callers only call this while
// allocatedSize is still 0.
void JitCompilerA64::allocate(size_t size)
|
||||
{
|
||||
allocatedSize = size;
|
||||
code = static_cast<uint8_t*>(allocExecutableMemory(allocatedSize, hugePages));
|
||||
|
||||
memcpy(code, reinterpret_cast<const void *>(randomx_program_aarch64), CodeSize);
|
||||
}
|
||||
|
||||
|
||||
void JitCompilerA64::emitMovImmediate(uint32_t dst, uint32_t imm, uint8_t* code, uint32_t& codePos)
|
||||
{
|
||||
uint32_t k = codePos;
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
Copyright (c) 2019, SChernykh <https://github.com/SChernykh>
|
||||
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
|
||||
Copyright (c) 2019-2020, SChernykh <https://github.com/SChernykh>
|
||||
Copyright (c) 2019-2020, XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
|
@ -46,7 +47,7 @@ namespace randomx {
|
|||
|
||||
class JitCompilerA64 {
|
||||
public:
|
||||
explicit JitCompilerA64(bool hugePagesEnable);
|
||||
explicit JitCompilerA64(bool hugePagesEnable, bool optimizedInitDatasetEnable);
|
||||
~JitCompilerA64();
|
||||
|
||||
void prepare() {}
|
||||
|
@ -58,16 +59,32 @@ namespace randomx {
|
|||
|
||||
void generateDatasetInitCode() {}
|
||||
|
||||
ProgramFunc* getProgramFunc() { return reinterpret_cast<ProgramFunc*>(code); }
|
||||
DatasetInitFunc* getDatasetInitFunc();
|
||||
// Returns the start of the code buffer as a callable ProgramFunc pointer.
// With XMRIG_SECURE_JIT defined, enableExecution() is called first on every request.
inline ProgramFunc *getProgramFunc() const {
|
||||
# ifdef XMRIG_SECURE_JIT
|
||||
enableExecution();
|
||||
# endif
|
||||
|
||||
return reinterpret_cast<ProgramFunc*>(code);
|
||||
}
|
||||
|
||||
DatasetInitFunc* getDatasetInitFunc() const;
|
||||
uint8_t* getCode() { return code; }
|
||||
size_t getCodeSize();
|
||||
|
||||
void enableWriting() const;
|
||||
void enableExecution() const;
|
||||
|
||||
static InstructionGeneratorA64 engine[256];
|
||||
uint32_t reg_changed_offset[8];
|
||||
uint8_t* code;
|
||||
|
||||
private:
|
||||
const bool hugePages;
|
||||
uint32_t reg_changed_offset[8]{};
|
||||
uint8_t* code = nullptr;
|
||||
uint32_t literalPos;
|
||||
uint32_t num32bitLiterals;
|
||||
uint32_t num32bitLiterals = 0;
|
||||
size_t allocatedSize = 0;
|
||||
|
||||
void allocate(size_t size);
|
||||
|
||||
static void emit32(uint32_t val, uint8_t* code, uint32_t& codePos)
|
||||
{
|
||||
|
@ -90,6 +107,7 @@ namespace randomx {
|
|||
template<uint32_t tmp_reg_fp>
|
||||
void emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* code, uint32_t& codePos);
|
||||
|
||||
public:
|
||||
void h_IADD_RS(Instruction&, uint32_t&);
|
||||
void h_IADD_M(Instruction&, uint32_t&);
|
||||
void h_ISUB_R(Instruction&, uint32_t&);
|
||||
|
|
|
@ -35,3 +35,6 @@ void randomx_set_huge_pages_jit(bool)
|
|||
{
|
||||
}
|
||||
|
||||
// No-op implementation: the argument is ignored.
void randomx_set_optimized_dataset_init(int)
|
||||
{
|
||||
}
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
|
||||
Copyright (c) 2019-2020, SChernykh <https://github.com/SChernykh>
|
||||
Copyright (c) 2019-2020, XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
|
@ -41,7 +43,7 @@ namespace randomx {
|
|||
|
||||
class JitCompilerFallback {
|
||||
public:
|
||||
explicit JitCompilerFallback(bool) {
|
||||
explicit JitCompilerFallback(bool, bool) {
|
||||
throw std::runtime_error("JIT compilation is not supported on this platform");
|
||||
}
|
||||
void prepare() {}
|
||||
|
@ -70,5 +72,7 @@ namespace randomx {
|
|||
size_t getCodeSize() {
|
||||
return 0;
|
||||
}
|
||||
// No-op: the fallback compiler holds no code buffer (its constructor throws).
void enableWriting() {}
|
||||
// No-op: the fallback compiler holds no code buffer (its constructor throws).
void enableExecution() {}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
|
||||
Copyright (c) 2019-2020, SChernykh <https://github.com/SChernykh>
|
||||
Copyright (c) 2019-2020, XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
|
@ -30,14 +32,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include <cstring>
|
||||
#include <climits>
|
||||
#include <atomic>
|
||||
|
||||
#include "crypto/randomx/jit_compiler_x86.hpp"
|
||||
#include "backend/cpu/Cpu.h"
|
||||
#include "crypto/common/VirtualMemory.h"
|
||||
#include "crypto/randomx/jit_compiler_x86_static.hpp"
|
||||
#include "crypto/randomx/superscalar.hpp"
|
||||
#include "crypto/randomx/program.hpp"
|
||||
#include "crypto/randomx/reciprocal.h"
|
||||
#include "crypto/randomx/superscalar.hpp"
|
||||
#include "crypto/randomx/virtual_memory.hpp"
|
||||
#include "base/tools/Profiler.h"
|
||||
#include "backend/cpu/Cpu.h"
|
||||
#include "crypto/rx/Profiler.h"
|
||||
|
||||
#ifdef XMRIG_FIX_RYZEN
|
||||
# include "crypto/rx/Rx.h"
|
||||
|
@ -45,17 +49,21 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#ifdef _MSC_VER
|
||||
# include <intrin.h>
|
||||
#else
|
||||
# include <cpuid.h>
|
||||
#endif
|
||||
|
||||
static bool hugePagesJIT = false;
|
||||
static int optimizedDatasetInit = -1;
|
||||
|
||||
// Stores the process-wide huge-pages preference for JIT code buffers.
// The JitCompilerX86 constructor ANDs this flag with its own hugePagesEnable argument
// (and passes false instead when XMRIG_SECURE_JIT is defined).
void randomx_set_huge_pages_jit(bool hugePages)
|
||||
{
|
||||
hugePagesJIT = hugePages;
|
||||
}
|
||||
|
||||
// Stores the AVX2 dataset-init mode read by the JitCompilerX86 constructor:
//   < 0  auto-detect from CPU vendor/architecture
//     0  always disabled
//   > 0  always enabled
// In every mode the constructor still disables AVX2 init when the CPU lacks AVX2.
void randomx_set_optimized_dataset_init(int value)
|
||||
{
|
||||
optimizedDatasetInit = value;
|
||||
}
|
||||
|
||||
namespace randomx {
|
||||
/*
|
||||
|
||||
|
@ -112,6 +120,11 @@ namespace randomx {
|
|||
#define codeReadDatasetLightSshInit ADDR(randomx_program_read_dataset_sshash_init)
|
||||
#define codeReadDatasetLightSshFin ADDR(randomx_program_read_dataset_sshash_fin)
|
||||
#define codeDatasetInit ADDR(randomx_dataset_init)
|
||||
#define codeDatasetInitAVX2_prologue ADDR(randomx_dataset_init_avx2_prologue)
|
||||
#define codeDatasetInitAVX2_loop_end ADDR(randomx_dataset_init_avx2_loop_end)
|
||||
#define codeDatasetInitAVX2_loop_epilogue ADDR(randomx_dataset_init_avx2_epilogue)
|
||||
#define codeDatasetInitAVX2_ssh_load ADDR(randomx_dataset_init_avx2_ssh_load)
|
||||
#define codeDatasetInitAVX2_ssh_prefetch ADDR(randomx_dataset_init_avx2_ssh_prefetch)
|
||||
#define codeLoopStore ADDR(randomx_program_loop_store)
|
||||
#define codeLoopEnd ADDR(randomx_program_loop_end)
|
||||
#define codeEpilogue ADDR(randomx_program_epilogue)
|
||||
|
@ -128,7 +141,12 @@ namespace randomx {
|
|||
#define readDatasetLightInitSize (codeReadDatasetLightSshFin - codeReadDatasetLightSshInit)
|
||||
#define readDatasetLightFinSize (codeLoopStore - codeReadDatasetLightSshFin)
|
||||
#define loopStoreSize (codeLoopEnd - codeLoopStore)
|
||||
#define datasetInitSize (codeEpilogue - codeDatasetInit)
|
||||
#define datasetInitSize (codeDatasetInitAVX2_prologue - codeDatasetInit)
|
||||
#define datasetInitAVX2_prologue_size (codeDatasetInitAVX2_loop_end - codeDatasetInitAVX2_prologue)
|
||||
#define datasetInitAVX2_loop_end_size (codeDatasetInitAVX2_loop_epilogue - codeDatasetInitAVX2_loop_end)
|
||||
#define datasetInitAVX2_epilogue_size (codeDatasetInitAVX2_ssh_load - codeDatasetInitAVX2_loop_epilogue)
|
||||
#define datasetInitAVX2_ssh_load_size (codeDatasetInitAVX2_ssh_prefetch - codeDatasetInitAVX2_ssh_load)
|
||||
#define datasetInitAVX2_ssh_prefetch_size (codeEpilogue - codeDatasetInitAVX2_ssh_prefetch)
|
||||
#define epilogueSize (codeShhLoad - codeEpilogue)
|
||||
#define codeSshLoadSize (codeShhPrefetch - codeShhLoad)
|
||||
#define codeSshPrefetchSize (codeShhEnd - codeShhPrefetch)
|
||||
|
@ -166,20 +184,27 @@ namespace randomx {
|
|||
{0x0F, 0x1F, 0x44, 0x00, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
};
|
||||
|
||||
// Rounds the address p down to the nearest multiple of pageSize and returns it as a pointer.
// pageSize must be non-zero (it is used as a modulus).
static inline uint8_t* alignToPage(uint8_t* p, size_t pageSize) {
|
||||
size_t k = (size_t) p;
|
||||
k -= k % pageSize;
|
||||
return (uint8_t*) k;
|
||||
}
|
||||
|
||||
// Returns the number of bytes emitted after the prologue (codePos - prologueSize),
// or 0 while codePos is still inside the prologue.
size_t JitCompilerX86::getCodeSize() {
|
||||
return codePos < prologueSize ? 0 : codePos - prologueSize;
|
||||
}
|
||||
|
||||
static inline void cpuid(uint32_t level, int32_t output[4])
|
||||
{
|
||||
memset(output, 0, sizeof(int32_t) * 4);
|
||||
// Applies xmrig::VirtualMemory::protectRW (read/write, judging by the name) to the
// generated-code region. The region starts at `code` rounded down to a 4096-byte
// boundary and ends at code + CodeSize.
// NOTE(review): the constructor allocates CodeSize * 2 or CodeSize * 4 bytes; confirm that
// nothing is generated past code + CodeSize while this protection scheme is in use.
void JitCompilerX86::enableWriting() const {
|
||||
uint8_t* p1 = alignToPage(code, 4096);
|
||||
uint8_t* p2 = code + CodeSize;
|
||||
xmrig::VirtualMemory::protectRW(p1, p2 - p1);
|
||||
}
|
||||
|
||||
# ifdef _MSC_VER
|
||||
__cpuid(output, static_cast<int>(level));
|
||||
# else
|
||||
__cpuid_count(level, 0, output[0], output[1], output[2], output[3]);
|
||||
# endif
|
||||
}
|
||||
// Applies xmrig::VirtualMemory::protectRX (read/execute, judging by the name) to the
// same region enableWriting() covers: from `code` rounded down to a 4096-byte boundary
// up to code + CodeSize.
void JitCompilerX86::enableExecution() const {
|
||||
uint8_t* p1 = alignToPage(code, 4096);
|
||||
uint8_t* p2 = code + CodeSize;
|
||||
xmrig::VirtualMemory::protectRX(p1, p2 - p1);
|
||||
}
|
||||
|
||||
# ifdef _MSC_VER
|
||||
static FORCE_INLINE uint32_t rotl32(uint32_t a, int shift) { return _rotl(a, shift); }
|
||||
|
@ -190,17 +215,68 @@ namespace randomx {
|
|||
static std::atomic<size_t> codeOffset;
|
||||
constexpr size_t codeOffsetIncrement = 59 * 64;
|
||||
|
||||
JitCompilerX86::JitCompilerX86(bool hugePagesEnable) {
|
||||
JitCompilerX86::JitCompilerX86(bool hugePagesEnable, bool optimizedInitDatasetEnable) {
|
||||
BranchesWithin32B = xmrig::Cpu::info()->jccErratum();
|
||||
|
||||
int32_t info[4];
|
||||
cpuid(1, info);
|
||||
hasAVX = ((info[2] & (1 << 27)) != 0) && ((info[2] & (1 << 28)) != 0);
|
||||
hasAVX = xmrig::Cpu::info()->hasAVX();
|
||||
hasAVX2 = xmrig::Cpu::info()->hasAVX2();
|
||||
|
||||
cpuid(0x80000001, info);
|
||||
hasXOP = ((info[2] & (1 << 11)) != 0);
|
||||
// Disable by default
|
||||
initDatasetAVX2 = false;
|
||||
|
||||
allocatedCode = (uint8_t*)allocExecutableMemory(CodeSize * 2, hugePagesJIT && hugePagesEnable);
|
||||
if (optimizedInitDatasetEnable) {
|
||||
// Dataset init using AVX2:
|
||||
// -1 = Auto detect
|
||||
// 0 = Always disabled
|
||||
// +1 = Always enabled
|
||||
if (optimizedDatasetInit > 0) {
|
||||
initDatasetAVX2 = true;
|
||||
}
|
||||
else if (optimizedDatasetInit < 0) {
|
||||
xmrig::ICpuInfo::Vendor vendor = xmrig::Cpu::info()->vendor();
|
||||
xmrig::ICpuInfo::Arch arch = xmrig::Cpu::info()->arch();
|
||||
|
||||
if (vendor == xmrig::ICpuInfo::VENDOR_INTEL) {
|
||||
// AVX2 init is faster on Intel CPUs without HT
|
||||
initDatasetAVX2 = (xmrig::Cpu::info()->cores() == xmrig::Cpu::info()->threads());
|
||||
}
|
||||
else if (vendor == xmrig::ICpuInfo::VENDOR_AMD) {
|
||||
switch (arch) {
|
||||
case xmrig::ICpuInfo::ARCH_ZEN:
|
||||
case xmrig::ICpuInfo::ARCH_ZEN_PLUS:
|
||||
default:
|
||||
// AVX2 init is slower on Zen/Zen+
|
||||
// Also disable it for other unknown architectures
|
||||
initDatasetAVX2 = false;
|
||||
break;
|
||||
case xmrig::ICpuInfo::ARCH_ZEN2:
|
||||
// AVX2 init is faster on Zen2 without SMT (mobile CPUs)
|
||||
initDatasetAVX2 = (xmrig::Cpu::info()->cores() == xmrig::Cpu::info()->threads());
|
||||
break;
|
||||
case xmrig::ICpuInfo::ARCH_ZEN3:
|
||||
// AVX2 init is faster on Zen3
|
||||
initDatasetAVX2 = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sorry, low-end Intel CPUs
|
||||
if (!hasAVX2) {
|
||||
initDatasetAVX2 = false;
|
||||
}
|
||||
|
||||
hasXOP = xmrig::Cpu::info()->hasXOP();
|
||||
|
||||
allocatedSize = initDatasetAVX2 ? (CodeSize * 4) : (CodeSize * 2);
|
||||
allocatedCode = static_cast<uint8_t*>(allocExecutableMemory(allocatedSize,
|
||||
# ifdef XMRIG_SECURE_JIT
|
||||
false
|
||||
# else
|
||||
hugePagesJIT && hugePagesEnable
|
||||
# endif
|
||||
));
|
||||
|
||||
// Shift code base address to improve caching - all threads will use different L2/L3 cache sets
|
||||
code = allocatedCode + (codeOffset.fetch_add(codeOffsetIncrement) % CodeSize);
|
||||
|
@ -224,7 +300,7 @@ namespace randomx {
|
|||
|
||||
JitCompilerX86::~JitCompilerX86() {
|
||||
codeOffset.fetch_sub(codeOffsetIncrement);
|
||||
freePagedMemory(allocatedCode, CodeSize);
|
||||
freePagedMemory(allocatedCode, allocatedSize);
|
||||
}
|
||||
|
||||
void JitCompilerX86::prepare() {
|
||||
|
@ -237,6 +313,10 @@ namespace randomx {
|
|||
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg, uint32_t flags) {
|
||||
PROFILE_SCOPE(RandomX_JIT_compile);
|
||||
|
||||
# ifdef XMRIG_SECURE_JIT
|
||||
enableWriting();
|
||||
# endif
|
||||
|
||||
vm_flags = flags;
|
||||
|
||||
generateProgramPrologue(prog, pcfg);
|
||||
|
@ -271,14 +351,49 @@ namespace randomx {
|
|||
|
||||
template<size_t N>
|
||||
void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[N]) {
|
||||
uint8_t* p = code;
|
||||
if (initDatasetAVX2) {
|
||||
codePos = 0;
|
||||
emit(codeDatasetInitAVX2_prologue, datasetInitAVX2_prologue_size, code, codePos);
|
||||
|
||||
for (unsigned j = 0; j < RandomX_CurrentConfig.CacheAccesses; ++j) {
|
||||
SuperscalarProgram& prog = programs[j];
|
||||
uint32_t pos = codePos;
|
||||
for (uint32_t i = 0, n = prog.getSize(); i < n; ++i) {
|
||||
generateSuperscalarCode<true>(prog(i), p, pos);
|
||||
}
|
||||
codePos = pos;
|
||||
emit(codeShhLoad, codeSshLoadSize, code, codePos);
|
||||
emit(codeDatasetInitAVX2_ssh_load, datasetInitAVX2_ssh_load_size, code, codePos);
|
||||
if (j < RandomX_CurrentConfig.CacheAccesses - 1) {
|
||||
*(uint32_t*)(code + codePos) = 0xd88b49 + (static_cast<uint32_t>(prog.getAddressRegister()) << 16);
|
||||
codePos += 3;
|
||||
emit(RandomX_CurrentConfig.codeShhPrefetchTweaked, codeSshPrefetchSize, code, codePos);
|
||||
uint8_t* p = code + codePos;
|
||||
emit(codeDatasetInitAVX2_ssh_prefetch, datasetInitAVX2_ssh_prefetch_size, code, codePos);
|
||||
p[3] += prog.getAddressRegister() << 3;
|
||||
}
|
||||
}
|
||||
|
||||
emit(codeDatasetInitAVX2_loop_end, datasetInitAVX2_loop_end_size, code, codePos);
|
||||
|
||||
// Number of bytes from the start of randomx_dataset_init_avx2_prologue to loop_begin label
|
||||
constexpr int32_t prologue_size = 320;
|
||||
*(int32_t*)(code + codePos - 4) = prologue_size - codePos;
|
||||
|
||||
emit(codeDatasetInitAVX2_loop_epilogue, datasetInitAVX2_epilogue_size, code, codePos);
|
||||
return;
|
||||
}
|
||||
|
||||
memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize);
|
||||
codePos = superScalarHashOffset + codeSshInitSize;
|
||||
for (unsigned j = 0; j < RandomX_CurrentConfig.CacheAccesses; ++j) {
|
||||
SuperscalarProgram& prog = programs[j];
|
||||
for (unsigned i = 0; i < prog.getSize(); ++i) {
|
||||
Instruction& instr = prog(i);
|
||||
generateSuperscalarCode(instr);
|
||||
uint32_t pos = codePos;
|
||||
for (uint32_t i = 0, n = prog.getSize(); i < n; ++i) {
|
||||
generateSuperscalarCode<false>(prog(i), p, pos);
|
||||
}
|
||||
codePos = pos;
|
||||
emit(codeShhLoad, codeSshLoadSize, code, codePos);
|
||||
if (j < RandomX_CurrentConfig.CacheAccesses - 1) {
|
||||
*(uint32_t*)(code + codePos) = 0xd88b49 + (static_cast<uint32_t>(prog.getAddressRegister()) << 16);
|
||||
|
@ -293,7 +408,10 @@ namespace randomx {
|
|||
void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_MAX_ACCESSES]);
|
||||
|
||||
void JitCompilerX86::generateDatasetInitCode() {
|
||||
memcpy(code, codeDatasetInit, datasetInitSize);
|
||||
// AVX2 code is generated in generateSuperscalarHash()
|
||||
if (!initDatasetAVX2) {
|
||||
memcpy(code, codeDatasetInit, datasetInitSize);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
|
||||
|
@ -372,101 +490,243 @@ namespace randomx {
|
|||
emit32(epilogueOffset - codePos - 4, code, codePos);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateSuperscalarCode(Instruction& instr) {
|
||||
static constexpr uint8_t REX_SUB_RR[] = { 0x4d, 0x2b };
|
||||
static constexpr uint8_t REX_MOV_RR64[] = { 0x49, 0x8b };
|
||||
static constexpr uint8_t REX_MOV_R64R[] = { 0x4c, 0x8b };
|
||||
static constexpr uint8_t REX_IMUL_RR[] = { 0x4d, 0x0f, 0xaf };
|
||||
static constexpr uint8_t REX_IMUL_RM[] = { 0x4c, 0x0f, 0xaf };
|
||||
static constexpr uint8_t REX_MUL_R[] = { 0x49, 0xf7 };
|
||||
static constexpr uint8_t REX_81[] = { 0x49, 0x81 };
|
||||
static constexpr uint8_t MOV_RAX_I[] = { 0x48, 0xb8 };
|
||||
static constexpr uint8_t REX_LEA[] = { 0x4f, 0x8d };
|
||||
static constexpr uint8_t REX_XOR_RR[] = { 0x4D, 0x33 };
|
||||
static constexpr uint8_t REX_XOR_RI[] = { 0x49, 0x81 };
|
||||
static constexpr uint8_t REX_ROT_I8[] = { 0x49, 0xc1 };
|
||||
|
||||
template<bool AVX2>
|
||||
FORCE_INLINE void JitCompilerX86::generateSuperscalarCode(Instruction& instr, uint8_t* code, uint32_t& codePos) {
|
||||
switch ((SuperscalarInstructionType)instr.opcode)
|
||||
{
|
||||
case randomx::SuperscalarInstructionType::ISUB_R:
|
||||
emit(REX_SUB_RR, code, codePos);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src, code, codePos);
|
||||
*(uint32_t*)(code + codePos) = 0x00C02B4DUL + (instr.dst << 19) + (instr.src << 16);
|
||||
codePos += 3;
|
||||
if (AVX2) {
|
||||
emit32(0xC0FBFDC5UL + (instr.src << 24) + (instr.dst << 27) - (instr.dst << 11), code, codePos);
|
||||
}
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IXOR_R:
|
||||
emit(REX_XOR_RR, code, codePos);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src, code, codePos);
|
||||
*(uint32_t*)(code + codePos) = 0x00C0334DUL + (instr.dst << 19) + (instr.src << 16);
|
||||
codePos += 3;
|
||||
if (AVX2) {
|
||||
emit32(0xC0EFFDC5UL + (instr.src << 24) + (instr.dst << 27) - (instr.dst << 11), code, codePos);
|
||||
}
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IADD_RS:
|
||||
emit(REX_LEA, code, codePos);
|
||||
emitByte(0x04 + 8 * instr.dst, code, codePos);
|
||||
genSIB(instr.getModShift(), instr.src, instr.dst, code, codePos);
|
||||
emit32(0x00048D4F + (instr.dst << 19) + (genSIB(instr.getModShift(), instr.src, instr.dst) << 24), code, codePos);
|
||||
if (AVX2) {
|
||||
if (instr.getModShift()) {
|
||||
static const uint8_t t[] = { 0xC5, 0xBD, 0x73, 0xF0, 0x00, 0xC5, 0xBD, 0xD4, 0xC0 };
|
||||
uint8_t* p = code + codePos;
|
||||
emit(t, code, codePos);
|
||||
p[3] += instr.src;
|
||||
p[4] = instr.getModShift();
|
||||
p[8] += instr.dst * 9;
|
||||
}
|
||||
else {
|
||||
emit32(0xC0D4FDC5UL + (instr.src << 24) + (instr.dst << 27) - (instr.dst << 11), code, codePos);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IMUL_R:
|
||||
emit(REX_IMUL_RR, code, codePos);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src, code, codePos);
|
||||
emit32(0xC0AF0F4DUL + (instr.dst << 27) + (instr.src << 24), code, codePos);
|
||||
if (AVX2) {
|
||||
static const uint8_t t[] = {
|
||||
0xC5, 0xBD, 0x73, 0xD0, 0x20,
|
||||
0xC5, 0xB5, 0x73, 0xD0, 0x20,
|
||||
0xC5, 0x7D, 0xF4, 0xD0,
|
||||
0xC5, 0x35, 0xF4, 0xD8,
|
||||
0xC5, 0xBD, 0xF4, 0xC0,
|
||||
0xC4, 0xC1, 0x25, 0x73, 0xF3, 0x20,
|
||||
0xC5, 0xFD, 0x73, 0xF0, 0x20,
|
||||
0xC4, 0x41, 0x2D, 0xD4, 0xD3,
|
||||
0xC5, 0xAD, 0xD4, 0xC0
|
||||
};
|
||||
uint8_t* p = code + codePos;
|
||||
emit(t, code, codePos);
|
||||
p[3] += instr.dst;
|
||||
p[8] += instr.src;
|
||||
p[11] -= instr.dst * 8;
|
||||
p[13] += instr.src;
|
||||
p[17] += instr.dst;
|
||||
p[21] += instr.dst * 8 + instr.src;
|
||||
p[29] -= instr.dst * 8;
|
||||
p[31] += instr.dst;
|
||||
p[41] += instr.dst * 9;
|
||||
}
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IROR_C:
|
||||
emit(REX_ROT_I8, code, codePos);
|
||||
emitByte(0xc8 + instr.dst, code, codePos);
|
||||
emitByte(instr.getImm32() & 63, code, codePos);
|
||||
{
|
||||
const uint32_t shift = instr.getImm32() & 63;
|
||||
emit32(0x00C8C149UL + (instr.dst << 16) + (shift << 24), code, codePos);
|
||||
if (AVX2) {
|
||||
static const uint8_t t[] = { 0xC5, 0xBD, 0x73, 0xD0, 0x00, 0xC5, 0xB5, 0x73, 0xF0, 0x00, 0xC4, 0xC1, 0x3D, 0xEB, 0xC1 };
|
||||
uint8_t* p = code + codePos;
|
||||
emit(t, code, codePos);
|
||||
p[3] += instr.dst;
|
||||
p[4] = shift;
|
||||
p[8] += instr.dst;
|
||||
p[9] = 64 - shift;
|
||||
p[14] += instr.dst * 8;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IADD_C7:
|
||||
emit(REX_81, code, codePos);
|
||||
emitByte(0xc0 + instr.dst, code, codePos);
|
||||
emit32(instr.getImm32(), code, codePos);
|
||||
case randomx::SuperscalarInstructionType::IADD_C8:
|
||||
case randomx::SuperscalarInstructionType::IADD_C9:
|
||||
if (AVX2) {
|
||||
static const uint8_t t[] = { 0x48, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4C, 0x03, 0xC0, 0xC4, 0x62, 0x7D, 0x19, 0x05, 0xEC, 0xFF, 0xFF, 0xFF, 0xC4, 0xC1, 0x7D, 0xD4, 0xC0 };
|
||||
uint8_t* p = code + codePos;
|
||||
emit(t, code, codePos);
|
||||
*(uint64_t*)(p + 2) = signExtend2sCompl(instr.getImm32());
|
||||
p[12] += instr.dst * 8;
|
||||
p[24] -= instr.dst * 8;
|
||||
p[26] += instr.dst * 8;
|
||||
}
|
||||
else {
|
||||
*(uint32_t*)(code + codePos) = 0x00C08149UL + (instr.dst << 16);
|
||||
codePos += 3;
|
||||
emit32(instr.getImm32(), code, codePos);
|
||||
}
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IXOR_C7:
|
||||
emit(REX_XOR_RI, code, codePos);
|
||||
emitByte(0xf0 + instr.dst, code, codePos);
|
||||
emit32(instr.getImm32(), code, codePos);
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IADD_C8:
|
||||
emit(REX_81, code, codePos);
|
||||
emitByte(0xc0 + instr.dst, code, codePos);
|
||||
emit32(instr.getImm32(), code, codePos);
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IXOR_C8:
|
||||
emit(REX_XOR_RI, code, codePos);
|
||||
emitByte(0xf0 + instr.dst, code, codePos);
|
||||
emit32(instr.getImm32(), code, codePos);
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IADD_C9:
|
||||
emit(REX_81, code, codePos);
|
||||
emitByte(0xc0 + instr.dst, code, codePos);
|
||||
emit32(instr.getImm32(), code, codePos);
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IXOR_C9:
|
||||
emit(REX_XOR_RI, code, codePos);
|
||||
emitByte(0xf0 + instr.dst, code, codePos);
|
||||
emit32(instr.getImm32(), code, codePos);
|
||||
if (AVX2) {
|
||||
static const uint8_t t[] = { 0x48, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4C, 0x33, 0xC0, 0xC4, 0x62, 0x7D, 0x19, 0x05, 0xEC, 0xFF, 0xFF, 0xFF, 0xC4, 0xC1, 0x7D, 0xEF, 0xC0 };
|
||||
uint8_t* p = code + codePos;
|
||||
emit(t, code, codePos);
|
||||
*(uint64_t*)(p + 2) = signExtend2sCompl(instr.getImm32());
|
||||
p[12] += instr.dst * 8;
|
||||
p[24] -= instr.dst * 8;
|
||||
p[26] += instr.dst * 8;
|
||||
}
|
||||
else {
|
||||
*(uint32_t*)(code + codePos) = 0x00F08149UL + (instr.dst << 16);
|
||||
codePos += 3;
|
||||
emit32(instr.getImm32(), code, codePos);
|
||||
}
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IMULH_R:
|
||||
emit(REX_MOV_RR64, code, codePos);
|
||||
emitByte(0xc0 + instr.dst, code, codePos);
|
||||
emit(REX_MUL_R, code, codePos);
|
||||
emitByte(0xe0 + instr.src, code, codePos);
|
||||
emit(REX_MOV_R64R, code, codePos);
|
||||
emitByte(0xc2 + 8 * instr.dst, code, codePos);
|
||||
*(uint32_t*)(code + codePos) = 0x00C08B49UL + (instr.dst << 16);
|
||||
codePos += 3;
|
||||
*(uint32_t*)(code + codePos) = 0x00E0F749UL + (instr.src << 16);
|
||||
codePos += 3;
|
||||
*(uint32_t*)(code + codePos) = 0x00C28B4CUL + (instr.dst << 19);
|
||||
codePos += 3;
|
||||
if (AVX2) {
|
||||
static const uint8_t t[] = {
|
||||
0xC5, 0xBD, 0x73, 0xD0, 0x20,
|
||||
0xC5, 0xB5, 0x73, 0xD0, 0x20,
|
||||
0xC5, 0x7D, 0xF4, 0xD0,
|
||||
0xC5, 0x3D, 0xF4, 0xD8,
|
||||
0xC4, 0x41, 0x7D, 0xF4, 0xE1,
|
||||
0xC4, 0xC1, 0x3D, 0xF4, 0xC1,
|
||||
0xC4, 0xC1, 0x2D, 0x73, 0xD2, 0x20,
|
||||
0xC4, 0x41, 0x25, 0xEF, 0xC6,
|
||||
0xC4, 0x41, 0x25, 0xD4, 0xDC,
|
||||
0xC4, 0x41, 0x25, 0xD4, 0xDA,
|
||||
0xC4, 0x41, 0x25, 0xEF, 0xCE,
|
||||
0xC4, 0x42, 0x3D, 0x37, 0xC1,
|
||||
0xC4, 0x41, 0x3D, 0xDB, 0xC7,
|
||||
0xC5, 0xBD, 0xD4, 0xC0,
|
||||
0xC4, 0xC1, 0x25, 0x73, 0xD3, 0x20,
|
||||
0xC5, 0xA5, 0xD4, 0xC0
|
||||
};
|
||||
uint8_t* p = code + codePos;
|
||||
emit(t, code, codePos);
|
||||
p[3] += instr.dst;
|
||||
p[8] += instr.src;
|
||||
p[11] -= instr.dst * 8;
|
||||
p[13] += instr.src;
|
||||
p[17] += instr.src;
|
||||
p[20] -= instr.dst * 8;
|
||||
p[27] += instr.dst * 8;
|
||||
p[67] += instr.dst * 9;
|
||||
p[77] += instr.dst * 9;
|
||||
}
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::ISMULH_R:
|
||||
emit(REX_MOV_RR64, code, codePos);
|
||||
emitByte(0xc0 + instr.dst, code, codePos);
|
||||
emit(REX_MUL_R, code, codePos);
|
||||
emitByte(0xe8 + instr.src, code, codePos);
|
||||
emit(REX_MOV_R64R, code, codePos);
|
||||
emitByte(0xc2 + 8 * instr.dst, code, codePos);
|
||||
*(uint32_t*)(code + codePos) = 0x00C08B49UL + (instr.dst << 16);
|
||||
codePos += 3;
|
||||
*(uint32_t*)(code + codePos) = 0x00E8F749UL + (instr.src << 16);
|
||||
codePos += 3;
|
||||
*(uint32_t*)(code + codePos) = 0x00C28B4CUL + (instr.dst << 19);
|
||||
codePos += 3;
|
||||
if (AVX2) {
|
||||
static const uint8_t t[] = {
|
||||
0xC5, 0xBD, 0x73, 0xD0, 0x20,
|
||||
0xC5, 0xB5, 0x73, 0xD0, 0x20,
|
||||
0xC5, 0x7D, 0xF4, 0xD0,
|
||||
0xC5, 0x3D, 0xF4, 0xD8,
|
||||
0xC4, 0x41, 0x7D, 0xF4, 0xE1,
|
||||
0xC4, 0x41, 0x3D, 0xF4, 0xE9,
|
||||
0xC4, 0xC1, 0x2D, 0x73, 0xD2, 0x20,
|
||||
0xC4, 0x41, 0x25, 0xEF, 0xC6,
|
||||
0xC4, 0x41, 0x25, 0xD4, 0xDC,
|
||||
0xC4, 0x41, 0x25, 0xD4, 0xDA,
|
||||
0xC4, 0x41, 0x25, 0xEF, 0xCE,
|
||||
0xC4, 0x42, 0x3D, 0x37, 0xC1,
|
||||
0xC4, 0x41, 0x3D, 0xDB, 0xC7,
|
||||
0xC4, 0x41, 0x15, 0xD4, 0xE8,
|
||||
0xC4, 0xC1, 0x25, 0x73, 0xD3, 0x20,
|
||||
0xC4, 0x41, 0x15, 0xD4, 0xC3,
|
||||
0xC4, 0x41, 0x35, 0xEF, 0xC9,
|
||||
0xC4, 0x62, 0x35, 0x37, 0xD0,
|
||||
0xC4, 0x62, 0x35, 0x37, 0xD8,
|
||||
0xC5, 0x2D, 0xDB, 0xD0,
|
||||
0xC5, 0x25, 0xDB, 0xD8,
|
||||
0xC4, 0x41, 0x3D, 0xFB, 0xC2,
|
||||
0xC4, 0xC1, 0x3D, 0xFB, 0xC3
|
||||
};
|
||||
uint8_t* p = code + codePos;
|
||||
emit(t, code, codePos);
|
||||
p[3] += instr.dst;
|
||||
p[8] += instr.src;
|
||||
p[11] -= instr.dst * 8;
|
||||
p[13] += instr.src;
|
||||
p[17] += instr.src;
|
||||
p[20] -= instr.dst * 8;
|
||||
p[89] += instr.dst;
|
||||
p[94] += instr.src;
|
||||
p[98] += instr.src;
|
||||
p[102] += instr.dst;
|
||||
p[112] += instr.dst * 8;
|
||||
}
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IMUL_RCP:
|
||||
emit(MOV_RAX_I, code, codePos);
|
||||
*(uint32_t*)(code + codePos) = 0x0000B848UL;
|
||||
codePos += 2;
|
||||
emit64(randomx_reciprocal_fast(instr.getImm32()), code, codePos);
|
||||
emit(REX_IMUL_RM, code, codePos);
|
||||
emitByte(0xc0 + 8 * instr.dst, code, codePos);
|
||||
emit32(0xC0AF0F4CUL + (instr.dst << 27), code, codePos);
|
||||
if (AVX2) {
|
||||
static const uint8_t t[] = {
|
||||
0xC4, 0x62, 0x7D, 0x19, 0x25, 0xEB, 0xFF, 0xFF, 0xFF,
|
||||
0xC5, 0xBD, 0x73, 0xD0, 0x20,
|
||||
0xC4, 0xC1, 0x35, 0x73, 0xD4, 0x20,
|
||||
0xC4, 0x41, 0x7D, 0xF4, 0xD4,
|
||||
0xC5, 0x35, 0xF4, 0xD8,
|
||||
0xC4, 0xC1, 0x3D, 0xF4, 0xC4,
|
||||
0xC4, 0xC1, 0x25, 0x73, 0xF3, 0x20,
|
||||
0xC5, 0xFD, 0x73, 0xF0, 0x20,
|
||||
0xC4, 0x41, 0x2D, 0xD4, 0xD3,
|
||||
0xC5, 0xAD, 0xD4, 0xC0
|
||||
};
|
||||
uint8_t* p = code + codePos;
|
||||
emit(t, code, codePos);
|
||||
p[12] += instr.dst;
|
||||
p[22] -= instr.dst * 8;
|
||||
p[28] += instr.dst;
|
||||
p[33] += instr.dst * 8;
|
||||
p[41] -= instr.dst * 8;
|
||||
p[43] += instr.dst;
|
||||
p[53] += instr.dst * 9;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
|
||||
template void JitCompilerX86::generateSuperscalarCode<false>(Instruction&, uint8_t*, uint32_t&);
|
||||
template void JitCompilerX86::generateSuperscalarCode<true>(Instruction&, uint8_t*, uint32_t&);
|
||||
|
||||
template<bool rax>
|
||||
FORCE_INLINE void JitCompilerX86::genAddressReg(const Instruction& instr, const uint32_t src, uint8_t* code, uint32_t& codePos) {
|
||||
*(uint32_t*)(code + codePos) = (rax ? 0x24808d41 : 0x24888d41) + (src << 16);
|
||||
|
@ -546,10 +806,6 @@ namespace randomx {
|
|||
codePos = pos;
|
||||
}
|
||||
|
||||
void JitCompilerX86::genSIB(int scale, int index, int base, uint8_t* code, uint32_t& codePos) {
|
||||
emitByte((scale << 6) | (index << 3) | base, code, codePos);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISUB_R(const Instruction& instr) {
|
||||
uint8_t* const p = code;
|
||||
uint32_t pos = codePos;
|
||||
|
@ -1088,11 +1344,11 @@ namespace randomx {
|
|||
pos += 14;
|
||||
|
||||
if (jmp_offset >= -128) {
|
||||
*(uint32_t*)(p + pos) = 0x74 + (jmp_offset << 8);
|
||||
*(uint32_t*)(p + pos) = 0x74 + (static_cast<uint32_t>(jmp_offset) << 8);
|
||||
pos += 2;
|
||||
}
|
||||
else {
|
||||
*(uint64_t*)(p + pos) = 0x840f + ((static_cast<int64_t>(jmp_offset) - 4) << 16);
|
||||
*(uint64_t*)(p + pos) = 0x840f + (static_cast<uint64_t>(jmp_offset - 4) << 16);
|
||||
pos += 6;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
|
||||
Copyright (c) 2019-2020, SChernykh <https://github.com/SChernykh>
|
||||
Copyright (c) 2019-2020, XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
|
@ -47,7 +49,7 @@ namespace randomx {
|
|||
|
||||
class JitCompilerX86 {
|
||||
public:
|
||||
explicit JitCompilerX86(bool hugePagesEnable);
|
||||
explicit JitCompilerX86(bool hugePagesEnable, bool optimizedInitDatasetEnable);
|
||||
~JitCompilerX86();
|
||||
void prepare();
|
||||
void generateProgram(Program&, ProgramConfiguration&, uint32_t);
|
||||
|
@ -55,24 +57,38 @@ namespace randomx {
|
|||
template<size_t N>
|
||||
void generateSuperscalarHash(SuperscalarProgram (&programs)[N]);
|
||||
void generateDatasetInitCode();
|
||||
ProgramFunc* getProgramFunc() {
|
||||
return (ProgramFunc*)code;
|
||||
|
||||
inline ProgramFunc *getProgramFunc() const {
|
||||
# ifdef XMRIG_SECURE_JIT
|
||||
enableExecution();
|
||||
# endif
|
||||
|
||||
return reinterpret_cast<ProgramFunc*>(code);
|
||||
}
|
||||
DatasetInitFunc* getDatasetInitFunc() {
|
||||
|
||||
inline DatasetInitFunc *getDatasetInitFunc() const {
|
||||
# ifdef XMRIG_SECURE_JIT
|
||||
enableExecution();
|
||||
# endif
|
||||
|
||||
return (DatasetInitFunc*)code;
|
||||
}
|
||||
|
||||
uint8_t* getCode() {
|
||||
return code;
|
||||
}
|
||||
size_t getCodeSize();
|
||||
void enableWriting() const;
|
||||
void enableExecution() const;
|
||||
|
||||
alignas(64) static InstructionGeneratorX86 engine[256];
|
||||
|
||||
int registerUsage[RegistersCount];
|
||||
uint8_t* code;
|
||||
uint32_t codePos;
|
||||
uint32_t codePosFirst;
|
||||
uint32_t vm_flags;
|
||||
private:
|
||||
int registerUsage[RegistersCount] = {};
|
||||
uint8_t* code = nullptr;
|
||||
uint32_t codePos = 0;
|
||||
uint32_t codePosFirst = 0;
|
||||
uint32_t vm_flags = 0;
|
||||
|
||||
# ifdef XMRIG_FIX_RYZEN
|
||||
std::pair<const void*, const void*> mainLoopBounds;
|
||||
|
@ -80,9 +96,12 @@ namespace randomx {
|
|||
|
||||
bool BranchesWithin32B = false;
|
||||
bool hasAVX;
|
||||
bool hasAVX2;
|
||||
bool initDatasetAVX2;
|
||||
bool hasXOP;
|
||||
|
||||
uint8_t* allocatedCode;
|
||||
uint8_t* allocatedCode = nullptr;
|
||||
size_t allocatedSize = 0;
|
||||
|
||||
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
||||
void generateProgramEpilogue(Program&, ProgramConfiguration&);
|
||||
|
@ -90,9 +109,10 @@ namespace randomx {
|
|||
static void genAddressReg(const Instruction&, const uint32_t src, uint8_t* code, uint32_t& codePos);
|
||||
static void genAddressRegDst(const Instruction&, uint8_t* code, uint32_t& codePos);
|
||||
static void genAddressImm(const Instruction&, uint8_t* code, uint32_t& codePos);
|
||||
static void genSIB(int scale, int index, int base, uint8_t* code, uint32_t& codePos);
|
||||
static uint32_t genSIB(int scale, int index, int base) { return (scale << 6) | (index << 3) | base; }
|
||||
|
||||
void generateSuperscalarCode(Instruction &);
|
||||
template<bool AVX2>
|
||||
void generateSuperscalarCode(Instruction& inst, uint8_t* code, uint32_t& codePos);
|
||||
|
||||
static void emitByte(uint8_t val, uint8_t* code, uint32_t& codePos) {
|
||||
code[codePos] = val;
|
||||
|
@ -119,6 +139,7 @@ namespace randomx {
|
|||
codePos += count;
|
||||
}
|
||||
|
||||
public:
|
||||
void h_IADD_RS(const Instruction&);
|
||||
void h_IADD_M(const Instruction&);
|
||||
void h_ISUB_R(const Instruction&);
|
||||
|
|
|
@ -52,6 +52,11 @@
|
|||
.global DECL(randomx_program_loop_store)
|
||||
.global DECL(randomx_program_loop_end)
|
||||
.global DECL(randomx_dataset_init)
|
||||
.global DECL(randomx_dataset_init_avx2_prologue)
|
||||
.global DECL(randomx_dataset_init_avx2_loop_end)
|
||||
.global DECL(randomx_dataset_init_avx2_epilogue)
|
||||
.global DECL(randomx_dataset_init_avx2_ssh_load)
|
||||
.global DECL(randomx_dataset_init_avx2_ssh_prefetch)
|
||||
.global DECL(randomx_program_epilogue)
|
||||
.global DECL(randomx_sshash_load)
|
||||
.global DECL(randomx_sshash_prefetch)
|
||||
|
@ -192,6 +197,97 @@ call_offset:
|
|||
pop rbx
|
||||
ret
|
||||
|
||||
.balign 64
|
||||
DECL(randomx_dataset_init_avx2_prologue):
|
||||
#include "asm/program_sshash_avx2_save_registers.inc"
|
||||
|
||||
#if defined(WINABI)
|
||||
mov rdi, qword ptr [rcx] ;# cache->memory
|
||||
mov rsi, rdx ;# dataset
|
||||
mov rbp, r8 ;# block index
|
||||
push r9 ;# max. block index
|
||||
#else
|
||||
mov rdi, qword ptr [rdi] ;# cache->memory
|
||||
;# dataset in rsi
|
||||
mov rbp, rdx ;# block index
|
||||
push rcx ;# max. block index
|
||||
#endif
|
||||
sub rsp, 40
|
||||
|
||||
jmp randomx_dataset_init_avx2_prologue_loop_begin
|
||||
#include "asm/program_sshash_avx2_constants.inc"
|
||||
|
||||
.balign 64
|
||||
randomx_dataset_init_avx2_prologue_loop_begin:
|
||||
#include "asm/program_sshash_avx2_loop_begin.inc"
|
||||
|
||||
;# init integer registers (lane 0)
|
||||
lea r8, [rbp+1]
|
||||
imul r8, qword ptr [r0_avx2_mul+rip]
|
||||
mov r9, qword ptr [r1_avx2_add+rip]
|
||||
xor r9, r8
|
||||
mov r10, qword ptr [r2_avx2_add+rip]
|
||||
xor r10, r8
|
||||
mov r11, qword ptr [r3_avx2_add+rip]
|
||||
xor r11, r8
|
||||
mov r12, qword ptr [r4_avx2_add+rip]
|
||||
xor r12, r8
|
||||
mov r13, qword ptr [r5_avx2_add+rip]
|
||||
xor r13, r8
|
||||
mov r14, qword ptr [r6_avx2_add+rip]
|
||||
xor r14, r8
|
||||
mov r15, qword ptr [r7_avx2_add+rip]
|
||||
xor r15, r8
|
||||
|
||||
;# init AVX registers (lanes 1-4)
|
||||
mov qword ptr [rsp+32], rbp
|
||||
vbroadcastsd ymm0, qword ptr [rsp+32]
|
||||
vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments+rip]
|
||||
|
||||
;# ymm0 *= r0_avx2_mul
|
||||
vbroadcastsd ymm1, qword ptr [r0_avx2_mul+rip]
|
||||
vpsrlq ymm8, ymm0, 32
|
||||
vpsrlq ymm9, ymm1, 32
|
||||
vpmuludq ymm10, ymm0, ymm1
|
||||
vpmuludq ymm11, ymm9, ymm0
|
||||
vpmuludq ymm0, ymm8, ymm1
|
||||
vpsllq ymm11, ymm11, 32
|
||||
vpsllq ymm0, ymm0, 32
|
||||
vpaddq ymm10, ymm10, ymm11
|
||||
vpaddq ymm0, ymm10, ymm0
|
||||
|
||||
vbroadcastsd ymm1, qword ptr [r1_avx2_add+rip]
|
||||
vpxor ymm1, ymm0, ymm1
|
||||
vbroadcastsd ymm2, qword ptr [r2_avx2_add+rip]
|
||||
vpxor ymm2, ymm0, ymm2
|
||||
vbroadcastsd ymm3, qword ptr [r3_avx2_add+rip]
|
||||
vpxor ymm3, ymm0, ymm3
|
||||
vbroadcastsd ymm4, qword ptr [r4_avx2_add+rip]
|
||||
vpxor ymm4, ymm0, ymm4
|
||||
vbroadcastsd ymm5, qword ptr [r5_avx2_add+rip]
|
||||
vpxor ymm5, ymm0, ymm5
|
||||
vbroadcastsd ymm6, qword ptr [r6_avx2_add+rip]
|
||||
vpxor ymm6, ymm0, ymm6
|
||||
vbroadcastsd ymm7, qword ptr [r7_avx2_add+rip]
|
||||
vpxor ymm7, ymm0, ymm7
|
||||
|
||||
vbroadcastsd ymm15, qword ptr [mul_hi_avx2_data+rip] ;# carry_bit (bit 32)
|
||||
vpsllq ymm14, ymm15, 31 ;# sign64 (bit 63)
|
||||
|
||||
;# generated SuperscalarHash code goes here
|
||||
|
||||
DECL(randomx_dataset_init_avx2_loop_end):
|
||||
#include "asm/program_sshash_avx2_loop_end.inc"
|
||||
|
||||
DECL(randomx_dataset_init_avx2_epilogue):
|
||||
#include "asm/program_sshash_avx2_epilogue.inc"
|
||||
|
||||
DECL(randomx_dataset_init_avx2_ssh_load):
|
||||
#include "asm/program_sshash_avx2_ssh_load.inc"
|
||||
|
||||
DECL(randomx_dataset_init_avx2_ssh_prefetch):
|
||||
#include "asm/program_sshash_avx2_ssh_prefetch.inc"
|
||||
|
||||
.balign 64
|
||||
DECL(randomx_program_epilogue):
|
||||
#include "asm/program_epilogue_store.inc"
|
||||
|
|
|
@ -41,6 +41,11 @@ PUBLIC randomx_program_read_dataset_ryzen
|
|||
PUBLIC randomx_program_read_dataset_sshash_init
|
||||
PUBLIC randomx_program_read_dataset_sshash_fin
|
||||
PUBLIC randomx_dataset_init
|
||||
PUBLIC randomx_dataset_init_avx2_prologue
|
||||
PUBLIC randomx_dataset_init_avx2_loop_end
|
||||
PUBLIC randomx_dataset_init_avx2_epilogue
|
||||
PUBLIC randomx_dataset_init_avx2_ssh_load
|
||||
PUBLIC randomx_dataset_init_avx2_ssh_prefetch
|
||||
PUBLIC randomx_program_loop_store
|
||||
PUBLIC randomx_program_loop_end
|
||||
PUBLIC randomx_program_epilogue
|
||||
|
@ -183,6 +188,94 @@ init_block_loop:
|
|||
randomx_dataset_init ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_dataset_init_avx2_prologue PROC
|
||||
include asm/program_sshash_avx2_save_registers.inc
|
||||
|
||||
mov rdi, qword ptr [rcx] ;# cache->memory
|
||||
mov rsi, rdx ;# dataset
|
||||
mov rbp, r8 ;# block index
|
||||
push r9 ;# max. block index
|
||||
sub rsp, 40
|
||||
|
||||
jmp loop_begin
|
||||
include asm/program_sshash_avx2_constants.inc
|
||||
|
||||
ALIGN 64
|
||||
loop_begin:
|
||||
include asm/program_sshash_avx2_loop_begin.inc
|
||||
|
||||
;# init integer registers (lane 0)
|
||||
lea r8, [rbp+1]
|
||||
imul r8, qword ptr [r0_avx2_mul]
|
||||
mov r9, qword ptr [r1_avx2_add]
|
||||
xor r9, r8
|
||||
mov r10, qword ptr [r2_avx2_add]
|
||||
xor r10, r8
|
||||
mov r11, qword ptr [r3_avx2_add]
|
||||
xor r11, r8
|
||||
mov r12, qword ptr [r4_avx2_add]
|
||||
xor r12, r8
|
||||
mov r13, qword ptr [r5_avx2_add]
|
||||
xor r13, r8
|
||||
mov r14, qword ptr [r6_avx2_add]
|
||||
xor r14, r8
|
||||
mov r15, qword ptr [r7_avx2_add]
|
||||
xor r15, r8
|
||||
|
||||
;# init AVX registers (lanes 1-4)
|
||||
mov qword ptr [rsp+32], rbp
|
||||
vbroadcastsd ymm0, qword ptr [rsp+32]
|
||||
vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments]
|
||||
|
||||
;# ymm0 *= r0_avx2_mul
|
||||
vbroadcastsd ymm1, qword ptr [r0_avx2_mul]
|
||||
vpsrlq ymm8, ymm0, 32
|
||||
vpsrlq ymm9, ymm1, 32
|
||||
vpmuludq ymm10, ymm0, ymm1
|
||||
vpmuludq ymm11, ymm9, ymm0
|
||||
vpmuludq ymm0, ymm8, ymm1
|
||||
vpsllq ymm11, ymm11, 32
|
||||
vpsllq ymm0, ymm0, 32
|
||||
vpaddq ymm10, ymm10, ymm11
|
||||
vpaddq ymm0, ymm10, ymm0
|
||||
|
||||
vbroadcastsd ymm1, qword ptr [r1_avx2_add]
|
||||
vpxor ymm1, ymm0, ymm1
|
||||
vbroadcastsd ymm2, qword ptr [r2_avx2_add]
|
||||
vpxor ymm2, ymm0, ymm2
|
||||
vbroadcastsd ymm3, qword ptr [r3_avx2_add]
|
||||
vpxor ymm3, ymm0, ymm3
|
||||
vbroadcastsd ymm4, qword ptr [r4_avx2_add]
|
||||
vpxor ymm4, ymm0, ymm4
|
||||
vbroadcastsd ymm5, qword ptr [r5_avx2_add]
|
||||
vpxor ymm5, ymm0, ymm5
|
||||
vbroadcastsd ymm6, qword ptr [r6_avx2_add]
|
||||
vpxor ymm6, ymm0, ymm6
|
||||
vbroadcastsd ymm7, qword ptr [r7_avx2_add]
|
||||
vpxor ymm7, ymm0, ymm7
|
||||
|
||||
vbroadcastsd ymm15, qword ptr [mul_hi_avx2_data] ;# carry_bit (bit 32)
|
||||
vpsllq ymm14, ymm15, 31 ;# sign64 (bit 63)
|
||||
randomx_dataset_init_avx2_prologue ENDP
|
||||
|
||||
;# generated SuperscalarHash code goes here
|
||||
|
||||
randomx_dataset_init_avx2_loop_end PROC
|
||||
include asm/program_sshash_avx2_loop_end.inc
|
||||
randomx_dataset_init_avx2_loop_end ENDP
|
||||
|
||||
randomx_dataset_init_avx2_epilogue PROC
|
||||
include asm/program_sshash_avx2_epilogue.inc
|
||||
randomx_dataset_init_avx2_epilogue ENDP
|
||||
|
||||
randomx_dataset_init_avx2_ssh_load PROC
|
||||
include asm/program_sshash_avx2_ssh_load.inc
|
||||
randomx_dataset_init_avx2_ssh_load ENDP
|
||||
|
||||
randomx_dataset_init_avx2_ssh_prefetch PROC
|
||||
include asm/program_sshash_avx2_ssh_prefetch.inc
|
||||
randomx_dataset_init_avx2_ssh_prefetch ENDP
|
||||
|
||||
randomx_program_epilogue PROC
|
||||
include asm/program_epilogue_store.inc
|
||||
include asm/program_epilogue_win64.inc
|
||||
|
|
|
@ -44,6 +44,11 @@ extern "C" {
|
|||
void randomx_program_loop_store();
|
||||
void randomx_program_loop_end();
|
||||
void randomx_dataset_init();
|
||||
void randomx_dataset_init_avx2_prologue();
|
||||
void randomx_dataset_init_avx2_loop_end();
|
||||
void randomx_dataset_init_avx2_epilogue();
|
||||
void randomx_dataset_init_avx2_ssh_load();
|
||||
void randomx_dataset_init_avx2_ssh_prefetch();
|
||||
void randomx_program_epilogue();
|
||||
void randomx_sshash_load();
|
||||
void randomx_sshash_prefetch();
|
||||
|
|
|
@ -53,7 +53,7 @@ extern "C" {
|
|||
#include "crypto/randomx/defyx/KangarooTwelve.h"
|
||||
}
|
||||
|
||||
#include "base/tools/Profiler.h"
|
||||
#include "crypto/rx/Profiler.h"
|
||||
|
||||
RandomX_ConfigurationWownero::RandomX_ConfigurationWownero()
|
||||
{
|
||||
|
@ -444,9 +444,9 @@ extern "C" {
|
|||
break;
|
||||
|
||||
case RANDOMX_FLAG_JIT:
|
||||
cache->jit = new randomx::JitCompiler(false);
|
||||
cache->jit = new randomx::JitCompiler(false, true);
|
||||
cache->initialize = &randomx::initCacheCompile;
|
||||
cache->datasetInit = cache->jit->getDatasetInitFunc();
|
||||
cache->datasetInit = nullptr;
|
||||
cache->memory = memory;
|
||||
break;
|
||||
|
||||
|
|
|
@ -177,6 +177,7 @@ void randomx_apply_config(const T& config)
|
|||
|
||||
void randomx_set_scratchpad_prefetch_mode(int mode);
|
||||
void randomx_set_huge_pages_jit(bool hugePages);
|
||||
void randomx_set_optimized_dataset_init(int value);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
|
|
|
@ -196,7 +196,7 @@ namespace randomx {
|
|||
int latency_;
|
||||
int resultOp_ = 0;
|
||||
int dstOp_ = 0;
|
||||
int srcOp_;
|
||||
int srcOp_ = 0;
|
||||
|
||||
SuperscalarInstructionInfo(const char* name)
|
||||
: name_(name), type_(SuperscalarInstructionType::INVALID), latency_(0) {}
|
||||
|
@ -282,11 +282,11 @@ namespace randomx {
|
|||
return fetchNextDefault(gen);
|
||||
}
|
||||
private:
|
||||
const char* name_;
|
||||
int index_;
|
||||
const int* counts_;
|
||||
int opsCount_;
|
||||
DecoderBuffer() : index_(-1) {}
|
||||
const char* name_ = nullptr;
|
||||
int index_ = -1;
|
||||
const int* counts_ = nullptr;
|
||||
int opsCount_ = 0;
|
||||
DecoderBuffer() = default;
|
||||
static const DecoderBuffer decodeBuffer484;
|
||||
static const DecoderBuffer decodeBuffer7333;
|
||||
static const DecoderBuffer decodeBuffer3733;
|
||||
|
@ -555,10 +555,10 @@ namespace randomx {
|
|||
const SuperscalarInstructionInfo* info_;
|
||||
int src_ = -1;
|
||||
int dst_ = -1;
|
||||
int mod_;
|
||||
uint32_t imm32_;
|
||||
SuperscalarInstructionType opGroup_;
|
||||
int opGroupPar_;
|
||||
int mod_ = 0;
|
||||
uint32_t imm32_ = 0;
|
||||
SuperscalarInstructionType opGroup_ = SuperscalarInstructionType::INVALID;
|
||||
int opGroupPar_ = 0;
|
||||
bool canReuse_ = false;
|
||||
bool groupParIsSource_ = false;
|
||||
|
||||
|
|
|
@ -39,13 +39,13 @@ namespace randomx {
|
|||
Instruction& operator()(int pc) {
|
||||
return programBuffer[pc];
|
||||
}
|
||||
uint32_t getSize() {
|
||||
uint32_t getSize() const {
|
||||
return size;
|
||||
}
|
||||
void setSize(uint32_t val) {
|
||||
size = val;
|
||||
}
|
||||
int getAddressRegister() {
|
||||
int getAddressRegister() const {
|
||||
return addrReg;
|
||||
}
|
||||
void setAddressRegister(int val) {
|
||||
|
|
|
@ -30,13 +30,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include <iomanip>
|
||||
#include <stdexcept>
|
||||
#include "crypto/randomx/virtual_machine.hpp"
|
||||
#include "crypto/randomx/common.hpp"
|
||||
#include "crypto/randomx/aes_hash.hpp"
|
||||
#include "crypto/randomx/blake2/blake2.h"
|
||||
#include "crypto/randomx/intrin_portable.h"
|
||||
#include "crypto/randomx/allocator.hpp"
|
||||
#include "crypto/randomx/blake2/blake2.h"
|
||||
#include "crypto/randomx/common.hpp"
|
||||
#include "crypto/randomx/intrin_portable.h"
|
||||
#include "crypto/randomx/soft_aes.h"
|
||||
#include "base/tools/Profiler.h"
|
||||
#include "crypto/rx/Profiler.h"
|
||||
|
||||
randomx_vm::~randomx_vm() {
|
||||
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
|
||||
Copyright (c) 2019-2020, SChernykh <https://github.com/SChernykh>
|
||||
Copyright (c) 2019-2020, XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
|
@ -28,7 +30,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "crypto/randomx/vm_compiled.hpp"
|
||||
#include "crypto/randomx/common.hpp"
|
||||
#include "base/tools/Profiler.h"
|
||||
#include "crypto/rx/Profiler.h"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
|
@ -56,9 +58,9 @@ namespace randomx {
|
|||
void CompiledVm<softAes>::execute() {
|
||||
PROFILE_SCOPE(RandomX_JIT_execute);
|
||||
|
||||
#ifdef XMRIG_ARM
|
||||
# ifdef XMRIG_ARM
|
||||
memcpy(reg.f, config.eMask, sizeof(config.eMask));
|
||||
#endif
|
||||
# endif
|
||||
compiler.getProgramFunc()(reg, mem, scratchpad, RandomX_CurrentConfig.ProgramIterations);
|
||||
}
|
||||
|
||||
|
|
|
@ -59,7 +59,7 @@ namespace randomx {
|
|||
protected:
|
||||
void execute();
|
||||
|
||||
JitCompiler compiler{ true };
|
||||
JitCompiler compiler{ true, false };
|
||||
};
|
||||
|
||||
using CompiledVmDefault = CompiledVm<1>;
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
Copyright (c) 2018-2020, tevador <tevador@gmail.com>
|
||||
Copyright (c) 2019-2020, SChernykh <https://github.com/SChernykh>
|
||||
Copyright (c) 2019-2020, XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
|
@ -36,6 +38,11 @@ namespace randomx {
|
|||
void CompiledLightVm<softAes>::setCache(randomx_cache* cache) {
|
||||
cachePtr = cache;
|
||||
mem.memory = cache->memory;
|
||||
|
||||
# ifdef XMRIG_SECURE_JIT
|
||||
compiler.enableWriting();
|
||||
# endif
|
||||
|
||||
compiler.generateSuperscalarHash(cache->programs);
|
||||
}
|
||||
|
||||
|
@ -43,7 +50,13 @@ namespace randomx {
|
|||
void CompiledLightVm<softAes>::run(void* seed) {
|
||||
VmBase<softAes>::generateProgram(seed);
|
||||
randomx_vm::initialize();
|
||||
|
||||
# ifdef XMRIG_SECURE_JIT
|
||||
compiler.enableWriting();
|
||||
# endif
|
||||
|
||||
compiler.generateProgramLight(program, config, datasetOffset);
|
||||
|
||||
CompiledVm<softAes>::execute();
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue