From 633aaccd9cf274230b16a2151b8f155b0fffc049 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Sat, 19 Dec 2020 16:18:49 +0100 Subject: [PATCH] Added config option for AVX2 dataset init -1 = Auto detect 0 = Always disabled 1 = Enabled if AVX2 is supported --- src/config.json | 1 + src/core/config/Config_default.h | 1 + src/crypto/randomx/jit_compiler_a64.cpp | 8 ++- src/crypto/randomx/jit_compiler_a64.hpp | 2 +- src/crypto/randomx/jit_compiler_fallback.cpp | 3 + src/crypto/randomx/jit_compiler_fallback.hpp | 2 +- src/crypto/randomx/jit_compiler_x86.cpp | 65 ++++++++++++-------- src/crypto/randomx/jit_compiler_x86.hpp | 2 +- src/crypto/randomx/randomx.cpp | 2 +- src/crypto/randomx/randomx.h | 1 + src/crypto/randomx/vm_compiled.hpp | 2 +- src/crypto/rx/Rx.cpp | 1 + src/crypto/rx/RxConfig.cpp | 9 ++- src/crypto/rx/RxConfig.h | 13 ++-- 14 files changed, 74 insertions(+), 38 deletions(-) diff --git a/src/config.json b/src/config.json index 68fb439f..aad27326 100644 --- a/src/config.json +++ b/src/config.json @@ -16,6 +16,7 @@ "title": true, "randomx": { "init": -1, + "init-avx2": -1, "mode": "auto", "1gb-pages": false, "rdmsr": true, diff --git a/src/core/config/Config_default.h b/src/core/config/Config_default.h index 6c8106ca..94cb88d6 100644 --- a/src/core/config/Config_default.h +++ b/src/core/config/Config_default.h @@ -50,6 +50,7 @@ R"===( "colors": true, "randomx": { "init": -1, + "init-avx2": -1, "mode": "auto", "1gb-pages": false, "rdmsr": true, diff --git a/src/crypto/randomx/jit_compiler_a64.cpp b/src/crypto/randomx/jit_compiler_a64.cpp index 50e39c50..f98e36f6 100644 --- a/src/crypto/randomx/jit_compiler_a64.cpp +++ b/src/crypto/randomx/jit_compiler_a64.cpp @@ -36,12 +36,18 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "crypto/randomx/virtual_memory.hpp" static bool hugePagesJIT = false; +static int optimizedDatasetInit = -1; void randomx_set_huge_pages_jit(bool hugePages) { hugePagesJIT = hugePages; } +void randomx_set_optimized_dataset_init(int value) +{ + optimizedDatasetInit = value; +} + namespace ARMV8A { constexpr uint32_t B = 0x14000000; @@ -98,7 +104,7 @@ static size_t CalcDatasetItemSize() constexpr uint32_t IntRegMap[8] = { 4, 5, 6, 7, 12, 13, 14, 15 }; -JitCompilerA64::JitCompilerA64(bool hugePagesEnable) : +JitCompilerA64::JitCompilerA64(bool hugePagesEnable, bool) : hugePages(hugePagesJIT && hugePagesEnable), literalPos(ImulRcpLiteralsEnd) { diff --git a/src/crypto/randomx/jit_compiler_a64.hpp b/src/crypto/randomx/jit_compiler_a64.hpp index faa2ac2d..32ff5166 100644 --- a/src/crypto/randomx/jit_compiler_a64.hpp +++ b/src/crypto/randomx/jit_compiler_a64.hpp @@ -47,7 +47,7 @@ namespace randomx { class JitCompilerA64 { public: - explicit JitCompilerA64(bool hugePagesEnable); + explicit JitCompilerA64(bool hugePagesEnable, bool optimizedInitDatasetEnable); ~JitCompilerA64(); void prepare() {} diff --git a/src/crypto/randomx/jit_compiler_fallback.cpp b/src/crypto/randomx/jit_compiler_fallback.cpp index 374da678..369458a4 100644 --- a/src/crypto/randomx/jit_compiler_fallback.cpp +++ b/src/crypto/randomx/jit_compiler_fallback.cpp @@ -35,3 +35,6 @@ void randomx_set_huge_pages_jit(bool) { } +void randomx_set_optimized_dataset_init(int) +{ +} diff --git a/src/crypto/randomx/jit_compiler_fallback.hpp b/src/crypto/randomx/jit_compiler_fallback.hpp index b8641197..cdf87cba 100644 --- a/src/crypto/randomx/jit_compiler_fallback.hpp +++ b/src/crypto/randomx/jit_compiler_fallback.hpp @@ -43,7 +43,7 @@ namespace randomx { class JitCompilerFallback { public: - explicit JitCompilerFallback(bool) { + explicit JitCompilerFallback(bool, bool) { throw std::runtime_error("JIT compilation is not supported on this platform"); } void prepare() {} diff --git 
a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index 621ca9b6..5f7a83a1 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -52,12 +52,18 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif static bool hugePagesJIT = false; +static int optimizedDatasetInit = -1; void randomx_set_huge_pages_jit(bool hugePages) { hugePagesJIT = hugePages; } +void randomx_set_optimized_dataset_init(int value) +{ + optimizedDatasetInit = value; +} + namespace randomx { /* @@ -209,41 +215,52 @@ namespace randomx { static std::atomic codeOffset; constexpr size_t codeOffsetIncrement = 59 * 64; - JitCompilerX86::JitCompilerX86(bool hugePagesEnable) { + JitCompilerX86::JitCompilerX86(bool hugePagesEnable, bool optimizedInitDatasetEnable) { BranchesWithin32B = xmrig::Cpu::info()->jccErratum(); hasAVX = xmrig::Cpu::info()->hasAVX(); hasAVX2 = xmrig::Cpu::info()->hasAVX2(); - // Set to false by default + // Disable by default initDatasetAVX2 = false; - xmrig::ICpuInfo::Vendor vendor = xmrig::Cpu::info()->vendor(); - xmrig::ICpuInfo::Arch arch = xmrig::Cpu::info()->arch(); - - if (vendor == xmrig::ICpuInfo::VENDOR_INTEL) { - // AVX2 init is faster on Intel CPUs without HT - initDatasetAVX2 = xmrig::Cpu::info()->cores() == xmrig::Cpu::info()->threads(); - } - else if (vendor == xmrig::ICpuInfo::VENDOR_AMD) { - switch (arch) { - case xmrig::ICpuInfo::ARCH_ZEN: - case xmrig::ICpuInfo::ARCH_ZEN_PLUS: - // AVX2 init is slow on Zen/Zen+ - initDatasetAVX2 = false; - break; - case xmrig::ICpuInfo::ARCH_ZEN2: - // AVX2 init is faster on Zen2 without SMT (mobile CPUs) - initDatasetAVX2 = xmrig::Cpu::info()->cores() == xmrig::Cpu::info()->threads(); - break; - case xmrig::ICpuInfo::ARCH_ZEN3: - // AVX2 init is faster on Zen3 + if (optimizedInitDatasetEnable) { + // Dataset init using AVX2: + // -1 = Auto detect + // 0 = Always disabled + // +1 = Enabled if AVX2 is supported + if (optimizedDatasetInit > 0) {
initDatasetAVX2 = true; - break; + } + else if (optimizedDatasetInit < 0) { + xmrig::ICpuInfo::Vendor vendor = xmrig::Cpu::info()->vendor(); + xmrig::ICpuInfo::Arch arch = xmrig::Cpu::info()->arch(); + + if (vendor == xmrig::ICpuInfo::VENDOR_INTEL) { + // AVX2 init is faster on Intel CPUs without HT + initDatasetAVX2 = (xmrig::Cpu::info()->cores() == xmrig::Cpu::info()->threads()); + } + else if (vendor == xmrig::ICpuInfo::VENDOR_AMD) { + switch (arch) { + case xmrig::ICpuInfo::ARCH_ZEN: + case xmrig::ICpuInfo::ARCH_ZEN_PLUS: + // AVX2 init is slower on Zen/Zen+ + initDatasetAVX2 = false; + break; + case xmrig::ICpuInfo::ARCH_ZEN2: + // AVX2 init is faster on Zen2 without SMT (mobile CPUs) + initDatasetAVX2 = (xmrig::Cpu::info()->cores() == xmrig::Cpu::info()->threads()); + break; + case xmrig::ICpuInfo::ARCH_ZEN3: + // AVX2 init is faster on Zen3 + initDatasetAVX2 = true; + break; + } + } } } - // Sorry low-end Intel CPUs + // Sorry, low-end Intel CPUs if (!hasAVX2) { initDatasetAVX2 = false; } diff --git a/src/crypto/randomx/jit_compiler_x86.hpp b/src/crypto/randomx/jit_compiler_x86.hpp index f0f71895..5c43264c 100644 --- a/src/crypto/randomx/jit_compiler_x86.hpp +++ b/src/crypto/randomx/jit_compiler_x86.hpp @@ -49,7 +49,7 @@ namespace randomx { class JitCompilerX86 { public: - explicit JitCompilerX86(bool hugePagesEnable); + explicit JitCompilerX86(bool hugePagesEnable, bool optimizedInitDatasetEnable); ~JitCompilerX86(); void prepare(); void generateProgram(Program&, ProgramConfiguration&, uint32_t); diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index 14aa7067..9986a33f 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -381,7 +381,7 @@ extern "C" { break; case RANDOMX_FLAG_JIT: - cache->jit = new randomx::JitCompiler(false); + cache->jit = new randomx::JitCompiler(false, true); cache->initialize = &randomx::initCacheCompile; cache->datasetInit = nullptr; cache->memory = memory; diff --git 
a/src/crypto/randomx/randomx.h b/src/crypto/randomx/randomx.h index 9a1fb3c7..f81df9db 100644 --- a/src/crypto/randomx/randomx.h +++ b/src/crypto/randomx/randomx.h @@ -170,6 +170,7 @@ void randomx_apply_config(const T& config) void randomx_set_scratchpad_prefetch_mode(int mode); void randomx_set_huge_pages_jit(bool hugePages); +void randomx_set_optimized_dataset_init(int value); #if defined(__cplusplus) extern "C" { diff --git a/src/crypto/randomx/vm_compiled.hpp b/src/crypto/randomx/vm_compiled.hpp index 2db99c75..0824d6bd 100644 --- a/src/crypto/randomx/vm_compiled.hpp +++ b/src/crypto/randomx/vm_compiled.hpp @@ -59,7 +59,7 @@ namespace randomx { protected: void execute(); - JitCompiler compiler{ true }; + JitCompiler compiler{ true, false }; }; using CompiledVmDefault = CompiledVm<1>; diff --git a/src/crypto/rx/Rx.cpp b/src/crypto/rx/Rx.cpp index ea671d01..40d3c612 100644 --- a/src/crypto/rx/Rx.cpp +++ b/src/crypto/rx/Rx.cpp @@ -96,6 +96,7 @@ bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu randomx_set_scratchpad_prefetch_mode(config.scratchpadPrefetchMode()); randomx_set_huge_pages_jit(cpu.isHugePagesJit()); + randomx_set_optimized_dataset_init(config.initDatasetAVX2()); if (!msrInitialized) { msrEnabled = msrInit(config, cpu.threads().get(seed.algorithm()).data()); diff --git a/src/crypto/rx/RxConfig.cpp b/src/crypto/rx/RxConfig.cpp index d9f05d4e..ae6215df 100644 --- a/src/crypto/rx/RxConfig.cpp +++ b/src/crypto/rx/RxConfig.cpp @@ -47,6 +47,7 @@ namespace xmrig { const char *RxConfig::kInit = "init"; +const char *RxConfig::kInitAVX2 = "init-avx2"; const char *RxConfig::kField = "randomx"; const char *RxConfig::kMode = "mode"; const char *RxConfig::kOneGbPages = "1gb-pages"; @@ -86,9 +87,10 @@ static_assert (kMsrArraySize == ICpuInfo::MSR_MOD_MAX, "kMsrArraySize and MSR_MO bool xmrig::RxConfig::read(const rapidjson::Value &value) { if (value.IsObject()) { - m_threads = Json::getInt(value, kInit, m_threads); - m_mode = 
readMode(Json::getValue(value, kMode)); - m_rdmsr = Json::getBool(value, kRdmsr, m_rdmsr); + m_threads = Json::getInt(value, kInit, m_threads); + m_initDatasetAVX2 = Json::getInt(value, kInitAVX2, m_initDatasetAVX2); + m_mode = readMode(Json::getValue(value, kMode)); + m_rdmsr = Json::getBool(value, kRdmsr, m_rdmsr); # ifdef XMRIG_FEATURE_MSR readMSR(Json::getValue(value, kWrmsr)); @@ -141,6 +143,7 @@ rapidjson::Value xmrig::RxConfig::toJSON(rapidjson::Document &doc) const Value obj(kObjectType); obj.AddMember(StringRef(kInit), m_threads, allocator); + obj.AddMember(StringRef(kInitAVX2), m_initDatasetAVX2, allocator); obj.AddMember(StringRef(kMode), StringRef(modeName()), allocator); obj.AddMember(StringRef(kOneGbPages), m_oneGbPages, allocator); obj.AddMember(StringRef(kRdmsr), m_rdmsr, allocator); diff --git a/src/crypto/rx/RxConfig.h b/src/crypto/rx/RxConfig.h index 1e79d468..fb3a656d 100644 --- a/src/crypto/rx/RxConfig.h +++ b/src/crypto/rx/RxConfig.h @@ -61,6 +61,7 @@ public: static const char *kCacheQoS; static const char *kField; static const char *kInit; + static const char *kInitAVX2; static const char *kMode; static const char *kOneGbPages; static const char *kRdmsr; @@ -83,6 +84,7 @@ public: const char *modeName() const; uint32_t threads(uint32_t limit = 100) const; + inline int initDatasetAVX2() const { return m_initDatasetAVX2; } inline bool isOneGbPages() const { return m_oneGbPages; } inline bool rdmsr() const { return m_rdmsr; } inline bool wrmsr() const { return m_wrmsr; } @@ -111,11 +113,12 @@ private: Mode readMode(const rapidjson::Value &value) const; - bool m_numa = true; - bool m_oneGbPages = false; - bool m_rdmsr = true; - int m_threads = -1; - Mode m_mode = AutoMode; + bool m_numa = true; + bool m_oneGbPages = false; + bool m_rdmsr = true; + int m_threads = -1; + int m_initDatasetAVX2 = -1; + Mode m_mode = AutoMode; ScratchpadPrefetchMode m_scratchpadPrefetchMode = ScratchpadPrefetchT0;