From d3429682111d3871b6e83b14c8d30149683df58c Mon Sep 17 00:00:00 2001 From: SChernykh Date: Tue, 21 Jan 2020 19:44:56 +0100 Subject: [PATCH] Added support for BMI2 instructions --- src/backend/cpu/interfaces/ICpuInfo.h | 1 + src/backend/cpu/platform/AdvancedCpuInfo.cpp | 1 + src/backend/cpu/platform/AdvancedCpuInfo.h | 2 + src/backend/cpu/platform/BasicCpuInfo.cpp | 11 +++++ src/backend/cpu/platform/BasicCpuInfo.h | 2 + src/crypto/randomx/jit_compiler_x86.cpp | 46 ++++++++++++++++++-- src/crypto/randomx/jit_compiler_x86.hpp | 2 + src/crypto/randomx/randomx.cpp | 21 ++++++++- 8 files changed, 81 insertions(+), 5 deletions(-) diff --git a/src/backend/cpu/interfaces/ICpuInfo.h b/src/backend/cpu/interfaces/ICpuInfo.h index a272d3cd..d8e40394 100644 --- a/src/backend/cpu/interfaces/ICpuInfo.h +++ b/src/backend/cpu/interfaces/ICpuInfo.h @@ -63,6 +63,7 @@ public: virtual Assembly::Id assembly() const = 0; virtual bool hasAES() const = 0; virtual bool hasAVX2() const = 0; + virtual bool hasBMI2() const = 0; virtual bool hasOneGbPages() const = 0; virtual const char *backend() const = 0; virtual const char *brand() const = 0; diff --git a/src/backend/cpu/platform/AdvancedCpuInfo.cpp b/src/backend/cpu/platform/AdvancedCpuInfo.cpp index 85bf9207..f6d42ae4 100644 --- a/src/backend/cpu/platform/AdvancedCpuInfo.cpp +++ b/src/backend/cpu/platform/AdvancedCpuInfo.cpp @@ -156,6 +156,7 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() : } m_avx2 = data.flags[CPU_FEATURE_AVX2] && data.flags[CPU_FEATURE_OSXSAVE]; + m_bmi2 = data.flags[CPU_FEATURE_BMI2]; } diff --git a/src/backend/cpu/platform/AdvancedCpuInfo.h b/src/backend/cpu/platform/AdvancedCpuInfo.h index beafa57c..ec1043ed 100644 --- a/src/backend/cpu/platform/AdvancedCpuInfo.h +++ b/src/backend/cpu/platform/AdvancedCpuInfo.h @@ -43,6 +43,7 @@ protected: inline Assembly::Id assembly() const override { return m_assembly; } inline bool hasAES() const override { return m_aes; } inline bool hasAVX2() const override { return m_avx2; } + 
inline bool hasBMI2() const override { return m_bmi2; } inline bool hasOneGbPages() const override { return m_pdpe1gb; } inline const char *backend() const override { return m_backend; } inline const char *brand() const override { return m_brand; } @@ -59,6 +60,7 @@ private: Assembly m_assembly; bool m_aes = false; bool m_avx2 = false; + bool m_bmi2 = false; bool m_L2_exclusive = false; char m_backend[32]{}; char m_brand[64 + 5]{}; diff --git a/src/backend/cpu/platform/BasicCpuInfo.cpp b/src/backend/cpu/platform/BasicCpuInfo.cpp index 24f9c012..c5a25b19 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.cpp +++ b/src/backend/cpu/platform/BasicCpuInfo.cpp @@ -45,6 +45,10 @@ # define bit_AVX2 (1 << 5) #endif +#ifndef bit_BMI2 +# define bit_BMI2 (1 << 8) +#endif + #ifndef bit_PDPE1GB # define bit_PDPE1GB (1 << 26) #endif @@ -141,6 +145,12 @@ static inline bool has_avx2() } +static inline bool has_bmi2() +{ + return has_feature(EXTENDED_FEATURES, EBX_Reg, bit_BMI2); +} + + static inline bool has_pdpe1gb() { return has_feature(PROCESSOR_EXT_INFO, EDX_Reg, bit_PDPE1GB); @@ -154,6 +164,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : m_threads(std::thread::hardware_concurrency()), m_aes(has_aes_ni()), m_avx2(has_avx2()), + m_bmi2(has_bmi2()), m_pdpe1gb(has_pdpe1gb()) { cpu_brand_string(m_brand); diff --git a/src/backend/cpu/platform/BasicCpuInfo.h b/src/backend/cpu/platform/BasicCpuInfo.h index b1139920..407e4177 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.h +++ b/src/backend/cpu/platform/BasicCpuInfo.h @@ -44,6 +44,7 @@ protected: inline Assembly::Id assembly() const override { return m_assembly; } inline bool hasAES() const override { return m_aes; } inline bool hasAVX2() const override { return m_avx2; } + inline bool hasBMI2() const override { return m_bmi2; } inline bool hasOneGbPages() const override { return m_pdpe1gb; } inline const char *brand() const override { return m_brand; } inline MsrMod msrMod() const override { return m_msrMod; } @@ -63,6 +64,7 @@ 
private: Assembly m_assembly = Assembly::NONE; bool m_aes = false; const bool m_avx2 = false; + const bool m_bmi2 = false; const bool m_pdpe1gb = false; MsrMod m_msrMod = MSR_MOD_NONE; Vendor m_vendor = VENDOR_UNKNOWN; diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index 135203fe..b3135526 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -721,14 +721,31 @@ namespace randomx { uint8_t* const p = code; int pos = codePos; + const uint32_t dst = instr.dst; + emit(REX_MOV_RR64, p, pos); - emitByte(0xc0 + instr.dst, p, pos); + emitByte(0xc0 + dst, p, pos); emit(REX_MUL_R, p, pos); emitByte(0xe0 + instr.src, p, pos); emit(REX_MOV_R64R, p, pos); - emitByte(0xc2 + 8 * instr.dst, p, pos); + emitByte(0xc2 + 8 * dst, p, pos); - registerUsage[instr.dst] = pos; + registerUsage[dst] = pos; + codePos = pos; + } + + void JitCompilerX86::h_IMULH_R_BMI2(const Instruction& instr) { + uint8_t* const p = code; + int pos = codePos; + + const uint32_t src = instr.src; + const uint32_t dst = instr.dst; + + *(uint32_t*)(p + pos) = 0xC4D08B49 + (dst << 16); + *(uint32_t*)(p + pos + 4) = 0xC0F6FB42 + (dst << 27) + (src << 24); + pos += 8; + + registerUsage[dst] = pos; codePos = pos; } @@ -756,6 +773,29 @@ namespace randomx { codePos = pos; } + void JitCompilerX86::h_IMULH_M_BMI2(const Instruction& instr) { + uint8_t* const p = code; + int pos = codePos; + + const uint64_t src = instr.src; + const uint64_t dst = instr.dst; + + if (src != dst) { + genAddressReg(instr, p, pos); + *(uint32_t*)(p + pos) = static_cast<uint32_t>(0xC4D08B49 + (dst << 16)); + *(uint64_t*)(p + pos + 4) = 0x0E04F6FB62ULL + (dst << 27); + pos += 9; + } + else { + *(uint64_t*)(p + pos) = 0x86F6FB62C4D08B49ULL + (dst << 16) + (dst << 59); + *(uint32_t*)(p + pos + 8) = instr.getImm32() & ScratchpadL3Mask; + pos += 12; + } + + registerUsage[dst] = pos; + codePos = pos; + } + void JitCompilerX86::h_ISMULH_R(const Instruction& instr) { 
uint8_t* const p = code; int pos = codePos; diff --git a/src/crypto/randomx/jit_compiler_x86.hpp b/src/crypto/randomx/jit_compiler_x86.hpp index e330470a..a194f1af 100644 --- a/src/crypto/randomx/jit_compiler_x86.hpp +++ b/src/crypto/randomx/jit_compiler_x86.hpp @@ -123,7 +123,9 @@ namespace randomx { void h_IMUL_R(const Instruction&); void h_IMUL_M(const Instruction&); void h_IMULH_R(const Instruction&); + void h_IMULH_R_BMI2(const Instruction&); void h_IMULH_M(const Instruction&); + void h_IMULH_M_BMI2(const Instruction&); void h_ISMULH_R(const Instruction&); void h_ISMULH_M(const Instruction&); void h_IMUL_RCP(const Instruction&); diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index ed9be8c9..88f7b190 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -41,6 +41,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "crypto/randomx/jit_compiler_a64_static.hpp" #endif +#include "backend/cpu/Cpu.h" + #include RandomX_ConfigurationWownero::RandomX_ConfigurationWownero() @@ -235,14 +237,29 @@ void RandomX_ConfigurationBase::Apply() CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \ for (; k < CEIL_##x; ++k) { JIT_HANDLE(x, prev); } +#define INST_HANDLE2(x, func_name, prev) \ + CEIL_##x = CEIL_##prev + RANDOMX_FREQ_##x; \ + for (; k < CEIL_##x; ++k) { JIT_HANDLE(func_name, prev); } + INST_HANDLE(IADD_RS, NULL); INST_HANDLE(IADD_M, IADD_RS); INST_HANDLE(ISUB_R, IADD_M); INST_HANDLE(ISUB_M, ISUB_R); INST_HANDLE(IMUL_R, ISUB_M); INST_HANDLE(IMUL_M, IMUL_R); - INST_HANDLE(IMULH_R, IMUL_M); - INST_HANDLE(IMULH_M, IMULH_R); + +#if defined(_M_X64) || defined(__x86_64__) + if (xmrig::Cpu::info()->hasBMI2()) { + INST_HANDLE2(IMULH_R, IMULH_R_BMI2, IMUL_M); + INST_HANDLE2(IMULH_M, IMULH_M_BMI2, IMULH_R); + } + else +#endif + { + INST_HANDLE(IMULH_R, IMUL_M); + INST_HANDLE(IMULH_M, IMULH_R); + } + INST_HANDLE(ISMULH_R, IMULH_M); INST_HANDLE(ISMULH_M, ISMULH_R); INST_HANDLE(IMUL_RCP, ISMULH_M);