From 83c7fad8821e04f0ca4c7765984c889973dd14da Mon Sep 17 00:00:00 2001 From: XMRig Date: Mon, 19 Aug 2019 04:26:37 +0700 Subject: [PATCH 01/18] v3.1.1-dev --- src/version.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/version.h b/src/version.h index e36483df..3cb05a7d 100644 --- a/src/version.h +++ b/src/version.h @@ -28,7 +28,7 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC "XMRig CPU miner" -#define APP_VERSION "3.1.0" +#define APP_VERSION "3.1.1-dev" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com" @@ -36,7 +36,7 @@ #define APP_VER_MAJOR 3 #define APP_VER_MINOR 1 -#define APP_VER_PATCH 0 +#define APP_VER_PATCH 1 #ifdef _MSC_VER # if (_MSC_VER >= 1920) From 39e69c27231401d22ce2b2a96ab05ba08bef50f5 Mon Sep 17 00:00:00 2001 From: XMRig Date: Fri, 23 Aug 2019 00:16:11 +0700 Subject: [PATCH 02/18] #1133 Fixed syslog regression. --- src/base/io/log/Log.cpp | 2 +- src/base/io/log/backends/SysLog.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/base/io/log/Log.cpp b/src/base/io/log/Log.cpp index 4e3bd5a5..8d56f7c9 100644 --- a/src/base/io/log/Log.cpp +++ b/src/base/io/log/Log.cpp @@ -111,7 +111,7 @@ public: if (!m_backends.empty()) { for (ILogBackend *backend : m_backends) { backend->print(level, m_buf, offset, size, true); - backend->print(level, txt.c_str(), offset, txt.size(), false); + backend->print(level, txt.c_str(), offset ? (offset - 11) : 0, txt.size(), false); } } else { diff --git a/src/base/io/log/backends/SysLog.h b/src/base/io/log/backends/SysLog.h index 0e8d0f8e..d131784b 100644 --- a/src/base/io/log/backends/SysLog.h +++ b/src/base/io/log/backends/SysLog.h @@ -37,7 +37,7 @@ class SysLog : public ILogBackend { public: SysLog(); - ~SysLog(); + ~SysLog() override; protected: void print(int level, const char *line, size_t offset, size_t size, bool colors) override; From 21a56c9cbfb9f6e4cfaeb20ea111e8d00303a068 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Tue, 27 Aug 2019 16:12:13 +0200 Subject: [PATCH 03/18] Updated RandomX --- src/crypto/randomx/argon2_core.c | 13 --- src/crypto/randomx/blake2_generator.cpp | 2 +- src/crypto/randomx/blake2_generator.hpp | 2 +- src/crypto/randomx/bytecode_machine.cpp | 2 +- src/crypto/randomx/common.hpp | 2 +- src/crypto/randomx/intrin_portable.h | 139 ++++++++++++++++++++++-- src/crypto/randomx/jit_compiler_x86.cpp | 4 +- src/crypto/randomx/randomx.cpp | 9 +- src/crypto/randomx/superscalar.cpp | 14 +-- 9 files changed, 151 insertions(+), 36 deletions(-) diff --git a/src/crypto/randomx/argon2_core.c b/src/crypto/randomx/argon2_core.c index f9babbc4..97176608 100644 --- a/src/crypto/randomx/argon2_core.c +++ b/src/crypto/randomx/argon2_core.c @@ -263,19 +263,6 @@ int rxa2_validate_inputs(const argon2_context *context) { return ARGON2_INCORRECT_PARAMETER; } - if (NULL == context->out) { - return ARGON2_OUTPUT_PTR_NULL; - } - - /* Validate output length */ - if (ARGON2_MIN_OUTLEN > context->outlen) { - return ARGON2_OUTPUT_TOO_SHORT; - } - - if (ARGON2_MAX_OUTLEN < context->outlen) { - return ARGON2_OUTPUT_TOO_LONG; - } - /* Validate password (required param) */ if (NULL == context->pwd) { if (0 != context->pwdlen) { diff --git a/src/crypto/randomx/blake2_generator.cpp b/src/crypto/randomx/blake2_generator.cpp index 83789129..edfe2e34 100644 --- a/src/crypto/randomx/blake2_generator.cpp +++ b/src/crypto/randomx/blake2_generator.cpp @@ -46,7 +46,7 @@ namespace randomx { return data[dataIndex++]; } - uint32_t Blake2Generator::getInt32() { + uint32_t Blake2Generator::getUInt32() { checkData(4); auto ret = load32(&data[dataIndex]); dataIndex += 4; diff --git a/src/crypto/randomx/blake2_generator.hpp b/src/crypto/randomx/blake2_generator.hpp index b5ac0801..5e7f61f2 100644 --- a/src/crypto/randomx/blake2_generator.hpp +++ b/src/crypto/randomx/blake2_generator.hpp @@ -36,7 +36,7 @@ namespace randomx { public: Blake2Generator(const void* seed, size_t seedSize, int nonce = 0); uint8_t getByte(); - uint32_t getInt32(); + uint32_t getUInt32(); private: void checkData(const size_t); diff --git a/src/crypto/randomx/bytecode_machine.cpp b/src/crypto/randomx/bytecode_machine.cpp index 6c51b86c..55a63935 100644 --- a/src/crypto/randomx/bytecode_machine.cpp +++ b/src/crypto/randomx/bytecode_machine.cpp @@ -244,7 +244,7 @@ namespace randomx { if (opcode < RandomX_CurrentConfig.CEIL_IMUL_RCP) { uint64_t divisor = instr.getImm32(); - if (!isPowerOf2(divisor)) { + if (!isZeroOrPowerOf2(divisor)) { auto dst = instr.dst % RegistersCount; ibc.type = InstructionType::IMUL_R; ibc.idst = &nreg->r[dst]; diff --git a/src/crypto/randomx/common.hpp b/src/crypto/randomx/common.hpp index 31b18ce4..da36f2c5 100644 --- a/src/crypto/randomx/common.hpp +++ b/src/crypto/randomx/common.hpp @@ -137,7 +137,7 @@ namespace randomx { constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register constexpr int RegisterNeedsSib = 4; //x86 r12 register - inline bool isPowerOf2(uint64_t x) { + inline bool isZeroOrPowerOf2(uint64_t x) { return (x & (x - 1)) == 0; } diff --git a/src/crypto/randomx/intrin_portable.h b/src/crypto/randomx/intrin_portable.h index e4916096..338d6d89 100644 --- a/src/crypto/randomx/intrin_portable.h +++ b/src/crypto/randomx/intrin_portable.h @@ -376,11 +376,131 @@ FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) { #define RANDOMX_DEFAULT_FENV -void rx_reset_float_state(); +#elif defined(__aarch64__) -void rx_set_rounding_mode(uint32_t mode); +#include +#include +#include -#else //end altivec +typedef uint8x16_t rx_vec_i128; +typedef float64x2_t rx_vec_f128; + +#define rx_aligned_alloc(size, align) aligned_alloc(align, size) +#define rx_aligned_free(a) free(a) + +inline void rx_prefetch_nta(void* ptr) { + asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr)); +} + +FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) { + return vld1q_f64((const float64_t*)pd); +} + +FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 val) { + vst1q_f64((float64_t*)mem_addr, val); +} + +FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) { + float64x2_t temp; + temp = vcopyq_laneq_f64(temp, 1, a, 1); + a = vcopyq_laneq_f64(a, 1, a, 0); + return vcopyq_laneq_f64(a, 0, temp, 1); +} + +FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) { + uint64x2_t temp0 = vdupq_n_u64(x0); + uint64x2_t temp1 = vdupq_n_u64(x1); + return vreinterpretq_f64_u64(vcopyq_laneq_u64(temp0, 1, temp1, 0)); +} + +FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) { + return vreinterpretq_f64_u64(vdupq_n_u64(x)); +} + +#define rx_add_vec_f128 vaddq_f64 +#define rx_sub_vec_f128 vsubq_f64 +#define rx_mul_vec_f128 vmulq_f64 +#define rx_div_vec_f128 vdivq_f64 +#define rx_sqrt_vec_f128 vsqrtq_f64 + +FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { + return vreinterpretq_f64_u8(veorq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b))); +} + +FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { + return vreinterpretq_f64_u8(vandq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b))); +} + +FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { + return vreinterpretq_f64_u8(vorrq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b))); +} + +#ifdef __ARM_FEATURE_CRYPTO + + +FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 a, rx_vec_i128 key) { + const uint8x16_t zero = { 0 }; + return vaesmcq_u8(vaeseq_u8(a, zero)) ^ key; +} + +FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 a, rx_vec_i128 key) { + const uint8x16_t zero = { 0 }; + return vaesimcq_u8(vaesdq_u8(a, zero)) ^ key; +} + +#define HAVE_AES + +#endif + +#define rx_xor_vec_i128 veorq_u8 + +FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) { + return vgetq_lane_s32(vreinterpretq_s32_u8(a), 0); +} + +FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) { + return vgetq_lane_s32(vreinterpretq_s32_u8(a), 1); +} + +FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) { + return vgetq_lane_s32(vreinterpretq_s32_u8(a), 2); +} + +FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) { + return vgetq_lane_s32(vreinterpretq_s32_u8(a), 3); +} + +FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) { + int32_t data[4]; + data[0] = _I0; + data[1] = _I1; + data[2] = _I2; + data[3] = _I3; + return vreinterpretq_u8_s32(vld1q_s32(data)); +}; + +#define rx_xor_vec_i128 veorq_u8 + +FORCE_INLINE rx_vec_i128 rx_load_vec_i128(const rx_vec_i128* mem_addr) { + return vld1q_u8((const uint8_t*)mem_addr); +} + +FORCE_INLINE void rx_store_vec_i128(rx_vec_i128* mem_addr, rx_vec_i128 val) { + vst1q_u8((uint8_t*)mem_addr, val); +} + +FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) { + double lo = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0)); + double hi = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4)); + rx_vec_f128 x; + x = vsetq_lane_f64(lo, x, 0); + x = vsetq_lane_f64(hi, x, 1); + return x; +} + +#define RANDOMX_DEFAULT_FENV + +#else //portable fallback #include #include @@ -487,7 +607,6 @@ FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) { return v; } - FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { rx_vec_f128 x; x.i.u64[0] = a.i.u64[0] ^ b.i.u64[0]; @@ -578,10 +697,6 @@ FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) { #define RANDOMX_DEFAULT_FENV -void rx_reset_float_state(); - -void rx_set_rounding_mode(uint32_t mode); - #endif #ifndef HAVE_AES @@ -598,6 +713,14 @@ FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { } #endif +#ifdef RANDOMX_DEFAULT_FENV + +void rx_reset_float_state(); + +void rx_set_rounding_mode(uint32_t mode); + +#endif + double loadDoublePortable(const void* addr); uint64_t mulh(uint64_t, uint64_t); int64_t smulh(int64_t, int64_t); diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index dd579c69..043874a8 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -197,7 +197,7 @@ namespace randomx { // static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 }; size_t JitCompilerX86::getCodeSize() { - return codePos - prologueSize; + return codePos < prologueSize ? 0 : codePos - prologueSize; } JitCompilerX86::JitCompilerX86() { @@ -580,7 +580,7 @@ namespace randomx { void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) { uint64_t divisor = instr.getImm32(); - if (!isPowerOf2(divisor)) { + if (!isZeroOrPowerOf2(divisor)) { registerUsage[instr.dst] = i; emit(MOV_RAX_I); emit64(randomx_reciprocal_fast(divisor)); diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index ffd9e2c5..a5f6bc08 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -44,12 +44,14 @@ RandomX_ConfigurationWownero::RandomX_ConfigurationWownero() ScratchpadL2_Size = 131072; ScratchpadL3_Size = 1048576; + RANDOMX_FREQ_IADD_RS = 25; RANDOMX_FREQ_IROR_R = 10; RANDOMX_FREQ_IROL_R = 0; RANDOMX_FREQ_FSWAP_R = 8; RANDOMX_FREQ_FADD_R = 20; RANDOMX_FREQ_FSUB_R = 20; RANDOMX_FREQ_FMUL_R = 20; + RANDOMX_FREQ_CBRANCH = 16; fillAes4Rx4_Key[0] = rx_set_int_vec_i128(0xcf359e95, 0x141f82b7, 0x7ffbe4a6, 0xf890465d); fillAes4Rx4_Key[1] = rx_set_int_vec_i128(0x6741ffdc, 0xbd5c5ac3, 0xfee8278a, 0x6a55c450); @@ -68,6 +70,9 @@ RandomX_ConfigurationLoki::RandomX_ConfigurationLoki() ArgonSalt = "RandomXL\x12"; ProgramSize = 320; ProgramCount = 7; + + RANDOMX_FREQ_IADD_RS = 25; + RANDOMX_FREQ_CBRANCH = 16; } RandomX_ConfigurationBase::RandomX_ConfigurationBase() @@ -87,7 +92,7 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase() , ProgramCount(8) , JumpBits(8) , JumpOffset(8) - , RANDOMX_FREQ_IADD_RS(25) + , RANDOMX_FREQ_IADD_RS(16) , RANDOMX_FREQ_IADD_M(7) , RANDOMX_FREQ_ISUB_R(16) , RANDOMX_FREQ_ISUB_M(7) @@ -113,7 +118,7 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase() , RANDOMX_FREQ_FMUL_R(32) , RANDOMX_FREQ_FDIV_M(4) , RANDOMX_FREQ_FSQRT_R(6) - , RANDOMX_FREQ_CBRANCH(16) + , RANDOMX_FREQ_CBRANCH(25) , RANDOMX_FREQ_CFROUND(1) , RANDOMX_FREQ_ISTORE(16) , RANDOMX_FREQ_NOP(0) diff --git a/src/crypto/randomx/superscalar.cpp b/src/crypto/randomx/superscalar.cpp index 4ed993f4..aaa91f62 100644 --- a/src/crypto/randomx/superscalar.cpp +++ b/src/crypto/randomx/superscalar.cpp @@ -329,7 +329,7 @@ namespace randomx { return false; if (availableRegisters.size() > 1) { - index = gen.getInt32() % availableRegisters.size(); + index = gen.getUInt32() % availableRegisters.size(); } else { index = 0; @@ -442,7 +442,7 @@ namespace randomx { case SuperscalarInstructionType::IADD_C8: case SuperscalarInstructionType::IADD_C9: { mod_ = 0; - imm32_ = gen.getInt32(); + imm32_ = gen.getUInt32(); opGroup_ = SuperscalarInstructionType::IADD_C7; opGroupPar_ = -1; } break; @@ -451,7 +451,7 @@ namespace randomx { case SuperscalarInstructionType::IXOR_C8: case SuperscalarInstructionType::IXOR_C9: { mod_ = 0; - imm32_ = gen.getInt32(); + imm32_ = gen.getUInt32(); opGroup_ = SuperscalarInstructionType::IXOR_C7; opGroupPar_ = -1; } break; @@ -461,7 +461,7 @@ namespace randomx { mod_ = 0; imm32_ = 0; opGroup_ = SuperscalarInstructionType::IMULH_R; - opGroupPar_ = gen.getInt32(); + opGroupPar_ = gen.getUInt32(); } break; case SuperscalarInstructionType::ISMULH_R: { @@ -469,14 +469,14 @@ namespace randomx { mod_ = 0; imm32_ = 0; opGroup_ = SuperscalarInstructionType::ISMULH_R; - opGroupPar_ = gen.getInt32(); + opGroupPar_ = gen.getUInt32(); } break; case SuperscalarInstructionType::IMUL_RCP: { mod_ = 0; do { - imm32_ = gen.getInt32(); - } while ((imm32_ & (imm32_ - 1)) == 0); + imm32_ = gen.getUInt32(); + } while (isZeroOrPowerOf2(imm32_)); opGroup_ = SuperscalarInstructionType::IMUL_RCP; opGroupPar_ = -1; } break; From 8b84d7650b9997339e0e046b21e56028968d502c Mon Sep 17 00:00:00 2001 From: SChernykh Date: Tue, 27 Aug 2019 20:18:56 +0200 Subject: [PATCH 04/18] Optimized RandomX JIT compiler Hashrate improved by 0.5-1.5% depending on RandomX version and CPU. --- src/crypto/randomx/instruction.hpp | 10 +- src/crypto/randomx/jit_compiler_x86.cpp | 676 +++++++++++++++--------- src/crypto/randomx/jit_compiler_x86.hpp | 25 +- 3 files changed, 433 insertions(+), 278 deletions(-) diff --git a/src/crypto/randomx/instruction.hpp b/src/crypto/randomx/instruction.hpp index a3ad6b7a..446ebfa8 100644 --- a/src/crypto/randomx/instruction.hpp +++ b/src/crypto/randomx/instruction.hpp @@ -77,13 +77,13 @@ namespace randomx { void setImm32(uint32_t val) { return store32(&imm32, val); } - int getModMem() const { - return mod % 4; //bits 0-1 + uint32_t getModMem() const { + return mod & 3; //bits 0-1 } - int getModShift() const { - return (mod >> 2) % 4; //bits 2-3 + uint32_t getModShift() const { + return (mod >> 2) & 3; //bits 2-3 } - int getModCond() const { + uint32_t getModCond() const { return mod >> 4; //bits 4-7 } void setMod(uint8_t val) { diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index 043874a8..d2ac8370 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -181,7 +181,7 @@ namespace randomx { static const uint8_t REX_TEST[] = { 0x49, 0xF7 }; static const uint8_t JZ[] = { 0x0f, 0x84 }; static const uint8_t RET = 0xc3; - static const uint8_t LEA_32[] = { 0x67, 0x41, 0x8d }; + static const uint8_t LEA_32[] = { 0x41, 0x8d }; static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 }; static const uint8_t ADD_EBX_I[] = { 0x81, 0xc3 }; @@ -219,12 +219,12 @@ namespace randomx { void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg, uint32_t datasetOffset) { generateProgramPrologue(prog, pcfg); - emit(RandomX_CurrentConfig.codeReadDatasetLightSshInitTweaked, readDatasetLightInitSize); - emit(ADD_EBX_I); - emit32(datasetOffset / CacheLineSize); - emitByte(CALL); - emit32(superScalarHashOffset - (codePos + 4)); - emit(codeReadDatasetLightSshFin, readDatasetLightFinSize); + emit(RandomX_CurrentConfig.codeReadDatasetLightSshInitTweaked, readDatasetLightInitSize, code, codePos); + emit(ADD_EBX_I, code, codePos); + emit32(datasetOffset / CacheLineSize, code, codePos); + emitByte(CALL, code, codePos); + emit32(superScalarHashOffset - (codePos + 4), code, codePos); + emit(codeReadDatasetLightSshFin, readDatasetLightFinSize, code, codePos); generateProgramEpilogue(prog); } @@ -238,23 +238,23 @@ namespace randomx { Instruction& instr = prog(i); generateSuperscalarCode(instr, reciprocalCache); } - emit(codeShhLoad, codeSshLoadSize); + emit(codeShhLoad, codeSshLoadSize, code, codePos); if (j < RandomX_CurrentConfig.CacheAccesses - 1) { - emit(REX_MOV_RR64); - emitByte(0xd8 + prog.getAddressRegister()); - emit(RandomX_CurrentConfig.codeShhPrefetchTweaked, codeSshPrefetchSize); + emit(REX_MOV_RR64, code, codePos); + emitByte(0xd8 + prog.getAddressRegister(), code, codePos); + emit(RandomX_CurrentConfig.codeShhPrefetchTweaked, codeSshPrefetchSize, code, codePos); #ifdef RANDOMX_ALIGN int align = (codePos % 16); while (align != 0) { int nopSize = 16 - align; if (nopSize > 8) nopSize = 8; - emit(NOPX[nopSize - 1], nopSize); + emit(NOPX[nopSize - 1], nopSize, code, codePos); align = (codePos % 16); } #endif } } - emitByte(RET); + emitByte(RET, code, codePos); } template @@ -265,508 +265,664 @@ namespace randomx { } void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) { - instructionOffsets.clear(); - for (unsigned i = 0; i < 8; ++i) { - registerUsage[i] = -1; - } + memset(registerUsage, -1, sizeof(registerUsage)); codePos = prologueSize; memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask)); - emit(REX_XOR_RAX_R64); - emitByte(0xc0 + pcfg.readReg0); - emit(REX_XOR_RAX_R64); - emitByte(0xc0 + pcfg.readReg1); + emit(REX_XOR_RAX_R64, code, codePos); + emitByte(0xc0 + pcfg.readReg0, code, codePos); + emit(REX_XOR_RAX_R64, code, codePos); + emitByte(0xc0 + pcfg.readReg1, code, codePos); memcpy(code + codePos, RandomX_CurrentConfig.codeLoopLoadTweaked, loopLoadSize); codePos += loopLoadSize; for (unsigned i = 0; i < prog.getSize(); ++i) { Instruction& instr = prog(i); instr.src %= RegistersCount; instr.dst %= RegistersCount; - generateCode(instr, i); + instructionOffsets[i] = codePos; + (this->*(engine[instr.opcode]))(instr, i); } - emit(REX_MOV_RR); - emitByte(0xc0 + pcfg.readReg2); - emit(REX_XOR_EAX); - emitByte(0xc0 + pcfg.readReg3); + emit(REX_MOV_RR, code, codePos); + emitByte(0xc0 + pcfg.readReg2, code, codePos); + emit(REX_XOR_EAX, code, codePos); + emitByte(0xc0 + pcfg.readReg3, code, codePos); } void JitCompilerX86::generateProgramEpilogue(Program& prog) { memcpy(code + codePos, codeLoopStore, loopStoreSize); codePos += loopStoreSize; - emit(SUB_EBX); - emit(JNZ); - emit32(prologueSize - codePos - 4); - emitByte(JMP); - emit32(epilogueOffset - codePos - 4); - } - - void JitCompilerX86::generateCode(Instruction& instr, int i) { - instructionOffsets.push_back(codePos); - auto generator = engine[instr.opcode]; - (this->*generator)(instr, i); + emit(SUB_EBX, code, codePos); + emit(JNZ, code, codePos); + emit32(prologueSize - codePos - 4, code, codePos); + emitByte(JMP, code, codePos); + emit32(epilogueOffset - codePos - 4, code, codePos); } void JitCompilerX86::generateSuperscalarCode(Instruction& instr, std::vector &reciprocalCache) { switch ((SuperscalarInstructionType)instr.opcode) { case randomx::SuperscalarInstructionType::ISUB_R: - emit(REX_SUB_RR); - emitByte(0xc0 + 8 * instr.dst + instr.src); + emit(REX_SUB_RR, code, codePos); + emitByte(0xc0 + 8 * instr.dst + instr.src, code, codePos); break; case randomx::SuperscalarInstructionType::IXOR_R: - emit(REX_XOR_RR); - emitByte(0xc0 + 8 * instr.dst + instr.src); + emit(REX_XOR_RR, code, codePos); + emitByte(0xc0 + 8 * instr.dst + instr.src, code, codePos); break; case randomx::SuperscalarInstructionType::IADD_RS: - emit(REX_LEA); - emitByte(0x04 + 8 * instr.dst); - genSIB(instr.getModShift(), instr.src, instr.dst); + emit(REX_LEA, code, codePos); + emitByte(0x04 + 8 * instr.dst, code, codePos); + genSIB(instr.getModShift(), instr.src, instr.dst, code, codePos); break; case randomx::SuperscalarInstructionType::IMUL_R: - emit(REX_IMUL_RR); - emitByte(0xc0 + 8 * instr.dst + instr.src); + emit(REX_IMUL_RR, code, codePos); + emitByte(0xc0 + 8 * instr.dst + instr.src, code, codePos); break; case randomx::SuperscalarInstructionType::IROR_C: - emit(REX_ROT_I8); - emitByte(0xc8 + instr.dst); - emitByte(instr.getImm32() & 63); + emit(REX_ROT_I8, code, codePos); + emitByte(0xc8 + instr.dst, code, codePos); + emitByte(instr.getImm32() & 63, code, codePos); break; case randomx::SuperscalarInstructionType::IADD_C7: - emit(REX_81); - emitByte(0xc0 + instr.dst); - emit32(instr.getImm32()); + emit(REX_81, code, codePos); + emitByte(0xc0 + instr.dst, code, codePos); + emit32(instr.getImm32(), code, codePos); break; case randomx::SuperscalarInstructionType::IXOR_C7: - emit(REX_XOR_RI); - emitByte(0xf0 + instr.dst); - emit32(instr.getImm32()); + emit(REX_XOR_RI, code, codePos); + emitByte(0xf0 + instr.dst, code, codePos); + emit32(instr.getImm32(), code, codePos); break; case randomx::SuperscalarInstructionType::IADD_C8: - emit(REX_81); - emitByte(0xc0 + instr.dst); - emit32(instr.getImm32()); + emit(REX_81, code, codePos); + emitByte(0xc0 + instr.dst, code, codePos); + emit32(instr.getImm32(), code, codePos); #ifdef RANDOMX_ALIGN - emit(NOP1); + emit(NOP1, code, codePos); #endif break; case randomx::SuperscalarInstructionType::IXOR_C8: - emit(REX_XOR_RI); - emitByte(0xf0 + instr.dst); - emit32(instr.getImm32()); + emit(REX_XOR_RI, code, codePos); + emitByte(0xf0 + instr.dst, code, codePos); + emit32(instr.getImm32(), code, codePos); #ifdef RANDOMX_ALIGN - emit(NOP1); + emit(NOP1, code, codePos); #endif break; case randomx::SuperscalarInstructionType::IADD_C9: - emit(REX_81); - emitByte(0xc0 + instr.dst); - emit32(instr.getImm32()); + emit(REX_81, code, codePos); + emitByte(0xc0 + instr.dst, code, codePos); + emit32(instr.getImm32(), code, codePos); #ifdef RANDOMX_ALIGN - emit(NOP2); + emit(NOP2, code, codePos); #endif break; case randomx::SuperscalarInstructionType::IXOR_C9: - emit(REX_XOR_RI); - emitByte(0xf0 + instr.dst); - emit32(instr.getImm32()); + emit(REX_XOR_RI, code, codePos); + emitByte(0xf0 + instr.dst, code, codePos); + emit32(instr.getImm32(), code, codePos); #ifdef RANDOMX_ALIGN - emit(NOP2); + emit(NOP2, code, codePos); #endif break; case randomx::SuperscalarInstructionType::IMULH_R: - emit(REX_MOV_RR64); - emitByte(0xc0 + instr.dst); - emit(REX_MUL_R); - emitByte(0xe0 + instr.src); - emit(REX_MOV_R64R); - emitByte(0xc2 + 8 * instr.dst); + emit(REX_MOV_RR64, code, codePos); + emitByte(0xc0 + instr.dst, code, codePos); + emit(REX_MUL_R, code, codePos); + emitByte(0xe0 + instr.src, code, codePos); + emit(REX_MOV_R64R, code, codePos); + emitByte(0xc2 + 8 * instr.dst, code, codePos); break; case randomx::SuperscalarInstructionType::ISMULH_R: - emit(REX_MOV_RR64); - emitByte(0xc0 + instr.dst); - emit(REX_MUL_R); - emitByte(0xe8 + instr.src); - emit(REX_MOV_R64R); - emitByte(0xc2 + 8 * instr.dst); + emit(REX_MOV_RR64, code, codePos); + emitByte(0xc0 + instr.dst, code, codePos); + emit(REX_MUL_R, code, codePos); + emitByte(0xe8 + instr.src, code, codePos); + emit(REX_MOV_R64R, code, codePos); + emitByte(0xc2 + 8 * instr.dst, code, codePos); break; case randomx::SuperscalarInstructionType::IMUL_RCP: - emit(MOV_RAX_I); - emit64(reciprocalCache[instr.getImm32()]); - emit(REX_IMUL_RM); - emitByte(0xc0 + 8 * instr.dst); + emit(MOV_RAX_I, code, codePos); + emit64(reciprocalCache[instr.getImm32()], code, codePos); + emit(REX_IMUL_RM, code, codePos); + emitByte(0xc0 + 8 * instr.dst, code, codePos); break; default: UNREACHABLE; } } - void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) { - emit(LEA_32); - emitByte(0x80 + instr.src + (rax ? 0 : 8)); + void JitCompilerX86::genAddressReg(Instruction& instr, uint8_t* code, int& codePos, bool rax) { + emit(LEA_32, code, codePos); + emitByte(0x80 + instr.src + (rax ? 0 : 8), code, codePos); if (instr.src == RegisterNeedsSib) { - emitByte(0x24); + emitByte(0x24, code, codePos); } - emit32(instr.getImm32()); + emit32(instr.getImm32(), code, codePos); if (rax) - emitByte(AND_EAX_I); + emitByte(AND_EAX_I, code, codePos); else - emit(AND_ECX_I); - emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + emit(AND_ECX_I, code, codePos); + emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask, code, codePos); } - void JitCompilerX86::genAddressRegDst(Instruction& instr) { - emit(LEA_32); - emitByte(0x80 + instr.dst); + void JitCompilerX86::genAddressRegDst(Instruction& instr, uint8_t* code, int& codePos) { + emit(LEA_32, code, codePos); + emitByte(0x80 + instr.dst, code, codePos); if (instr.dst == RegisterNeedsSib) { - emitByte(0x24); + emitByte(0x24, code, codePos); } - emit32(instr.getImm32()); - emitByte(AND_EAX_I); + emit32(instr.getImm32(), code, codePos); + emitByte(AND_EAX_I, code, codePos); if (instr.getModCond() < StoreL3Condition) { - emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask, code, codePos); } else { - emit32(ScratchpadL3Mask); + emit32(ScratchpadL3Mask, code, codePos); } } - void JitCompilerX86::genAddressImm(Instruction& instr) { - emit32(instr.getImm32() & ScratchpadL3Mask); + void JitCompilerX86::genAddressImm(Instruction& instr, uint8_t* code, int& codePos) { + emit32(instr.getImm32() & ScratchpadL3Mask, code, codePos); } + static const uint32_t template_IADD_RS[8] = { + 0x048d4f, + 0x0c8d4f, + 0x148d4f, + 0x1c8d4f, + 0x248d4f, + 0xac8d4f, + 0x348d4f, + 0x3c8d4f, + }; + void JitCompilerX86::h_IADD_RS(Instruction& instr, int i) { + int pos = codePos; + uint8_t* const p = code + pos; + registerUsage[instr.dst] = i; - emit(REX_LEA); - if (instr.dst == RegisterNeedsDisplacement) - emitByte(0xac); - else - emitByte(0x04 + 8 * instr.dst); - genSIB(instr.getModShift(), instr.src, instr.dst); - if (instr.dst == RegisterNeedsDisplacement) - emit32(instr.getImm32()); + + const uint32_t sib = (instr.getModShift() << 6) | (instr.src << 3) | instr.dst; + *(uint32_t*)(p) = template_IADD_RS[instr.dst] | (sib << 24); + *(uint32_t*)(p + 4) = instr.getImm32(); + + codePos = pos + ((instr.dst == RegisterNeedsDisplacement) ? 8 : 4); } + static const uint32_t template_IADD_M[8] = { + 0x0604034c, + 0x060c034c, + 0x0614034c, + 0x061c034c, + 0x0624034c, + 0x062c034c, + 0x0634034c, + 0x063c034c, + }; + void JitCompilerX86::h_IADD_M(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { - genAddressReg(instr); - emit(REX_ADD_RM); - emitByte(0x04 + 8 * instr.dst); - emitByte(0x06); + genAddressReg(instr, p, pos); + emit32(template_IADD_M[instr.dst], p, pos); } else { - emit(REX_ADD_RM); - emitByte(0x86 + 8 * instr.dst); - genAddressImm(instr); + emit(REX_ADD_RM, p, pos); + emitByte(0x86 + 8 * instr.dst, p, pos); + genAddressImm(instr, p, pos); } + + codePos = pos; } - void JitCompilerX86::genSIB(int scale, int index, int base) { - emitByte((scale << 6) | (index << 3) | base); + void JitCompilerX86::genSIB(int scale, int index, int base, uint8_t* code, int& codePos) { + emitByte((scale << 6) | (index << 3) | base, code, codePos); } void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { - emit(REX_SUB_RR); - emitByte(0xc0 + 8 * instr.dst + instr.src); + emit(REX_SUB_RR, p, pos); + emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos); } else { - emit(REX_81); - emitByte(0xe8 + instr.dst); - emit32(instr.getImm32()); + emit(REX_81, p, pos); + emitByte(0xe8 + instr.dst, p, pos); + emit32(instr.getImm32(), p, pos); } + + codePos = pos; } void JitCompilerX86::h_ISUB_M(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { - genAddressReg(instr); - emit(REX_SUB_RM); - emitByte(0x04 + 8 * instr.dst); - emitByte(0x06); + genAddressReg(instr, p, pos); + emit(REX_SUB_RM, p, pos); + emitByte(0x04 + 8 * instr.dst, p, pos); + emitByte(0x06, p, pos); } else { - emit(REX_SUB_RM); - emitByte(0x86 + 8 * instr.dst); - genAddressImm(instr); + emit(REX_SUB_RM, p, pos); + emitByte(0x86 + 8 * instr.dst, p, pos); + genAddressImm(instr, p, pos); } + + codePos = pos; } void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { - emit(REX_IMUL_RR); - emitByte(0xc0 + 8 * instr.dst + instr.src); + emit(REX_IMUL_RR, p, pos); + emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos); } else { - emit(REX_IMUL_RRI); - emitByte(0xc0 + 9 * instr.dst); - emit32(instr.getImm32()); + emit(REX_IMUL_RRI, p, pos); + emitByte(0xc0 + 9 * instr.dst, p, pos); + emit32(instr.getImm32(), p, pos); } + + codePos = pos; } void JitCompilerX86::h_IMUL_M(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { - genAddressReg(instr); - emit(REX_IMUL_RM); - emitByte(0x04 + 8 * instr.dst); - emitByte(0x06); + genAddressReg(instr, p, pos); + emit(REX_IMUL_RM, p, pos); + emitByte(0x04 + 8 * instr.dst, p, pos); + emitByte(0x06, p, pos); } else { - emit(REX_IMUL_RM); - emitByte(0x86 + 8 * instr.dst); - genAddressImm(instr); + emit(REX_IMUL_RM, p, pos); + emitByte(0x86 + 8 * instr.dst, p, pos); + genAddressImm(instr, p, pos); } + + codePos = pos; } void JitCompilerX86::h_IMULH_R(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + registerUsage[instr.dst] = i; - emit(REX_MOV_RR64); - emitByte(0xc0 + instr.dst); - emit(REX_MUL_R); - emitByte(0xe0 + instr.src); - emit(REX_MOV_R64R); - emitByte(0xc2 + 8 * instr.dst); + emit(REX_MOV_RR64, p, pos); + emitByte(0xc0 + instr.dst, p, pos); + emit(REX_MUL_R, p, pos); + emitByte(0xe0 + instr.src, p, pos); + emit(REX_MOV_R64R, p, pos); + emitByte(0xc2 + 8 * instr.dst, p, pos); + + codePos = pos; } void JitCompilerX86::h_IMULH_M(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { - genAddressReg(instr, false); - emit(REX_MOV_RR64); - emitByte(0xc0 + instr.dst); - emit(REX_MUL_MEM); + genAddressReg(instr, p, pos, false); + emit(REX_MOV_RR64, p, pos); + emitByte(0xc0 + instr.dst, p, pos); + emit(REX_MUL_MEM, p, pos); } else { - emit(REX_MOV_RR64); - emitByte(0xc0 + instr.dst); - emit(REX_MUL_M); - emitByte(0xa6); - genAddressImm(instr); + emit(REX_MOV_RR64, p, pos); + emitByte(0xc0 + instr.dst, p, pos); + emit(REX_MUL_M, p, pos); + emitByte(0xa6, p, pos); + genAddressImm(instr, p, pos); } - emit(REX_MOV_R64R); - emitByte(0xc2 + 8 * instr.dst); + emit(REX_MOV_R64R, p, pos); + emitByte(0xc2 + 8 * instr.dst, p, pos); + + codePos = pos; } void JitCompilerX86::h_ISMULH_R(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + registerUsage[instr.dst] = i; - emit(REX_MOV_RR64); - emitByte(0xc0 + instr.dst); - emit(REX_MUL_R); - emitByte(0xe8 + instr.src); - emit(REX_MOV_R64R); - emitByte(0xc2 + 8 * instr.dst); + emit(REX_MOV_RR64, p, pos); + emitByte(0xc0 + instr.dst, p, pos); + emit(REX_MUL_R, p, pos); + emitByte(0xe8 + instr.src, p, pos); + emit(REX_MOV_R64R, p, pos); + emitByte(0xc2 + 8 * instr.dst, p, pos); + + codePos = pos; } void JitCompilerX86::h_ISMULH_M(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { - genAddressReg(instr, false); - emit(REX_MOV_RR64); - emitByte(0xc0 + instr.dst); - emit(REX_IMUL_MEM); + genAddressReg(instr, p, pos, false); + emit(REX_MOV_RR64, p, pos); + emitByte(0xc0 + instr.dst, p, pos); + emit(REX_IMUL_MEM, p, pos); } else { - emit(REX_MOV_RR64); - emitByte(0xc0 + instr.dst); - emit(REX_MUL_M); - emitByte(0xae); - genAddressImm(instr); + emit(REX_MOV_RR64, p, pos); + emitByte(0xc0 + instr.dst, p, pos); + emit(REX_MUL_M, p, pos); + emitByte(0xae, p, pos); + genAddressImm(instr, p, pos); } - emit(REX_MOV_R64R); - emitByte(0xc2 + 8 * instr.dst); + emit(REX_MOV_R64R, p, pos); + emitByte(0xc2 + 8 * instr.dst, p, pos); + + codePos = pos; } void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + uint64_t divisor = instr.getImm32(); if (!isZeroOrPowerOf2(divisor)) { registerUsage[instr.dst] = i; - emit(MOV_RAX_I); - emit64(randomx_reciprocal_fast(divisor)); - emit(REX_IMUL_RM); - emitByte(0xc0 + 8 * instr.dst); + emit(MOV_RAX_I, p, pos); + emit64(randomx_reciprocal_fast(divisor), p, pos); + emit(REX_IMUL_RM, p, pos); + emitByte(0xc0 + 8 * instr.dst, p, pos); } + + codePos = pos; } void JitCompilerX86::h_INEG_R(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + registerUsage[instr.dst] = i; - emit(REX_NEG); - emitByte(0xd8 + instr.dst); + emit(REX_NEG, p, pos); + emitByte(0xd8 + instr.dst, p, pos); + + codePos = pos; } void JitCompilerX86::h_IXOR_R(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { - emit(REX_XOR_RR); - emitByte(0xc0 + 8 * instr.dst + instr.src); + emit(REX_XOR_RR, p, pos); + emitByte(0xc0 + 8 * instr.dst + instr.src, p, pos); } else { - emit(REX_XOR_RI); - emitByte(0xf0 + instr.dst); - emit32(instr.getImm32()); + emit(REX_XOR_RI, p, pos); + emitByte(0xf0 + instr.dst, p, pos); + emit32(instr.getImm32(), p, pos); } + + codePos = pos; } void JitCompilerX86::h_IXOR_M(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { - genAddressReg(instr); - emit(REX_XOR_RM); - emitByte(0x04 + 8 * instr.dst); - emitByte(0x06); + genAddressReg(instr, p, pos); + emit(REX_XOR_RM, p, pos); + emitByte(0x04 + 8 * instr.dst, p, pos); + emitByte(0x06, p, pos); } else { - emit(REX_XOR_RM); - emitByte(0x86 + 8 * instr.dst); - genAddressImm(instr); + emit(REX_XOR_RM, p, pos); + emitByte(0x86 + 8 * instr.dst, p, pos); + genAddressImm(instr, p, pos); } + + codePos = pos; } void JitCompilerX86::h_IROR_R(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { - emit(REX_MOV_RR); - emitByte(0xc8 + instr.src); - emit(REX_ROT_CL); - emitByte(0xc8 + instr.dst); + emit(REX_MOV_RR, p, pos); + emitByte(0xc8 + instr.src, p, pos); + emit(REX_ROT_CL, p, pos); + emitByte(0xc8 + instr.dst, p, pos); } else { - emit(REX_ROT_I8); - emitByte(0xc8 + instr.dst); - emitByte(instr.getImm32() & 63); + emit(REX_ROT_I8, p, pos); + emitByte(0xc8 + instr.dst, p, pos); + emitByte(instr.getImm32() & 63, p, pos); } + + codePos = pos; } void JitCompilerX86::h_IROL_R(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + registerUsage[instr.dst] = i; if (instr.src != instr.dst) { - emit(REX_MOV_RR); - emitByte(0xc8 + instr.src); - emit(REX_ROT_CL); - emitByte(0xc0 + instr.dst); + emit(REX_MOV_RR, p, pos); + emitByte(0xc8 + instr.src, p, pos); + emit(REX_ROT_CL, p, pos); + emitByte(0xc0 + instr.dst, p, pos); } else { - emit(REX_ROT_I8); - emitByte(0xc0 + instr.dst); - emitByte(instr.getImm32() & 63); + emit(REX_ROT_I8, p, pos); + emitByte(0xc0 + instr.dst, p, pos); + emitByte(instr.getImm32() & 63, p, pos); } + + codePos = pos; } void JitCompilerX86::h_ISWAP_R(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + if (instr.src != instr.dst) { registerUsage[instr.dst] = i; registerUsage[instr.src] = i; - emit(REX_XCHG); - emitByte(0xc0 + instr.src + 8 * instr.dst); + emit(REX_XCHG, p, pos); + emitByte(0xc0 + instr.src + 8 * instr.dst, p, pos); } + + codePos = pos; } void JitCompilerX86::h_FSWAP_R(Instruction& instr, int i) { - emit(SHUFPD); - emitByte(0xc0 + 9 * instr.dst); - emitByte(1); + uint8_t* const p = code; + int pos = codePos; + + emit(SHUFPD, p, pos); + emitByte(0xc0 + 9 * instr.dst, p, pos); + emitByte(1, p, pos); + + codePos = pos; } void JitCompilerX86::h_FADD_R(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + instr.dst %= RegisterCountFlt; instr.src %= RegisterCountFlt; - emit(REX_ADDPD); - emitByte(0xc0 + instr.src + 8 * instr.dst); + emit(REX_ADDPD, p, pos); + emitByte(0xc0 + instr.src + 8 * instr.dst, p, pos); + + codePos = pos; } void JitCompilerX86::h_FADD_M(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + instr.dst %= RegisterCountFlt; - genAddressReg(instr); - emit(REX_CVTDQ2PD_XMM12); - emit(REX_ADDPD); - emitByte(0xc4 + 8 * instr.dst); + genAddressReg(instr, p, pos); + emit(REX_CVTDQ2PD_XMM12, p, pos); + emit(REX_ADDPD, p, pos); + emitByte(0xc4 + 8 * instr.dst, p, pos); + + codePos = pos; } void JitCompilerX86::h_FSUB_R(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + instr.dst %= RegisterCountFlt; instr.src %= RegisterCountFlt; - emit(REX_SUBPD); - emitByte(0xc0 + instr.src + 8 * instr.dst); + emit(REX_SUBPD, p, pos); + emitByte(0xc0 + instr.src + 8 * instr.dst, p, pos); + + codePos = pos; } void JitCompilerX86::h_FSUB_M(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + instr.dst %= RegisterCountFlt; - genAddressReg(instr); - emit(REX_CVTDQ2PD_XMM12); - emit(REX_SUBPD); - emitByte(0xc4 + 8 * instr.dst); + genAddressReg(instr, p, pos); + emit(REX_CVTDQ2PD_XMM12, p, pos); + emit(REX_SUBPD, p, pos); + emitByte(0xc4 + 8 * instr.dst, p, pos); + + codePos = pos; } void JitCompilerX86::h_FSCAL_R(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + instr.dst %= RegisterCountFlt; - emit(REX_XORPS); - emitByte(0xc7 + 8 * instr.dst); + emit(REX_XORPS, p, pos); + emitByte(0xc7 + 8 * instr.dst, p, pos); + + codePos = pos; } void JitCompilerX86::h_FMUL_R(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + instr.dst %= RegisterCountFlt; instr.src %= RegisterCountFlt; - emit(REX_MULPD); - emitByte(0xe0 + instr.src + 8 * instr.dst); + emit(REX_MULPD, p, pos); + emitByte(0xe0 + instr.src + 8 * instr.dst, p, pos); + + codePos = pos; } void JitCompilerX86::h_FDIV_M(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + instr.dst %= RegisterCountFlt; - genAddressReg(instr); - emit(REX_CVTDQ2PD_XMM12); - emit(REX_ANDPS_XMM12); - emit(REX_DIVPD); - emitByte(0xe4 + 8 * instr.dst); + genAddressReg(instr, p, pos); + emit(REX_CVTDQ2PD_XMM12, p, pos); + emit(REX_ANDPS_XMM12, p, pos); + emit(REX_DIVPD, p, pos); + emitByte(0xe4 + 8 * instr.dst, p, pos); + + codePos = pos; } void JitCompilerX86::h_FSQRT_R(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + instr.dst %= RegisterCountFlt; - emit(SQRTPD); - emitByte(0xe4 + 9 * instr.dst); + emit(SQRTPD, p, pos); + emitByte(0xe4 + 9 * instr.dst, p, pos); + + codePos = pos; } void JitCompilerX86::h_CFROUND(Instruction& instr, int i) { - emit(REX_MOV_RR64); - emitByte(0xc0 + instr.src); + uint8_t* const p = code; + int pos = codePos; + + emit(REX_MOV_RR64, p, pos); + emitByte(0xc0 + instr.src, p, pos); int rotate = (13 - (instr.getImm32() & 63)) & 63; if (rotate != 0) { - emit(ROL_RAX); - emitByte(rotate); + emit(ROL_RAX, p, pos); + emitByte(rotate, p, pos); } - emit(AND_OR_MOV_LDMXCSR); + emit(AND_OR_MOV_LDMXCSR, p, pos); + + codePos = pos; } void JitCompilerX86::h_CBRANCH(Instruction& instr, int i) { + uint8_t* const p = code; + int pos = codePos; + int reg = instr.dst; int target = registerUsage[reg] + 1; - emit(REX_ADD_I); - emitByte(0xc0 + reg); + emit(REX_ADD_I, p, pos); + emitByte(0xc0 + reg, p, pos); int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset; uint32_t imm = instr.getImm32() | (1UL << shift); if (RandomX_CurrentConfig.JumpOffset > 0 || shift > 0) imm &= ~(1UL << (shift - 1)); - emit32(imm); - emit(REX_TEST); - emitByte(0xc0 + reg); - emit32(RandomX_CurrentConfig.ConditionMask_Calculated << shift); - emit(JZ); - emit32(instructionOffsets[target] - (codePos + 4)); + emit32(imm, p, pos); + emit(REX_TEST, p, pos); + emitByte(0xc0 + reg, p, pos); + emit32(RandomX_CurrentConfig.ConditionMask_Calculated << shift, p, pos); + emit(JZ, p, pos); + emit32(instructionOffsets[target] - (pos + 4), p, pos); //mark all registers as used - for (unsigned j = 0; j < RegistersCount; ++j) { - registerUsage[j] = i; + uint64_t* r = (uint64_t*) registerUsage; + uint64_t k = i; + k |= k << 32; + for (unsigned j = 0; j < RegistersCount / 2; ++j) { + r[j] = k; } + + codePos = pos; } void JitCompilerX86::h_ISTORE(Instruction& instr, int i) { - genAddressRegDst(instr); - emit(REX_MOV_MR); - emitByte(0x04 + 8 * instr.src); - emitByte(0x06); + uint8_t* const p = code; + int pos = codePos; + + genAddressRegDst(instr, p, pos); + emit(REX_MOV_MR, p, pos); + emitByte(0x04 + 8 * instr.src, p, pos); + emitByte(0x06, p, pos); + + codePos = pos; } void JitCompilerX86::h_NOP(Instruction& instr, int i) { - emit(NOP1); + emit(NOP1, code, codePos); } InstructionGeneratorX86 JitCompilerX86::engine[256] = {}; diff --git a/src/crypto/randomx/jit_compiler_x86.hpp b/src/crypto/randomx/jit_compiler_x86.hpp index 497e66cb..f72bce86 100644 --- a/src/crypto/randomx/jit_compiler_x86.hpp +++ b/src/crypto/randomx/jit_compiler_x86.hpp @@ -66,42 +66,41 @@ namespace randomx { size_t getCodeSize(); static InstructionGeneratorX86 engine[256]; - std::vector instructionOffsets; + int32_t instructionOffsets[512]; int registerUsage[RegistersCount]; uint8_t* code; int32_t codePos; void generateProgramPrologue(Program&, ProgramConfiguration&); void generateProgramEpilogue(Program&); - void genAddressReg(Instruction&, bool); - void genAddressRegDst(Instruction&); - void genAddressImm(Instruction&); - void genSIB(int scale, int index, int base); + static void genAddressReg(Instruction&, uint8_t* code, int& codePos, bool rax = true); + static void genAddressRegDst(Instruction&, uint8_t* code, int& codePos); + static void genAddressImm(Instruction&, uint8_t* code, int& codePos); + static void genSIB(int scale, int index, int base, uint8_t* code, int& codePos); - void generateCode(Instruction&, int); void generateSuperscalarCode(Instruction &, std::vector &); - void emitByte(uint8_t val) { + static void emitByte(uint8_t val, uint8_t* code, int& codePos) { code[codePos] = val; - codePos++; + ++codePos; } - void emit32(uint32_t val) { + static void emit32(uint32_t val, uint8_t* code, int& codePos) { memcpy(code + codePos, &val, sizeof val); codePos += sizeof val; } - void emit64(uint64_t val) { + static void emit64(uint64_t val, uint8_t* code, int& codePos) { memcpy(code + codePos, &val, sizeof val); codePos += sizeof val; } template - void emit(const uint8_t (&src)[N]) { - emit(src, N); + static void emit(const uint8_t (&src)[N], uint8_t* code, int& codePos) { + emit(src, N, code, codePos); } - void emit(const uint8_t* src, size_t count) { + static void emit(const uint8_t* src, size_t count, uint8_t* code, int& codePos) { memcpy(code + codePos, src, count); codePos += count; } From 87fe8a4f7e372003f8fa7f05f61ea753f750513e Mon Sep 17 00:00:00 2001 From: xmrig Date: Wed, 28 Aug 2019 04:14:57 +0700 Subject: [PATCH 05/18] Update CHANGELOG.md --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 13202a40..dd78cb11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# v3.1.1 +- [#1142](https://github.com/xmrig/xmrig/pull/1142) RandomX hashrate improved by 0.5-1.5% depending on variant and CPU. +- [#1133](https://github.com/xmrig/xmrig/issues/1133) Fixed syslog regression. + # v3.1.0 - [#1107](https://github.com/xmrig/xmrig/issues/1107#issuecomment-522235892) Added Argon2 algorithm family: `argon2/chukwa` and `argon2/wrkz`. From 0a58781b0cbffc6ba4aceb077e6e684c840527d8 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Wed, 28 Aug 2019 07:20:01 +0200 Subject: [PATCH 06/18] Reverted intrin_portable.h --- src/crypto/randomx/intrin_portable.h | 139 ++------------------------- 1 file changed, 8 insertions(+), 131 deletions(-) diff --git a/src/crypto/randomx/intrin_portable.h b/src/crypto/randomx/intrin_portable.h index 338d6d89..e4916096 100644 --- a/src/crypto/randomx/intrin_portable.h +++ b/src/crypto/randomx/intrin_portable.h @@ -376,131 +376,11 @@ FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) { #define RANDOMX_DEFAULT_FENV -#elif defined(__aarch64__) +void rx_reset_float_state(); -#include -#include -#include +void rx_set_rounding_mode(uint32_t mode); -typedef uint8x16_t rx_vec_i128; -typedef float64x2_t rx_vec_f128; - -#define rx_aligned_alloc(size, align) aligned_alloc(align, size) -#define rx_aligned_free(a) free(a) - -inline void rx_prefetch_nta(void* ptr) { - asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr)); -} - -FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) { - return vld1q_f64((const float64_t*)pd); -} - -FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 val) { - vst1q_f64((float64_t*)mem_addr, val); -} - -FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) { - float64x2_t temp; - temp = vcopyq_laneq_f64(temp, 1, a, 1); - a = vcopyq_laneq_f64(a, 1, a, 0); - return vcopyq_laneq_f64(a, 0, temp, 1); -} - -FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) { - uint64x2_t temp0 = vdupq_n_u64(x0); - uint64x2_t temp1 = vdupq_n_u64(x1); - return vreinterpretq_f64_u64(vcopyq_laneq_u64(temp0, 1, temp1, 0)); -} - -FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) { - return vreinterpretq_f64_u64(vdupq_n_u64(x)); -} - -#define rx_add_vec_f128 vaddq_f64 -#define rx_sub_vec_f128 vsubq_f64 -#define rx_mul_vec_f128 vmulq_f64 -#define rx_div_vec_f128 vdivq_f64 -#define rx_sqrt_vec_f128 vsqrtq_f64 - -FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - return vreinterpretq_f64_u8(veorq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b))); -} - -FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - return vreinterpretq_f64_u8(vandq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b))); -} - -FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { - return vreinterpretq_f64_u8(vorrq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b))); -} - -#ifdef __ARM_FEATURE_CRYPTO - - -FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 a, rx_vec_i128 key) { - const uint8x16_t zero = { 0 }; - return vaesmcq_u8(vaeseq_u8(a, zero)) ^ key; -} - -FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 a, rx_vec_i128 key) { - const uint8x16_t zero = { 0 }; - return vaesimcq_u8(vaesdq_u8(a, zero)) ^ key; -} - -#define HAVE_AES - -#endif - -#define rx_xor_vec_i128 veorq_u8 - -FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) { - return vgetq_lane_s32(vreinterpretq_s32_u8(a), 0); -} - -FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) { - return vgetq_lane_s32(vreinterpretq_s32_u8(a), 1); -} - -FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) { - return vgetq_lane_s32(vreinterpretq_s32_u8(a), 2); -} - -FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) { - return vgetq_lane_s32(vreinterpretq_s32_u8(a), 3); -} - -FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) { - int32_t data[4]; - data[0] = _I0; - data[1] = _I1; - data[2] = _I2; - data[3] = _I3; - return vreinterpretq_u8_s32(vld1q_s32(data)); -}; - -#define rx_xor_vec_i128 veorq_u8 - -FORCE_INLINE rx_vec_i128 rx_load_vec_i128(const rx_vec_i128* mem_addr) { - return vld1q_u8((const uint8_t*)mem_addr); -} - -FORCE_INLINE void rx_store_vec_i128(rx_vec_i128* mem_addr, rx_vec_i128 val) { - vst1q_u8((uint8_t*)mem_addr, val); -} - -FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) { - double lo = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0)); - double hi = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4)); - rx_vec_f128 x; - x = vsetq_lane_f64(lo, x, 0); - x = vsetq_lane_f64(hi, x, 1); - return x; -} - -#define RANDOMX_DEFAULT_FENV - -#else //portable fallback +#else //end altivec #include #include @@ -607,6 +487,7 @@ FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) { return v; } + FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { rx_vec_f128 x; x.i.u64[0] = a.i.u64[0] ^ b.i.u64[0]; @@ -697,6 +578,10 @@ FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) { #define RANDOMX_DEFAULT_FENV +void rx_reset_float_state(); + +void rx_set_rounding_mode(uint32_t mode); + #endif #ifndef HAVE_AES @@ -713,14 +598,6 @@ FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { } #endif -#ifdef RANDOMX_DEFAULT_FENV - -void rx_reset_float_state(); - -void rx_set_rounding_mode(uint32_t mode); - -#endif - double loadDoublePortable(const void* addr); uint64_t mulh(uint64_t, uint64_t); int64_t smulh(int64_t, int64_t); From d5e7ab4985db228ab84c2c1b9a89dbd556fc1f8d Mon Sep 17 00:00:00 2001 From: SChernykh Date: Wed, 28 Aug 2019 18:28:04 +0200 Subject: [PATCH 07/18] Fixed race condition in RandomX thread init Thread could deadlock if it started before dataset struct was allocated. --- src/backend/cpu/CpuWorker.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/backend/cpu/CpuWorker.cpp b/src/backend/cpu/CpuWorker.cpp index 0fc77f8d..0abff909 100644 --- a/src/backend/cpu/CpuWorker.cpp +++ b/src/backend/cpu/CpuWorker.cpp @@ -90,6 +90,8 @@ void xmrig::CpuWorker::allocateRandomX_VM() if (Nonce::sequence(Nonce::CPU) == 0) { return; } + + dataset = Rx::dataset(m_job.currentJob(), m_node); } if (!m_vm) { From 76fdc4fc4bef5e8881ec5198a39dbfcf20a2fa41 Mon Sep 17 00:00:00 2001 From: xmrig Date: Thu, 29 Aug 2019 04:22:55 +0700 Subject: [PATCH 08/18] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd78cb11..75c58407 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ # v3.1.1 - [#1142](https://github.com/xmrig/xmrig/pull/1142) RandomX hashrate improved by 0.5-1.5% depending on variant and CPU. +- [#1146](https://github.com/xmrig/xmrig/pull/1146) Fixed race condition in RandomX thread init. - [#1133](https://github.com/xmrig/xmrig/issues/1133) Fixed syslog regression. # v3.1.0 From df973763bbc34c8cc258c48f6508f2f072c13acc Mon Sep 17 00:00:00 2001 From: Matt Smith Date: Thu, 29 Aug 2019 14:12:43 +0100 Subject: [PATCH 09/18] Fix linker marking entire executable as executable stack See: https://wiki.ubuntu.com/SecurityTeam/Roadmap/ExecutableStacks See: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart --- src/crypto/cn/asm/CryptonightR_template.S | 4 ++++ src/crypto/cn/asm/cn_main_loop.S | 4 ++++ src/crypto/cn/asm/win64/cn_main_loop.S | 4 ++++ src/crypto/randomx/jit_compiler_x86_static.S | 4 ++++ 4 files changed, 16 insertions(+) diff --git a/src/crypto/cn/asm/CryptonightR_template.S b/src/crypto/cn/asm/CryptonightR_template.S index d2974d16..bfedeb30 100644 --- a/src/crypto/cn/asm/CryptonightR_template.S +++ b/src/crypto/cn/asm/CryptonightR_template.S @@ -1593,3 +1593,7 @@ FN_PREFIX(CryptonightR_instruction_mov254): FN_PREFIX(CryptonightR_instruction_mov255): FN_PREFIX(CryptonightR_instruction_mov256): + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/src/crypto/cn/asm/cn_main_loop.S b/src/crypto/cn/asm/cn_main_loop.S index 7aed6c20..609b0fe8 100644 --- a/src/crypto/cn/asm/cn_main_loop.S +++ b/src/crypto/cn/asm/cn_main_loop.S @@ -71,3 +71,7 @@ FN_PREFIX(cnv2_rwz_double_mainloop_asm): add rsp, 48 ret 0 mov eax, 3735929054 + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/src/crypto/cn/asm/win64/cn_main_loop.S b/src/crypto/cn/asm/win64/cn_main_loop.S index 63c3a8ba..7dcad89e 100644 --- a/src/crypto/cn/asm/win64/cn_main_loop.S +++ b/src/crypto/cn/asm/win64/cn_main_loop.S @@ -43,3 +43,7 @@ cnv2_rwz_double_mainloop_asm: #include "cn2/cnv2_rwz_double_main_loop.inc" ret 0 mov eax, 3735929054 + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/src/crypto/randomx/jit_compiler_x86_static.S b/src/crypto/randomx/jit_compiler_x86_static.S index 67d2bdbc..b6338d85 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.S +++ b/src/crypto/randomx/jit_compiler_x86_static.S @@ -206,3 +206,7 @@ DECL(randomx_reciprocal_fast): mov rcx, rdi #endif #include "asm/randomx_reciprocal.inc" + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif From df91a8512880592594f289a11d675aacf6fb26f5 Mon Sep 17 00:00:00 2001 From: XMRig Date: Fri, 30 Aug 2019 07:09:14 +0700 Subject: [PATCH 10/18] Fixed bug in keepalive feature. --- src/base/net/stratum/Client.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/base/net/stratum/Client.cpp b/src/base/net/stratum/Client.cpp index fe182336..1da41692 100644 --- a/src/base/net/stratum/Client.cpp +++ b/src/base/net/stratum/Client.cpp @@ -234,6 +234,8 @@ void xmrig::Client::tick(uint64_t now) else if (m_keepAlive && now > m_keepAlive) { ping(); } + + return; } if (m_expire && now > m_expire && m_state == ConnectingState) { @@ -754,6 +756,8 @@ void xmrig::Client::parseResponse(int64_t id, const rapidjson::Value &result, co void xmrig::Client::ping() { send(snprintf(m_sendBuf, sizeof(m_sendBuf), "{\"id\":%" PRId64 ",\"jsonrpc\":\"2.0\",\"method\":\"keepalived\",\"params\":{\"id\":\"%s\"}}\n", m_sequence, m_rpcId.data())); + + m_keepAlive = 0; } From 5678d15841a7ff0cc9434e57393a8084bdc26995 Mon Sep 17 00:00:00 2001 From: XMRig Date: Fri, 30 Aug 2019 10:04:12 +0700 Subject: [PATCH 11/18] #1138 Fixed reconnect. --- src/base/net/stratum/BaseClient.cpp | 3 ++- src/base/net/stratum/BaseClient.h | 3 ++- src/base/net/stratum/Client.cpp | 42 ++++++++++++++++++----------- src/base/net/stratum/Client.h | 23 ++++++++-------- 4 files changed, 43 insertions(+), 28 deletions(-) diff --git a/src/base/net/stratum/BaseClient.cpp b/src/base/net/stratum/BaseClient.cpp index f44415d5..325fce1c 100644 --- a/src/base/net/stratum/BaseClient.cpp +++ b/src/base/net/stratum/BaseClient.cpp @@ -42,7 +42,8 @@ xmrig::BaseClient::BaseClient(int id, IClientListener *listener) : m_retries(5), m_failures(0), m_state(UnconnectedState), - m_retryPause(5000) + m_retryPause(5000), + m_enabled(true) { } diff --git a/src/base/net/stratum/BaseClient.h b/src/base/net/stratum/BaseClient.h index 9e1c7ffb..56bdc126 100644 --- a/src/base/net/stratum/BaseClient.h +++ b/src/base/net/stratum/BaseClient.h @@ -64,7 +64,8 @@ protected: HostLookupState, ConnectingState, ConnectedState, - ClosingState + ClosingState, + ReconnectingState }; inline bool isQuiet() const { return m_quiet || m_failures >= m_retries; } diff --git a/src/base/net/stratum/Client.cpp b/src/base/net/stratum/Client.cpp index 1da41692..229147ba 100644 --- a/src/base/net/stratum/Client.cpp +++ b/src/base/net/stratum/Client.cpp @@ -70,21 +70,15 @@ static const char *states[] = { "host-lookup", "connecting", "connected", - "closing" + "closing", + "reconnecting" }; #endif xmrig::Client::Client(int id, const char *agent, IClientListener *listener) : BaseClient(id, listener), - m_agent(agent), - m_tls(nullptr), - m_expire(0), - m_jobs(0), - m_keepAlive(0), - m_key(0), - m_stream(nullptr), - m_socket(nullptr) + m_agent(agent) { m_key = m_storage.add(this); m_dns = new Dns(this); @@ -238,8 +232,12 @@ void xmrig::Client::tick(uint64_t now) return; } - if (m_expire && now > m_expire && m_state == ConnectingState) { - connect(); + if (m_state == ReconnectingState && m_expire && now > m_expire) { + return connect(); + } + + if (m_state == ConnectingState && m_expire && now > m_expire) { + return reconnect(); } } @@ -449,7 +447,6 @@ int xmrig::Client::resolve(const String &host) { setState(HostLookupState); - m_expire = 0; m_recvBuf.reset(); if (m_failures == -1) { @@ -814,12 +811,10 @@ void xmrig::Client::reconnect() return m_listener->onClose(this, -1); } - setState(ConnectingState); + setState(ReconnectingState); m_failures++; m_listener->onClose(this, static_cast(m_failures)); - - m_expire = Chrono::steadyMSecs() + m_retryPause; } @@ -831,6 +826,23 @@ void xmrig::Client::setState(SocketState state) return; } + switch (state) { + case HostLookupState: + m_expire = 0; + break; + + case ConnectingState: + m_expire = Chrono::steadyMSecs() + kConnectTimeout; + break; + + case ReconnectingState: + m_expire = Chrono::steadyMSecs() + m_retryPause; + break; + + default: + break; + } + m_state = state; } diff --git a/src/base/net/stratum/Client.h b/src/base/net/stratum/Client.h index 46030aba..ff2bf7f6 100644 --- a/src/base/net/stratum/Client.h +++ b/src/base/net/stratum/Client.h @@ -56,12 +56,13 @@ class JobResult; class Client : public BaseClient, public IDnsListener, public ILineListener { public: - constexpr static int kResponseTimeout = 20 * 1000; + constexpr static uint64_t kConnectTimeout = 20 * 1000; + constexpr static uint64_t kResponseTimeout = 20 * 1000; # ifdef XMRIG_FEATURE_TLS - constexpr static int kInputBufferSize = 1024 * 16; + constexpr static size_t kInputBufferSize = 1024 * 16; # else - constexpr static int kInputBufferSize = 1024 * 2; + constexpr static size_t kInputBufferSize = 1024 * 2; # endif Client(int id, const char *agent, IClientListener *listener); @@ -122,19 +123,19 @@ private: static inline Client *getClient(void *data) { return m_storage.get(data); } - char m_sendBuf[2048]; + char m_sendBuf[2048] = { 0 }; const char *m_agent; Dns *m_dns; RecvBuf m_recvBuf; std::bitset m_extensions; String m_rpcId; - Tls *m_tls; - uint64_t m_expire; - uint64_t m_jobs; - uint64_t m_keepAlive; - uintptr_t m_key; - uv_stream_t *m_stream; - uv_tcp_t *m_socket; + Tls *m_tls = nullptr; + uint64_t m_expire = 0; + uint64_t m_jobs = 0; + uint64_t m_keepAlive = 0; + uintptr_t m_key = 0; + uv_stream_t *m_stream = nullptr; + uv_tcp_t *m_socket = nullptr; static Storage m_storage; }; From 9cfbce5e0992026522beb3608eb79d0f3634568f Mon Sep 17 00:00:00 2001 From: XMRig Date: Fri, 30 Aug 2019 13:49:58 +0700 Subject: [PATCH 12/18] Removed Linux specific code from Windows only ASM file. --- src/crypto/cn/asm/win64/cn_main_loop.S | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/crypto/cn/asm/win64/cn_main_loop.S b/src/crypto/cn/asm/win64/cn_main_loop.S index 7dcad89e..63c3a8ba 100644 --- a/src/crypto/cn/asm/win64/cn_main_loop.S +++ b/src/crypto/cn/asm/win64/cn_main_loop.S @@ -43,7 +43,3 @@ cnv2_rwz_double_mainloop_asm: #include "cn2/cnv2_rwz_double_main_loop.inc" ret 0 mov eax, 3735929054 - -#if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits -#endif From 372183555be6538b32db4984eccebf85bcb73468 Mon Sep 17 00:00:00 2001 From: XMRig Date: Fri, 30 Aug 2019 14:46:38 +0700 Subject: [PATCH 13/18] #1141 Fixed log in background mode. --- src/base/io/log/Log.cpp | 9 +++++++-- src/base/io/log/Log.h | 1 + src/base/kernel/Base.cpp | 5 ++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/base/io/log/Log.cpp b/src/base/io/log/Log.cpp index 8d56f7c9..250bc3c4 100644 --- a/src/base/io/log/Log.cpp +++ b/src/base/io/log/Log.cpp @@ -91,6 +91,10 @@ public: std::lock_guard lock(m_mutex); + if (Log::background && m_backends.empty()) { + return; + } + timestamp(level, size, offset); color(level, size); @@ -190,8 +194,9 @@ private: }; -bool Log::colors = true; -LogPrivate *Log::d = new LogPrivate(); +bool Log::background = false; +bool Log::colors = true; +LogPrivate *Log::d = new LogPrivate(); } /* namespace xmrig */ diff --git a/src/base/io/log/Log.h b/src/base/io/log/Log.h index d8bcb44a..3517b61d 100644 --- a/src/base/io/log/Log.h +++ b/src/base/io/log/Log.h @@ -54,6 +54,7 @@ public: static void print(const char *fmt, ...); static void print(Level level, const char *fmt, ...); + static bool background; static bool colors; private: diff --git a/src/base/kernel/Base.cpp b/src/base/kernel/Base.cpp index 152f6bcc..3740655d 100644 --- a/src/base/kernel/Base.cpp +++ b/src/base/kernel/Base.cpp @@ -184,7 +184,10 @@ int xmrig::Base::init() Platform::setProcessPriority(config()->cpu().priority()); # endif - if (!config()->isBackground()) { + if (config()->isBackground()) { + Log::background = true; + } + else { Log::add(new ConsoleLog()); } From fa983eee0e253e3eef5ef22e9584cd5d0c5a70ba Mon Sep 17 00:00:00 2001 From: xmrig Date: Fri, 30 Aug 2019 15:14:52 +0700 Subject: [PATCH 14/18] Update CHANGELOG.md --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 75c58407..7507b7f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,10 @@ # v3.1.1 +- [#1133](https://github.com/xmrig/xmrig/issues/1133) Fixed syslog regression. +- [#1138](https://github.com/xmrig/xmrig/issues/1138) Fixed multiple network bugs. +- [#1141](https://github.com/xmrig/xmrig/issues/1141) Fixed log in background mode. - [#1142](https://github.com/xmrig/xmrig/pull/1142) RandomX hashrate improved by 0.5-1.5% depending on variant and CPU. - [#1146](https://github.com/xmrig/xmrig/pull/1146) Fixed race condition in RandomX thread init. -- [#1133](https://github.com/xmrig/xmrig/issues/1133) Fixed syslog regression. +- [#1148](https://github.com/xmrig/xmrig/pull/1148) Fixed, on Linux linker marking entire executable as having an executable stack. # v3.1.0 - [#1107](https://github.com/xmrig/xmrig/issues/1107#issuecomment-522235892) Added Argon2 algorithm family: `argon2/chukwa` and `argon2/wrkz`. From df90b299f27ccc7fecc616d5d659b048f2bbeb92 Mon Sep 17 00:00:00 2001 From: XMRig Date: Fri, 30 Aug 2019 18:55:53 +0700 Subject: [PATCH 15/18] Fixed, for Argon2 algorithms command line options, like `--threads` was ignored. --- CHANGELOG.md | 1 + src/core/config/ConfigTransform.cpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7507b7f4..d6efd0f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - [#1142](https://github.com/xmrig/xmrig/pull/1142) RandomX hashrate improved by 0.5-1.5% depending on variant and CPU. - [#1146](https://github.com/xmrig/xmrig/pull/1146) Fixed race condition in RandomX thread init. - [#1148](https://github.com/xmrig/xmrig/pull/1148) Fixed, on Linux linker marking entire executable as having an executable stack. +- Fixed, for Argon2 algorithms command line options, like `--threads` was ignored. # v3.1.0 - [#1107](https://github.com/xmrig/xmrig/issues/1107#issuecomment-522235892) Added Argon2 algorithm family: `argon2/chukwa` and `argon2/wrkz`. diff --git a/src/core/config/ConfigTransform.cpp b/src/core/config/ConfigTransform.cpp index 622855af..ce0d324c 100644 --- a/src/core/config/ConfigTransform.cpp +++ b/src/core/config/ConfigTransform.cpp @@ -96,6 +96,8 @@ void xmrig::ConfigTransform::finalize(rapidjson::Document &doc) BaseTransform::finalize(doc); if (m_threads) { + doc.AddMember("version", 1, allocator); + if (!doc.HasMember(kCpu)) { doc.AddMember(StringRef(kCpu), Value(kObjectType), allocator); } From 53a71f72266360ca55dfd126241c0ea96dc9e772 Mon Sep 17 00:00:00 2001 From: xmrig Date: Fri, 30 Aug 2019 19:16:05 +0700 Subject: [PATCH 16/18] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d6efd0f8..6ca3d363 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ - [#1142](https://github.com/xmrig/xmrig/pull/1142) RandomX hashrate improved by 0.5-1.5% depending on variant and CPU. - [#1146](https://github.com/xmrig/xmrig/pull/1146) Fixed race condition in RandomX thread init. - [#1148](https://github.com/xmrig/xmrig/pull/1148) Fixed, on Linux linker marking entire executable as having an executable stack. -- Fixed, for Argon2 algorithms command line options, like `--threads` was ignored. +- Fixed, for Argon2 algorithms command line options like `--threads` was ignored. # v3.1.0 - [#1107](https://github.com/xmrig/xmrig/issues/1107#issuecomment-522235892) Added Argon2 algorithm family: `argon2/chukwa` and `argon2/wrkz`. From 13e38df391da5e293293b51929ef70e5e82d4f9f Mon Sep 17 00:00:00 2001 From: XMRig Date: Sat, 31 Aug 2019 06:18:32 +0700 Subject: [PATCH 17/18] Fixed command line options for single pool, free order allowed again. --- CHANGELOG.md | 1 + src/base/kernel/config/BaseTransform.cpp | 23 ++++++++++++++++++----- src/base/net/stratum/Pool.cpp | 3 +++ 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ca3d363..a5604822 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - [#1146](https://github.com/xmrig/xmrig/pull/1146) Fixed race condition in RandomX thread init. - [#1148](https://github.com/xmrig/xmrig/pull/1148) Fixed, on Linux linker marking entire executable as having an executable stack. - Fixed, for Argon2 algorithms command line options like `--threads` was ignored. +- Fixed command line options for single pool, free order allowed again. # v3.1.0 - [#1107](https://github.com/xmrig/xmrig/issues/1107#issuecomment-522235892) Added Argon2 algorithm family: `argon2/chukwa` and `argon2/wrkz`. diff --git a/src/base/kernel/config/BaseTransform.cpp b/src/base/kernel/config/BaseTransform.cpp index 8043c6e9..12e7d848 100644 --- a/src/base/kernel/config/BaseTransform.cpp +++ b/src/base/kernel/config/BaseTransform.cpp @@ -33,11 +33,12 @@ #endif -#include "base/kernel/config/BaseTransform.h" -#include "base/kernel/Process.h" -#include "base/io/log/Log.h" -#include "base/kernel/interfaces/IConfig.h" #include "base/io/json/JsonChain.h" +#include "base/io/log/Log.h" +#include "base/kernel/config/BaseTransform.h" +#include "base/kernel/interfaces/IConfig.h" +#include "base/kernel/Process.h" +#include "base/net/stratum/Pool.h" #include "core/config/Config_platform.h" @@ -138,7 +139,19 @@ void xmrig::BaseTransform::transform(rapidjson::Document &doc, int key, const ch break; case IConfig::UrlKey: /* --url */ - return add(doc, kPools, "url", arg, true); + { + if (!doc.HasMember(kPools)) { + doc.AddMember(rapidjson::StringRef(kPools), rapidjson::kArrayType, doc.GetAllocator()); + } + + rapidjson::Value &array = doc[kPools]; + if (array.Size() == 0 || Pool(array[array.Size() - 1]).isValid()) { + array.PushBack(rapidjson::kObjectType, doc.GetAllocator()); + } + + set(doc, array[array.Size() - 1], "url", arg); + break; + } case IConfig::UserKey: /* --user */ return add(doc, kPools, "user", arg); diff --git a/src/base/net/stratum/Pool.cpp b/src/base/net/stratum/Pool.cpp index 4d15ea47..ba31c35d 100644 --- a/src/base/net/stratum/Pool.cpp +++ b/src/base/net/stratum/Pool.cpp @@ -200,6 +200,9 @@ bool xmrig::Pool::isEqual(const Pool &other) const bool xmrig::Pool::parse(const char *url) { assert(url != nullptr); + if (url == nullptr) { + return false; + } const char *p = strstr(url, "://"); const char *base = url; From 5439f2d7b85c3ea878e061b37afafae78666c069 Mon Sep 17 00:00:00 2001 From: XMRig Date: Sat, 31 Aug 2019 07:54:09 +0700 Subject: [PATCH 18/18] v3.1.1 --- src/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/version.h b/src/version.h index 3cb05a7d..fdd4c7dc 100644 --- a/src/version.h +++ b/src/version.h @@ -28,7 +28,7 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC "XMRig CPU miner" -#define APP_VERSION "3.1.1-dev" +#define APP_VERSION "3.1.1" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com"