From 21a56c9cbfb9f6e4cfaeb20ea111e8d00303a068 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Tue, 27 Aug 2019 16:12:13 +0200 Subject: [PATCH] Updated RandomX --- src/crypto/randomx/argon2_core.c | 13 --- src/crypto/randomx/blake2_generator.cpp | 2 +- src/crypto/randomx/blake2_generator.hpp | 2 +- src/crypto/randomx/bytecode_machine.cpp | 2 +- src/crypto/randomx/common.hpp | 2 +- src/crypto/randomx/intrin_portable.h | 139 ++++++++++++++++++++++-- src/crypto/randomx/jit_compiler_x86.cpp | 4 +- src/crypto/randomx/randomx.cpp | 9 +- src/crypto/randomx/superscalar.cpp | 14 +-- 9 files changed, 151 insertions(+), 36 deletions(-) diff --git a/src/crypto/randomx/argon2_core.c b/src/crypto/randomx/argon2_core.c index f9babbc4..97176608 100644 --- a/src/crypto/randomx/argon2_core.c +++ b/src/crypto/randomx/argon2_core.c @@ -263,19 +263,6 @@ int rxa2_validate_inputs(const argon2_context *context) { return ARGON2_INCORRECT_PARAMETER; } - if (NULL == context->out) { - return ARGON2_OUTPUT_PTR_NULL; - } - - /* Validate output length */ - if (ARGON2_MIN_OUTLEN > context->outlen) { - return ARGON2_OUTPUT_TOO_SHORT; - } - - if (ARGON2_MAX_OUTLEN < context->outlen) { - return ARGON2_OUTPUT_TOO_LONG; - } - /* Validate password (required param) */ if (NULL == context->pwd) { if (0 != context->pwdlen) { diff --git a/src/crypto/randomx/blake2_generator.cpp b/src/crypto/randomx/blake2_generator.cpp index 83789129..edfe2e34 100644 --- a/src/crypto/randomx/blake2_generator.cpp +++ b/src/crypto/randomx/blake2_generator.cpp @@ -46,7 +46,7 @@ namespace randomx { return data[dataIndex++]; } - uint32_t Blake2Generator::getInt32() { + uint32_t Blake2Generator::getUInt32() { checkData(4); auto ret = load32(&data[dataIndex]); dataIndex += 4; diff --git a/src/crypto/randomx/blake2_generator.hpp b/src/crypto/randomx/blake2_generator.hpp index b5ac0801..5e7f61f2 100644 --- a/src/crypto/randomx/blake2_generator.hpp +++ b/src/crypto/randomx/blake2_generator.hpp @@ -36,7 +36,7 @@ namespace randomx { public: Blake2Generator(const void* seed, size_t seedSize, int nonce = 0); uint8_t getByte(); - uint32_t getInt32(); + uint32_t getUInt32(); private: void checkData(const size_t); diff --git a/src/crypto/randomx/bytecode_machine.cpp b/src/crypto/randomx/bytecode_machine.cpp index 6c51b86c..55a63935 100644 --- a/src/crypto/randomx/bytecode_machine.cpp +++ b/src/crypto/randomx/bytecode_machine.cpp @@ -244,7 +244,7 @@ namespace randomx { if (opcode < RandomX_CurrentConfig.CEIL_IMUL_RCP) { uint64_t divisor = instr.getImm32(); - if (!isPowerOf2(divisor)) { + if (!isZeroOrPowerOf2(divisor)) { auto dst = instr.dst % RegistersCount; ibc.type = InstructionType::IMUL_R; ibc.idst = &nreg->r[dst]; diff --git a/src/crypto/randomx/common.hpp b/src/crypto/randomx/common.hpp index 31b18ce4..da36f2c5 100644 --- a/src/crypto/randomx/common.hpp +++ b/src/crypto/randomx/common.hpp @@ -137,7 +137,7 @@ namespace randomx { constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register constexpr int RegisterNeedsSib = 4; //x86 r12 register - inline bool isPowerOf2(uint64_t x) { + inline bool isZeroOrPowerOf2(uint64_t x) { return (x & (x - 1)) == 0; } diff --git a/src/crypto/randomx/intrin_portable.h b/src/crypto/randomx/intrin_portable.h index e4916096..338d6d89 100644 --- a/src/crypto/randomx/intrin_portable.h +++ b/src/crypto/randomx/intrin_portable.h @@ -376,11 +376,131 @@ FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) { #define RANDOMX_DEFAULT_FENV -void rx_reset_float_state(); +#elif defined(__aarch64__) -void rx_set_rounding_mode(uint32_t mode); +#include +#include +#include -#else //end altivec +typedef uint8x16_t rx_vec_i128; +typedef float64x2_t rx_vec_f128; + +#define rx_aligned_alloc(size, align) aligned_alloc(align, size) +#define rx_aligned_free(a) free(a) + +inline void rx_prefetch_nta(void* ptr) { + asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr)); +} + +FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) { + return vld1q_f64((const float64_t*)pd); +} + +FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 val) { + vst1q_f64((float64_t*)mem_addr, val); +} + +FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) { + float64x2_t temp; + temp = vcopyq_laneq_f64(temp, 1, a, 1); + a = vcopyq_laneq_f64(a, 1, a, 0); + return vcopyq_laneq_f64(a, 0, temp, 1); +} + +FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) { + uint64x2_t temp0 = vdupq_n_u64(x0); + uint64x2_t temp1 = vdupq_n_u64(x1); + return vreinterpretq_f64_u64(vcopyq_laneq_u64(temp0, 1, temp1, 0)); +} + +FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) { + return vreinterpretq_f64_u64(vdupq_n_u64(x)); +} + +#define rx_add_vec_f128 vaddq_f64 +#define rx_sub_vec_f128 vsubq_f64 +#define rx_mul_vec_f128 vmulq_f64 +#define rx_div_vec_f128 vdivq_f64 +#define rx_sqrt_vec_f128 vsqrtq_f64 + +FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { + return vreinterpretq_f64_u8(veorq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b))); +} + +FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { + return vreinterpretq_f64_u8(vandq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b))); +} + +FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { + return vreinterpretq_f64_u8(vorrq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b))); +} + +#ifdef __ARM_FEATURE_CRYPTO + + +FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 a, rx_vec_i128 key) { + const uint8x16_t zero = { 0 }; + return vaesmcq_u8(vaeseq_u8(a, zero)) ^ key; +} + +FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 a, rx_vec_i128 key) { + const uint8x16_t zero = { 0 }; + return vaesimcq_u8(vaesdq_u8(a, zero)) ^ key; +} + +#define HAVE_AES + +#endif + +#define rx_xor_vec_i128 veorq_u8 + +FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) { + return vgetq_lane_s32(vreinterpretq_s32_u8(a), 0); +} + +FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) { + return vgetq_lane_s32(vreinterpretq_s32_u8(a), 1); +} + +FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) { + return vgetq_lane_s32(vreinterpretq_s32_u8(a), 2); +} + +FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) { + return vgetq_lane_s32(vreinterpretq_s32_u8(a), 3); +} + +FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) { + int32_t data[4]; + data[0] = _I0; + data[1] = _I1; + data[2] = _I2; + data[3] = _I3; + return vreinterpretq_u8_s32(vld1q_s32(data)); +}; + +#define rx_xor_vec_i128 veorq_u8 + +FORCE_INLINE rx_vec_i128 rx_load_vec_i128(const rx_vec_i128* mem_addr) { + return vld1q_u8((const uint8_t*)mem_addr); +} + +FORCE_INLINE void rx_store_vec_i128(rx_vec_i128* mem_addr, rx_vec_i128 val) { + vst1q_u8((uint8_t*)mem_addr, val); +} + +FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) { + double lo = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0)); + double hi = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4)); + rx_vec_f128 x; + x = vsetq_lane_f64(lo, x, 0); + x = vsetq_lane_f64(hi, x, 1); + return x; +} + +#define RANDOMX_DEFAULT_FENV + +#else //portable fallback #include #include @@ -487,7 +607,6 @@ FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) { return v; } - FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) { rx_vec_f128 x; x.i.u64[0] = a.i.u64[0] ^ b.i.u64[0]; @@ -578,10 +697,6 @@ FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) { #define RANDOMX_DEFAULT_FENV -void rx_reset_float_state(); - -void rx_set_rounding_mode(uint32_t mode); - #endif #ifndef HAVE_AES @@ -598,6 +713,14 @@ FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) { } #endif +#ifdef RANDOMX_DEFAULT_FENV + +void rx_reset_float_state(); + +void rx_set_rounding_mode(uint32_t mode); + +#endif + double loadDoublePortable(const void* addr); uint64_t mulh(uint64_t, uint64_t); int64_t smulh(int64_t, int64_t); diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index dd579c69..043874a8 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -197,7 +197,7 @@ namespace randomx { // static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 }; size_t JitCompilerX86::getCodeSize() { - return codePos - prologueSize; + return codePos < prologueSize ? 0 : codePos - prologueSize; } JitCompilerX86::JitCompilerX86() { @@ -580,7 +580,7 @@ namespace randomx { void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) { uint64_t divisor = instr.getImm32(); - if (!isPowerOf2(divisor)) { + if (!isZeroOrPowerOf2(divisor)) { registerUsage[instr.dst] = i; emit(MOV_RAX_I); emit64(randomx_reciprocal_fast(divisor)); diff --git a/src/crypto/randomx/randomx.cpp b/src/crypto/randomx/randomx.cpp index ffd9e2c5..a5f6bc08 100644 --- a/src/crypto/randomx/randomx.cpp +++ b/src/crypto/randomx/randomx.cpp @@ -44,12 +44,14 @@ RandomX_ConfigurationWownero::RandomX_ConfigurationWownero() ScratchpadL2_Size = 131072; ScratchpadL3_Size = 1048576; + RANDOMX_FREQ_IADD_RS = 25; RANDOMX_FREQ_IROR_R = 10; RANDOMX_FREQ_IROL_R = 0; RANDOMX_FREQ_FSWAP_R = 8; RANDOMX_FREQ_FADD_R = 20; RANDOMX_FREQ_FSUB_R = 20; RANDOMX_FREQ_FMUL_R = 20; + RANDOMX_FREQ_CBRANCH = 16; fillAes4Rx4_Key[0] = rx_set_int_vec_i128(0xcf359e95, 0x141f82b7, 0x7ffbe4a6, 0xf890465d); fillAes4Rx4_Key[1] = rx_set_int_vec_i128(0x6741ffdc, 0xbd5c5ac3, 0xfee8278a, 0x6a55c450); @@ -68,6 +70,9 @@ RandomX_ConfigurationLoki::RandomX_ConfigurationLoki() ArgonSalt = "RandomXL\x12"; ProgramSize = 320; ProgramCount = 7; + + RANDOMX_FREQ_IADD_RS = 25; + RANDOMX_FREQ_CBRANCH = 16; } RandomX_ConfigurationBase::RandomX_ConfigurationBase() @@ -87,7 +92,7 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase() , ProgramCount(8) , JumpBits(8) , JumpOffset(8) - , RANDOMX_FREQ_IADD_RS(25) + , RANDOMX_FREQ_IADD_RS(16) , RANDOMX_FREQ_IADD_M(7) , RANDOMX_FREQ_ISUB_R(16) , RANDOMX_FREQ_ISUB_M(7) @@ -113,7 +118,7 @@ RandomX_ConfigurationBase::RandomX_ConfigurationBase() , RANDOMX_FREQ_FMUL_R(32) , RANDOMX_FREQ_FDIV_M(4) , RANDOMX_FREQ_FSQRT_R(6) - , RANDOMX_FREQ_CBRANCH(16) + , RANDOMX_FREQ_CBRANCH(25) , RANDOMX_FREQ_CFROUND(1) , RANDOMX_FREQ_ISTORE(16) , RANDOMX_FREQ_NOP(0) diff --git a/src/crypto/randomx/superscalar.cpp b/src/crypto/randomx/superscalar.cpp index 4ed993f4..aaa91f62 100644 --- a/src/crypto/randomx/superscalar.cpp +++ b/src/crypto/randomx/superscalar.cpp @@ -329,7 +329,7 @@ namespace randomx { return false; if (availableRegisters.size() > 1) { - index = gen.getInt32() % availableRegisters.size(); + index = gen.getUInt32() % availableRegisters.size(); } else { index = 0; @@ -442,7 +442,7 @@ namespace randomx { case SuperscalarInstructionType::IADD_C8: case SuperscalarInstructionType::IADD_C9: { mod_ = 0; - imm32_ = gen.getInt32(); + imm32_ = gen.getUInt32(); opGroup_ = SuperscalarInstructionType::IADD_C7; opGroupPar_ = -1; } break; @@ -451,7 +451,7 @@ namespace randomx { case SuperscalarInstructionType::IXOR_C8: case SuperscalarInstructionType::IXOR_C9: { mod_ = 0; - imm32_ = gen.getInt32(); + imm32_ = gen.getUInt32(); opGroup_ = SuperscalarInstructionType::IXOR_C7; opGroupPar_ = -1; } break; @@ -461,7 +461,7 @@ namespace randomx { mod_ = 0; imm32_ = 0; opGroup_ = SuperscalarInstructionType::IMULH_R; - opGroupPar_ = gen.getInt32(); + opGroupPar_ = gen.getUInt32(); } break; case SuperscalarInstructionType::ISMULH_R: { @@ -469,14 +469,14 @@ namespace randomx { mod_ = 0; imm32_ = 0; opGroup_ = SuperscalarInstructionType::ISMULH_R; - opGroupPar_ = gen.getInt32(); + opGroupPar_ = gen.getUInt32(); } break; case SuperscalarInstructionType::IMUL_RCP: { mod_ = 0; do { - imm32_ = gen.getInt32(); - } while ((imm32_ & (imm32_ - 1)) == 0); + imm32_ = gen.getUInt32(); + } while (isZeroOrPowerOf2(imm32_)); opGroup_ = SuperscalarInstructionType::IMUL_RCP; opGroupPar_ = -1; } break;