diff --git a/src/crypto/CryptoNight.cpp b/src/crypto/CryptoNight.cpp index 374eca45..a4c7fe1e 100644 --- a/src/crypto/CryptoNight.cpp +++ b/src/crypto/CryptoNight.cpp @@ -59,7 +59,15 @@ static void cryptonight_aesni(AsmOptimization asmOptimization, PowVariant powVer } else if (powVersion == PowVariant::POW_ALLOY) { CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad); } else if (powVersion == PowVariant::POW_XTL) { - CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); +#if defined(XMRIG_ARM) + CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); +#else + if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) { + CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization); + } else { + CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); + } +#endif } else if (powVersion == PowVariant::POW_MSR) { CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); } else if (powVersion == PowVariant::POW_RTO) { @@ -95,7 +103,15 @@ static void cryptonight_softaes(AsmOptimization asmOptimization, PowVariant powV } else if (powVersion == PowVariant::POW_ALLOY) { CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad); } else if (powVersion == PowVariant::POW_XTL) { +#if defined(XMRIG_ARM) CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); +#else + if (asmOptimization == 
AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) { + CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization); + } else { + CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); + } +#endif } else if (powVersion == PowVariant::POW_MSR) { CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad); } else if (powVersion == PowVariant::POW_RTO) { @@ -201,8 +217,11 @@ bool CryptoNight::init(int algo, bool aesni) { for (int i = 0; i < 256; ++i) { - const uint64_t index = (((i >> 3) & 6) | (i & 1)) << 1; + const uint64_t index = (((i >> POW_DEFAULT_INDEX_SHIFT) & 6) | (i & 1)) << 1; + const uint64_t index_xtl = (((i >> POW_XLT_V4_INDEX_SHIFT) & 6) | (i & 1)) << 1; + variant1_table[i] = i ^ ((0x75310 >> index) & 0x30); + variant_xtl_table[i] = i ^ ((0x75310 >> index_xtl) & 0x30); } setCryptoNightHashMethods(static_cast(algo), aesni); diff --git a/src/crypto/CryptoNight.h b/src/crypto/CryptoNight.h index 1275d669..259e365f 100644 --- a/src/crypto/CryptoNight.h +++ b/src/crypto/CryptoNight.h @@ -45,11 +45,12 @@ struct ScratchPad { // Additional stuff for asm impl uint8_t ctx_info[24]; const void* input; - uint8_t* variant1_table; + uint8_t* variant_table; const uint32_t* t_fn; }; alignas(64) static uint8_t variant1_table[256]; +alignas(64) static uint8_t variant_xtl_table[256]; class Job; class JobResult; diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h index 1b769904..22d35c8b 100644 --- a/src/crypto/CryptoNight_x86.h +++ b/src/crypto/CryptoNight_x86.h @@ -1409,14 +1409,18 @@ public: cn_explode_scratchpad((__m128i*) h, (__m128i*) l); #ifndef XMRIG_NO_ASM + if (INDEX_SHIFT == POW_DEFAULT_INDEX_SHIFT) { + scratchPad[0]->variant_table = variant1_table; + } else { + 
scratchPad[0]->variant_table = variant_xtl_table; + } + + scratchPad[0]->input = input; + if (SOFT_AES) { - scratchPad[0]->input = input; - scratchPad[0]->variant1_table = variant1_table; scratchPad[0]->t_fn = (const uint32_t*)saes_table; cnv1_mainloop_soft_aes_sandybridge_asm(scratchPad[0]); } else { - scratchPad[0]->input = input; - scratchPad[0]->variant1_table = variant1_table; cnv1_mainloop_sandybridge_asm(scratchPad[0]); } #endif @@ -2071,8 +2075,15 @@ public: uint64_t idx0 = h0[0] ^h0[4]; uint64_t idx1 = h1[0] ^h1[4]; +#if defined(__x86_64__) || defined(_M_AMD64) __m128i division_result_xmm = _mm_unpacklo_epi64(_mm_cvtsi64_si128(h0[12]), _mm_cvtsi64_si128(h1[12])); __m128i sqrt_result_xmm = _mm_unpacklo_epi64(_mm_cvtsi64_si128(h0[13]), _mm_cvtsi64_si128(h1[13])); +#else + __m128i division_result_xmm0 = _mm_cvtsi64_si128(h0[12]); + __m128i division_result_xmm1 = _mm_cvtsi64_si128(h1[12]); + uint64_t sqrt_result0 = h0[13]; + uint64_t sqrt_result1 = h1[13]; +#endif SET_ROUNDING_MODE_UP() @@ -2107,8 +2118,12 @@ public: cl = ((uint64_t*) &l0[idx0 & MASK])[0]; ch = ((uint64_t*) &l0[idx0 & MASK])[1]; +#if defined(__x86_64__) || defined(_M_AMD64) const uint64_t sqrt_result0 = _mm_cvtsi128_si64(sqrt_result_xmm); cl ^= static_cast<uint64_t>(_mm_cvtsi128_si64(division_result_xmm)) ^ (sqrt_result0 << 32); +#else + INTEGER_MATH_V2(0, cl, cx0) +#endif lo = __umul128(idx0, cl, &hi); @@ -2127,9 +2142,11 @@ public: bx10 = bx00; bx00 = cx0; + cl = ((uint64_t*) &l1[idx1 & MASK])[0]; ch = ((uint64_t*) &l1[idx1 & MASK])[1]; +#if defined(__x86_64__) || defined(_M_AMD64) const uint64_t sqrt_result1 = _mm_cvtsi128_si64(_mm_srli_si128(sqrt_result_xmm, 8)); cl ^= static_cast<uint64_t>(_mm_cvtsi128_si64(_mm_srli_si128(division_result_xmm, 8))) ^ (sqrt_result1 << 32); @@ -2173,6 +2190,9 @@ public: r1 = static_cast<uint64_t>(_mm_cvtsi128_si64(_mm_srli_si128(_mm_castpd_si128(x), 8))); int_sqrt_v2_fixup(r1, _mm_cvtsi128_si64(_mm_srli_si128(sqrt_input, 8))); sqrt_result_xmm = _mm_set_epi64x(r1, r0); +#else +
INTEGER_MATH_V2(1, cl, cx1) +#endif lo = __umul128(idx1, cl, &hi);