diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h index 0329d634..43b815e7 100644 --- a/src/crypto/CryptoNight_x86.h +++ b/src/crypto/CryptoNight_x86.h @@ -37,6 +37,7 @@ #include "crypto/CryptoNight.h" #include "crypto/CryptoNight_constants.h" #include "crypto/CryptoNight_monero.h" +#include "crypto/CryptoNight_x86_loop.h" #include "crypto/soft_aes.h" @@ -122,7 +123,6 @@ static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uin } #endif - // This will shift and xor tmp1 into itself as 4 32-bit vals such as // sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1) static inline __m128i sl_xor(__m128i tmp1) @@ -266,7 +266,7 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output) } } - __m128i *outputTmpLimit = output + (MEM / sizeof(__m128i)); + const __m128i *outputTmpLimit = output + (MEM / sizeof(__m128i)); for (__m128i *outputTmp = output; outputTmp < outputTmpLimit; outputTmp += 8) { aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); @@ -309,7 +309,7 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output) xout6 = _mm_load_si128(output + 10); xout7 = _mm_load_si128(output + 11); - __m128i *inputTmpLimit = (__m128i*) input + MEM / sizeof(__m128i); + const __m128i *inputTmpLimit = (__m128i*) input + MEM / sizeof(__m128i); for (__m128i *inputTmp = (__m128i*) input; inputTmp < inputTmpLimit; inputTmp += 8) { @@ -339,8 +339,6 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output) } if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - __m128i *inputTmpLimit = (__m128i*) input + MEM / sizeof(__m128i); - for (__m128i *inputTmp = (__m128i*) input; inputTmp < inputTmpLimit; inputTmp += 8) { xout0 = _mm_xor_si128(_mm_load_si128(inputTmp), xout0); @@ -419,46 +417,37 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); uint64_t idx0 = h0[0] ^ h0[4]; 
+ void* memoryPointer = ((uint8_t*) l0) + ((idx0) & MASK); - for (size_t i = 0; i < ITERATIONS; i++) { - __m128i cx; - - if (SOFT_AES) { - cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0)); + if(SOFT_AES && ALGO == xmrig::CRYPTONIGHT_HEAVY) { + for (size_t i = 0; i < ITERATIONS; i++) { + __m128i cx; + SINGLEHASH_LOOP_SOFTAES + SINGLEHASH_LOOP_COMMON + SINGLEHASH_LOOP_CNHEAVY } - else { - cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]); - cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0)); + } else if(!SOFT_AES && ALGO == xmrig::CRYPTONIGHT_HEAVY) { + for (size_t i = 0; i < ITERATIONS; i++) { + __m128i cx; + SINGLEHASH_LOOP_HARDAES + SINGLEHASH_LOOP_COMMON + SINGLEHASH_LOOP_CNHEAVY } - _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); - VARIANT1_1(&l0[idx0 & MASK]); - idx0 = EXTRACT64(cx); - bx0 = cx; + } else { + for (size_t i = 0; i < ITERATIONS; i++) { + __m128i cx; - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*) &l0[idx0 & MASK])[0]; - ch = ((uint64_t*) &l0[idx0 & MASK])[1]; - lo = __umul128(idx0, cl, &hi); + if (SOFT_AES) { + SINGLEHASH_LOOP_SOFTAES + } else { + SINGLEHASH_LOOP_HARDAES + } - al0 += hi; - ah0 += lo; + SINGLEHASH_LOOP_COMMON - VARIANT1_2(ah0, 0); - ((uint64_t*)&l0[idx0 & MASK])[0] = al0; - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0; - VARIANT1_2(ah0, 0); - - ah0 ^= ch; - al0 ^= cl; - idx0 = al0; - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - int64_t n = ((int64_t*)&l0[idx0 & MASK])[0]; - int32_t d = ((int32_t*)&l0[idx0 & MASK])[2]; - int64_t q = n / (d | 0x5); - - ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; - idx0 = d ^ q; + if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { + SINGLEHASH_LOOP_CNHEAVY + } } } diff --git a/src/crypto/CryptoNight_x86_loop.h b/src/crypto/CryptoNight_x86_loop.h new file mode 100644 index 00000000..fc44a145 --- /dev/null +++ b/src/crypto/CryptoNight_x86_loop.h @@ -0,0 +1,62 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 
2014-2016 Wolf9466
+ * Copyright 2016 Jay D Dee
+ * Copyright 2017-2018 XMR-Stak ,
+ * Copyright 2018 Lee Clagett
+ * Copyright 2018 aegroto
+ * Copyright 2016-2018 XMRig ,
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __CRYPTONIGHT_X86_LOOP_H__
+#define __CRYPTONIGHT_X86_LOOP_H__
+
+/*
+ * The macros below are statement sequences, not expressions. They are pasted
+ * into the main loop of cryptonight_single_hash() and use that function's
+ * locals directly: l0, idx0, al0, ah0, bx0, cx and memoryPointer, plus MASK.
+ * Some of them declare locals of their own, so each one may be expanded at
+ * most once per scope. Call sites expand them without a trailing semicolon,
+ * which is why they are not wrapped in do { } while (0).
+ */
+
+/*
+ * Steps shared by every variant; expects cx to have been computed already.
+ *  - stores bx0 ^ cx at memoryPointer and applies VARIANT1_1 there;
+ *  - takes the next index from cx (EXTRACT64) and repoints memoryPointer;
+ *  - loads two 64-bit words (cl, ch) from the new location and calls
+ *    __umul128(idx0, cl, &hi); hi is added to al0, the returned lo to ah0;
+ *  - writes al0 / ah0 back to that location, with VARIANT1_2(ah0, 0) applied
+ *    before and after the write, then xors cl / ch into al0 / ah0;
+ *  - leaves memoryPointer at l0 + (al0 & MASK).
+ * idx0 is NOT set to al0 here; only memoryPointer tracks that location.
+ */
+#define SINGLEHASH_LOOP_COMMON \
+    _mm_store_si128((__m128i *) memoryPointer, _mm_xor_si128(bx0, cx)); \
+    VARIANT1_1(memoryPointer); \
+    idx0 = EXTRACT64(cx); \
+    memoryPointer = ((uint8_t*) l0) + ((idx0) & MASK); \
+    bx0 = cx; \
+    uint64_t hi, lo, cl, ch; \
+    cl = ((uint64_t*) memoryPointer)[0]; \
+    ch = ((uint64_t*) memoryPointer)[1]; \
+    lo = __umul128(idx0, cl, &hi); \
+    al0 += hi; \
+    ah0 += lo; \
+    VARIANT1_2(ah0, 0); \
+    ((uint64_t*) memoryPointer)[0] = al0; \
+    ((uint64_t*) memoryPointer)[1] = ah0; \
+    VARIANT1_2(ah0, 0); \
+    ah0 ^= ch; \
+    al0 ^= cl; \
+    memoryPointer = ((uint8_t*) l0) + ((al0) & MASK);
+
+/*
+ * Extra step used for xmrig::CRYPTONIGHT_HEAVY, expanded right after
+ * SINGLEHASH_LOOP_COMMON. Reads a signed 64-bit n (int64 element 0) and a
+ * signed 32-bit d (int32 element 2) at memoryPointer, computes
+ * q = n / (d | 0x5) and stores n ^ q back over n.
+ *
+ * It then sets idx0 = d ^ q and repoints memoryPointer from it, so that the
+ * AES step of the next iteration reads from that address. The loop body this
+ * macro replaced did the same through "idx0 = d ^ q"; without these two
+ * statements the next iteration would keep using the al0-derived address.
+ */
+#define SINGLEHASH_LOOP_CNHEAVY \
+    int64_t n = ((int64_t*)memoryPointer)[0]; \
+    int32_t d = ((int32_t*)memoryPointer)[2]; \
+    int64_t q = n / (d | 0x5); \
+    ((int64_t*) memoryPointer)[0] = n ^ q; \
+    idx0 = d ^ q; \
+    memoryPointer = ((uint8_t*) l0) + ((idx0) & MASK);
+
+/*
+ * AES step, software path: cx = soft_aesenc() over the data at memoryPointer,
+ * keyed with _mm_set_epi64x(ah0, al0).
+ */
+#define SINGLEHASH_LOOP_SOFTAES \
+    cx = soft_aesenc((uint32_t*) memoryPointer, _mm_set_epi64x(ah0, al0));
+
+/*
+ * AES step, hardware path: loads 128 bits from memoryPointer with
+ * _mm_load_si128 and runs _mm_aesenc_si128 on them, keyed with
+ * _mm_set_epi64x(ah0, al0).
+ */
+#define SINGLEHASH_LOOP_HARDAES \
+    cx = _mm_load_si128((__m128i *) memoryPointer); \
+    cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
+
+#endif /* __CRYPTONIGHT_X86_LOOP_H__ */