Performance improvement to the memory-hard loop
This commit is contained in:
parent
4abcf937b2
commit
80796c4b48
2 changed files with 90 additions and 39 deletions
|
@ -37,6 +37,7 @@
|
||||||
#include "crypto/CryptoNight.h"
|
#include "crypto/CryptoNight.h"
|
||||||
#include "crypto/CryptoNight_constants.h"
|
#include "crypto/CryptoNight_constants.h"
|
||||||
#include "crypto/CryptoNight_monero.h"
|
#include "crypto/CryptoNight_monero.h"
|
||||||
|
#include "crypto/CryptoNight_x86_loop.h"
|
||||||
#include "crypto/soft_aes.h"
|
#include "crypto/soft_aes.h"
|
||||||
|
|
||||||
|
|
||||||
|
@ -122,7 +123,6 @@ static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uin
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
|
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
|
||||||
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
|
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
|
||||||
static inline __m128i sl_xor(__m128i tmp1)
|
static inline __m128i sl_xor(__m128i tmp1)
|
||||||
|
@ -266,7 +266,7 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__m128i *outputTmpLimit = output + (MEM / sizeof(__m128i));
|
const __m128i *outputTmpLimit = output + (MEM / sizeof(__m128i));
|
||||||
|
|
||||||
for (__m128i *outputTmp = output; outputTmp < outputTmpLimit; outputTmp += 8) {
|
for (__m128i *outputTmp = output; outputTmp < outputTmpLimit; outputTmp += 8) {
|
||||||
aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||||
|
@ -309,7 +309,7 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
||||||
xout6 = _mm_load_si128(output + 10);
|
xout6 = _mm_load_si128(output + 10);
|
||||||
xout7 = _mm_load_si128(output + 11);
|
xout7 = _mm_load_si128(output + 11);
|
||||||
|
|
||||||
__m128i *inputTmpLimit = (__m128i*) input + MEM / sizeof(__m128i);
|
const __m128i *inputTmpLimit = (__m128i*) input + MEM / sizeof(__m128i);
|
||||||
|
|
||||||
for (__m128i *inputTmp = (__m128i*) input; inputTmp < inputTmpLimit; inputTmp += 8)
|
for (__m128i *inputTmp = (__m128i*) input; inputTmp < inputTmpLimit; inputTmp += 8)
|
||||||
{
|
{
|
||||||
|
@ -339,8 +339,6 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
|
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
|
||||||
__m128i *inputTmpLimit = (__m128i*) input + MEM / sizeof(__m128i);
|
|
||||||
|
|
||||||
for (__m128i *inputTmp = (__m128i*) input; inputTmp < inputTmpLimit; inputTmp += 8)
|
for (__m128i *inputTmp = (__m128i*) input; inputTmp < inputTmpLimit; inputTmp += 8)
|
||||||
{
|
{
|
||||||
xout0 = _mm_xor_si128(_mm_load_si128(inputTmp), xout0);
|
xout0 = _mm_xor_si128(_mm_load_si128(inputTmp), xout0);
|
||||||
|
@ -419,46 +417,37 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||||
|
|
||||||
uint64_t idx0 = h0[0] ^ h0[4];
|
uint64_t idx0 = h0[0] ^ h0[4];
|
||||||
|
void* memoryPointer = ((uint8_t*) l0) + ((idx0) & MASK);
|
||||||
|
|
||||||
|
if(SOFT_AES && ALGO == xmrig::CRYPTONIGHT_HEAVY) {
|
||||||
|
for (size_t i = 0; i < ITERATIONS; i++) {
|
||||||
|
__m128i cx;
|
||||||
|
SINGLEHASH_LOOP_SOFTAES
|
||||||
|
SINGLEHASH_LOOP_COMMON
|
||||||
|
SINGLEHASH_LOOP_CNHEAVY
|
||||||
|
}
|
||||||
|
} else if(!SOFT_AES && ALGO == xmrig::CRYPTONIGHT_HEAVY) {
|
||||||
|
for (size_t i = 0; i < ITERATIONS; i++) {
|
||||||
|
__m128i cx;
|
||||||
|
SINGLEHASH_LOOP_HARDAES
|
||||||
|
SINGLEHASH_LOOP_COMMON
|
||||||
|
SINGLEHASH_LOOP_CNHEAVY
|
||||||
|
}
|
||||||
|
} else {
|
||||||
for (size_t i = 0; i < ITERATIONS; i++) {
|
for (size_t i = 0; i < ITERATIONS; i++) {
|
||||||
__m128i cx;
|
__m128i cx;
|
||||||
|
|
||||||
if (SOFT_AES) {
|
if (SOFT_AES) {
|
||||||
cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
|
SINGLEHASH_LOOP_SOFTAES
|
||||||
|
} else {
|
||||||
|
SINGLEHASH_LOOP_HARDAES
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
|
|
||||||
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
|
|
||||||
}
|
|
||||||
_mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
|
|
||||||
VARIANT1_1(&l0[idx0 & MASK]);
|
|
||||||
idx0 = EXTRACT64(cx);
|
|
||||||
bx0 = cx;
|
|
||||||
|
|
||||||
uint64_t hi, lo, cl, ch;
|
SINGLEHASH_LOOP_COMMON
|
||||||
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
|
|
||||||
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
|
|
||||||
lo = __umul128(idx0, cl, &hi);
|
|
||||||
|
|
||||||
al0 += hi;
|
|
||||||
ah0 += lo;
|
|
||||||
|
|
||||||
VARIANT1_2(ah0, 0);
|
|
||||||
((uint64_t*)&l0[idx0 & MASK])[0] = al0;
|
|
||||||
((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
|
|
||||||
VARIANT1_2(ah0, 0);
|
|
||||||
|
|
||||||
ah0 ^= ch;
|
|
||||||
al0 ^= cl;
|
|
||||||
idx0 = al0;
|
|
||||||
|
|
||||||
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
|
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
|
||||||
int64_t n = ((int64_t*)&l0[idx0 & MASK])[0];
|
SINGLEHASH_LOOP_CNHEAVY
|
||||||
int32_t d = ((int32_t*)&l0[idx0 & MASK])[2];
|
}
|
||||||
int64_t q = n / (d | 0x5);
|
|
||||||
|
|
||||||
((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
|
|
||||||
idx0 = d ^ q;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
62
src/crypto/CryptoNight_x86_loop.h
Normal file
62
src/crypto/CryptoNight_x86_loop.h
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
/* XMRig
|
||||||
|
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||||
|
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||||
|
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||||
|
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||||
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
|
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||||
|
* Copyright 2018 aegroto
|
||||||
|
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __CRYPTONIGHT_X86_LOOP_H__
|
||||||
|
#define __CRYPTONIGHT_X86_LOOP_H__
|
||||||
|
|
||||||
|
/*
 * Shared middle part of one iteration of the single-hash main loop.
 *
 * The macro expands in place, without an enclosing scope. It expects the
 * expansion site to provide `memoryPointer`, `l0`, `idx0`, `bx0`, `cx`,
 * `al0`, `ah0` and `MASK`, together with the VARIANT1_1, VARIANT1_2,
 * EXTRACT64 and __umul128 helpers. It declares `hi`, `lo`, `cl` and `ch`
 * at the expansion site, so it can be expanded at most once per block.
 *
 * On entry `cx` must already hold the result of the AES step for the slot
 * `memoryPointer` addresses. On exit `memoryPointer` addresses the slot
 * selected by `al0`.
 */
#define SINGLEHASH_LOOP_COMMON \
    /* Write bx0 ^ cx back to the slot the AES step read from. */ \
    _mm_store_si128((__m128i *) memoryPointer, _mm_xor_si128(bx0, cx)); \
    VARIANT1_1(memoryPointer); \
    /* The next slot is selected by the 64-bit value extracted from cx. */ \
    idx0 = EXTRACT64(cx); \
    memoryPointer = ((uint8_t*) l0) + ((idx0) & MASK); \
    bx0 = cx; \
    /* Capture both 64-bit words of the slot before it is overwritten. */ \
    const uint64_t cl = ((uint64_t*) memoryPointer)[0]; \
    const uint64_t ch = ((uint64_t*) memoryPointer)[1]; \
    uint64_t hi; \
    const uint64_t lo = __umul128(idx0, cl, &hi); \
    al0 += hi; \
    ah0 += lo; \
    /* VARIANT1_2 is applied around the store: once before, once after. */ \
    VARIANT1_2(ah0, 0); \
    ((uint64_t*) memoryPointer)[0] = al0; \
    ((uint64_t*) memoryPointer)[1] = ah0; \
    VARIANT1_2(ah0, 0); \
    ah0 ^= ch; \
    al0 ^= cl; \
    /* Leave memoryPointer on the slot addressed by the updated al0. */ \
    memoryPointer = ((uint8_t*) l0) + ((al0) & MASK);
|
||||||
|
|
||||||
|
/*
 * CryptoNight-Heavy tail of one iteration of the single-hash main loop.
 *
 * The macro expands in place, without an enclosing scope. It expects the
 * expansion site to provide `memoryPointer`, `l0`, `idx0` and `MASK`, and it
 * declares `n`, `d` and `q` there, so it can be expanded at most once per
 * block.
 *
 * It reads a signed 64-bit value `n` (bytes 0-7) and a signed 32-bit value
 * `d` (bytes 8-11) from the slot `memoryPointer` addresses, stores
 * `n ^ (n / (d | 0x5))` back into bytes 0-7, and then re-seeds `idx0` with
 * `d ^ q`. `memoryPointer` is recomputed from the new `idx0`, because the
 * AES step of the next iteration reads through `memoryPointer`; without
 * that update the next iteration would keep using the slot chosen by `al0`.
 */
#define SINGLEHASH_LOOP_CNHEAVY \
    int64_t n = ((int64_t*) memoryPointer)[0]; \
    int32_t d = ((int32_t*) memoryPointer)[2]; \
    /* (d | 0x5) always has bits 0 and 2 set, so the divisor is never 0. */ \
    int64_t q = n / (d | 0x5); \
    ((int64_t*) memoryPointer)[0] = n ^ q; \
    /* Select the slot for the next iteration from d ^ q. */ \
    idx0 = d ^ q; \
    memoryPointer = ((uint8_t*) l0) + ((idx0) & MASK);
|
||||||
|
|
||||||
|
/*
 * AES step of one main-loop iteration, software path.
 *
 * Assigns to `cx` the result of soft_aesenc() applied to the slot that
 * `memoryPointer` addresses, using the 128-bit value built from (ah0, al0)
 * as the second argument. `cx`, `memoryPointer`, `ah0` and `al0` must be
 * provided at the expansion site.
 */
#define SINGLEHASH_LOOP_SOFTAES \
    cx = soft_aesenc( \
        (uint32_t*) memoryPointer, \
        _mm_set_epi64x(ah0, al0));
|
||||||
|
|
||||||
|
/*
 * AES step of one main-loop iteration, hardware (AES-NI intrinsic) path.
 *
 * Loads 128 bits from the slot that `memoryPointer` addresses (an aligned
 * load, so the slot must be 16-byte aligned) and assigns to `cx` the result
 * of _mm_aesenc_si128() on that value with the round key built from
 * (ah0, al0). `cx`, `memoryPointer`, `ah0` and `al0` must be provided at the
 * expansion site.
 */
#define SINGLEHASH_LOOP_HARDAES \
    cx = _mm_aesenc_si128( \
        _mm_load_si128((__m128i *) memoryPointer), \
        _mm_set_epi64x(ah0, al0));
|
||||||
|
|
||||||
|
#endif /* __CRYPTONIGHT_X86_LOOP_H__ */
|
Loading…
Add table
Add a link
Reference in a new issue