Optimized cn-heavy for Zen3
- Uses scratchpad interleaving to access only the closest L3 slice from each CPU core. - Also activates MSR mod for cn-heavy because CPU prefetchers get confused with interleaving - 7-8% speedup on Zen3
This commit is contained in:
parent
b1e14dc1d3
commit
8af8df25aa
8 changed files with 187 additions and 81 deletions
|
@ -306,7 +306,21 @@ inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3
|
|||
namespace xmrig {
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||
template<int interleave>
|
||||
static inline constexpr uint64_t interleaved_index(uint64_t k)
|
||||
{
|
||||
return ((k & ~63ULL) << interleave) | (k & 63);
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
inline constexpr uint64_t interleaved_index<0>(uint64_t k)
|
||||
{
|
||||
return k;
|
||||
}
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
|
||||
static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
@ -354,19 +368,21 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
|
|||
aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
|
||||
_mm_store_si128(output + i + 0, xin0);
|
||||
_mm_store_si128(output + i + 1, xin1);
|
||||
_mm_store_si128(output + i + 2, xin2);
|
||||
_mm_store_si128(output + i + 3, xin3);
|
||||
_mm_store_si128(output + i + 4, xin4);
|
||||
_mm_store_si128(output + i + 5, xin5);
|
||||
_mm_store_si128(output + i + 6, xin6);
|
||||
_mm_store_si128(output + i + 7, xin7);
|
||||
_mm_store_si128(output + 0, xin0);
|
||||
_mm_store_si128(output + 1, xin1);
|
||||
_mm_store_si128(output + 2, xin2);
|
||||
_mm_store_si128(output + 3, xin3);
|
||||
output += (64 << interleave) / sizeof(__m128i);
|
||||
_mm_store_si128(output + 0, xin4);
|
||||
_mm_store_si128(output + 1, xin5);
|
||||
_mm_store_si128(output + 2, xin6);
|
||||
_mm_store_si128(output + 3, xin7);
|
||||
output += (64 << interleave) / sizeof(__m128i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
|
||||
static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
@ -387,15 +403,18 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
|||
xout6 = _mm_load_si128(output + 10);
|
||||
xout7 = _mm_load_si128(output + 11);
|
||||
|
||||
const __m128i* input_begin = input;
|
||||
for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
|
||||
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
|
||||
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
|
||||
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
|
||||
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
|
||||
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
|
||||
xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3);
|
||||
input += (64 << interleave) / sizeof(__m128i);
|
||||
xout4 = _mm_xor_si128(_mm_load_si128(input + 0), xout4);
|
||||
xout5 = _mm_xor_si128(_mm_load_si128(input + 1), xout5);
|
||||
xout6 = _mm_xor_si128(_mm_load_si128(input + 2), xout6);
|
||||
xout7 = _mm_xor_si128(_mm_load_si128(input + 3), xout7);
|
||||
input += (64 << interleave) / sizeof(__m128i);
|
||||
|
||||
aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
|
@ -414,15 +433,18 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
|||
}
|
||||
|
||||
if (IS_HEAVY) {
|
||||
input = input_begin;
|
||||
for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
|
||||
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
|
||||
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
|
||||
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
|
||||
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
|
||||
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
|
||||
xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3);
|
||||
input += (64 << interleave) / sizeof(__m128i);
|
||||
xout4 = _mm_xor_si128(_mm_load_si128(input + 0), xout4);
|
||||
xout5 = _mm_xor_si128(_mm_load_si128(input + 1), xout5);
|
||||
xout6 = _mm_xor_si128(_mm_load_si128(input + 2), xout6);
|
||||
xout7 = _mm_xor_si128(_mm_load_si128(input + 3), xout7);
|
||||
input += (64 << interleave) / sizeof(__m128i);
|
||||
|
||||
aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
|
@ -558,7 +580,7 @@ static inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
|
|||
cx = _mm_xor_si128(cx, _mm_cvttps_epi32(nc));
|
||||
}
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
|
||||
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
@ -577,7 +599,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(ctx[0]->state), reinterpret_cast<__m128i *>(ctx[0]->memory));
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, interleave>(reinterpret_cast<const __m128i *>(ctx[0]->state), reinterpret_cast<__m128i *>(ctx[0]->memory));
|
||||
|
||||
uint64_t *h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
|
||||
uint8_t *l0 = ctx[0]->memory;
|
||||
|
@ -620,7 +642,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
for (size_t i = 0; i < props.iterations(); i++) {
|
||||
__m128i cx;
|
||||
if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
|
||||
cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK]));
|
||||
cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[interleaved_index<interleave>(idx0 & MASK)]));
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
cryptonight_conceal_tweak(cx, conc_var);
|
||||
}
|
||||
|
@ -632,12 +654,12 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
}
|
||||
else if (SOFT_AES) {
|
||||
if (ALGO == Algorithm::CN_CCX) {
|
||||
cx = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
|
||||
cx = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[interleaved_index<interleave>(idx0 & MASK)]));
|
||||
cryptonight_conceal_tweak(cx, conc_var);
|
||||
cx = soft_aesenc(&cx, ax0, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
}
|
||||
else {
|
||||
cx = soft_aesenc(&l0[idx0 & MASK], ax0, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
cx = soft_aesenc(&l0[interleaved_index<interleave>(idx0 & MASK)], ax0, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
@ -645,16 +667,16 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
}
|
||||
|
||||
if (BASE == Algorithm::CN_1 || BASE == Algorithm::CN_2) {
|
||||
cryptonight_monero_tweak<ALGO>(reinterpret_cast<uint64_t*>(&l0[idx0 & MASK]), l0, idx0 & MASK, ax0, bx0, bx1, cx);
|
||||
cryptonight_monero_tweak<ALGO>(reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]), l0, idx0 & MASK, ax0, bx0, bx1, cx);
|
||||
} else {
|
||||
_mm_store_si128(reinterpret_cast<__m128i *>(&l0[idx0 & MASK]), _mm_xor_si128(bx0, cx));
|
||||
_mm_store_si128(reinterpret_cast<__m128i *>(&l0[interleaved_index<interleave>(idx0 & MASK)]), _mm_xor_si128(bx0, cx));
|
||||
}
|
||||
|
||||
idx0 = static_cast<uint64_t>(_mm_cvtsi128_si64(cx));
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = (reinterpret_cast<uint64_t*>(&l0[idx0 & MASK]))[0];
|
||||
ch = (reinterpret_cast<uint64_t*>(&l0[idx0 & MASK]))[1];
|
||||
cl = (reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]))[0];
|
||||
ch = (reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]))[1];
|
||||
|
||||
if (BASE == Algorithm::CN_2) {
|
||||
if (props.isR()) {
|
||||
|
@ -681,14 +703,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[0] = al0;
|
||||
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[0] = al0;
|
||||
|
||||
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) {
|
||||
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
|
||||
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0 ^ tweak1_2_0 ^ al0;
|
||||
} else if (BASE == Algorithm::CN_1) {
|
||||
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
|
||||
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0 ^ tweak1_2_0;
|
||||
} else {
|
||||
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[1] = ah0;
|
||||
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0;
|
||||
}
|
||||
|
||||
al0 ^= cl;
|
||||
|
@ -697,11 +719,11 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
if (props.isHeavy()) {
|
||||
int64_t n = ((int64_t*)&l0[idx0 & MASK])[0];
|
||||
int32_t d = ((int32_t*)&l0[idx0 & MASK])[2];
|
||||
int64_t n = ((int64_t*)&l0[interleaved_index<interleave>(idx0 & MASK)])[0];
|
||||
int32_t d = ((int32_t*)&l0[interleaved_index<interleave>(idx0 & MASK)])[2];
|
||||
int64_t q = n / (d | 0x5);
|
||||
|
||||
((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
|
||||
((int64_t*)&l0[interleaved_index<interleave>(idx0 & MASK)])[0] = n ^ q;
|
||||
|
||||
if (ALGO == Algorithm::CN_HEAVY_XHV) {
|
||||
d = ~d;
|
||||
|
@ -722,7 +744,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
}
|
||||
# endif
|
||||
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, interleave>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
@ -810,7 +832,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
|||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state);
|
||||
cn_explode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
|
||||
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
|
||||
|
||||
if (ALGO == Algorithm::CN_2) {
|
||||
if (ASM == Assembly::INTEL) {
|
||||
|
@ -887,7 +909,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
|||
ctx[0]->generated_code(ctx);
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
||||
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
@ -909,8 +931,8 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
|||
keccak(input, size, ctx[0]->state);
|
||||
keccak(input + size, size, ctx[1]->state);
|
||||
|
||||
cn_explode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
|
||||
cn_explode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory));
|
||||
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
|
||||
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory));
|
||||
|
||||
if (ALGO == Algorithm::CN_2) {
|
||||
cnv2_double_mainloop_sandybridge_asm(ctx);
|
||||
|
@ -939,8 +961,8 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
|||
ctx[0]->generated_code(ctx);
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
||||
cn_implode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state));
|
||||
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
||||
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state));
|
||||
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
|
||||
|
@ -991,8 +1013,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
VARIANT4_RANDOM_MATH_INIT(0);
|
||||
VARIANT4_RANDOM_MATH_INIT(1);
|
||||
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(h0), reinterpret_cast<__m128i *>(l0));
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(h1), reinterpret_cast<__m128i *>(l1));
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(h0), reinterpret_cast<__m128i *>(l0));
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(h1), reinterpret_cast<__m128i *>(l1));
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
|
@ -1187,8 +1209,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
|||
bx10 = cx1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(l0), reinterpret_cast<__m128i *>(h0));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(l1), reinterpret_cast<__m128i *>(h1));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(l0), reinterpret_cast<__m128i *>(h0));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(l1), reinterpret_cast<__m128i *>(h1));
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
@ -1333,7 +1355,7 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
|
|||
|
||||
for (size_t i = 0; i < 3; i++) {
|
||||
keccak(input + size * i, size, ctx[i]->state);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||
}
|
||||
|
||||
uint8_t* l0 = ctx[0]->memory;
|
||||
|
@ -1378,7 +1400,7 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
|
|||
}
|
||||
|
||||
for (size_t i = 0; i < 3; i++) {
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
||||
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
||||
}
|
||||
|
@ -1407,7 +1429,7 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
|
|||
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
keccak(input + size * i, size, ctx[i]->state);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||
}
|
||||
|
||||
uint8_t* l0 = ctx[0]->memory;
|
||||
|
@ -1460,7 +1482,7 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
|
|||
}
|
||||
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
||||
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
||||
}
|
||||
|
@ -1489,7 +1511,7 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
|
|||
|
||||
for (size_t i = 0; i < 5; i++) {
|
||||
keccak(input + size * i, size, ctx[i]->state);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||
}
|
||||
|
||||
uint8_t* l0 = ctx[0]->memory;
|
||||
|
@ -1550,7 +1572,7 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
|
|||
}
|
||||
|
||||
for (size_t i = 0; i < 5; i++) {
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
||||
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue