merge conflicts solved
commit bc6a6c8028
84 changed files with 1750 additions and 1087 deletions
@@ -30,18 +30,8 @@
#include <stdint.h>


#define AEON_MEMORY 1048576
#define AEON_MASK 0xFFFF0
#define AEON_ITER 0x40000

#define MONERO_MEMORY 2097152
#define MONERO_MASK 0x1FFFF0
#define MONERO_ITER 0x80000


struct cryptonight_ctx {
    alignas(16) uint8_t state0[200];
    alignas(16) uint8_t state1[200];
    alignas(16) uint8_t state[200];
    alignas(16) uint8_t* memory;
};

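The merged layout drops the state0/state1 pair in favour of a single per-context state buffer, and the hash kernels below now take an array of per-lane contexts instead of one shared context. A minimal allocation sketch under that assumption (alloc_ctx_array is a hypothetical helper, not part of this commit; _mm_malloc comes from mm_malloc.h):

#include <cstdint>
#include <mm_malloc.h>

// Hypothetical helper: one cryptonight_ctx per hash lane, each owning a
// 16-byte-aligned struct (with its 200-byte Keccak state) and its own
// page-aligned scratchpad of scratchpad_size bytes.
static cryptonight_ctx** alloc_ctx_array(size_t lanes, size_t scratchpad_size)
{
    cryptonight_ctx** ctx = new cryptonight_ctx*[lanes];
    for (size_t i = 0; i < lanes; ++i) {
        ctx[i] = static_cast<cryptonight_ctx*>(_mm_malloc(sizeof(cryptonight_ctx), 16));
        ctx[i]->memory = static_cast<uint8_t*>(_mm_malloc(scratchpad_size, 4096));
    }
    return ctx;
}
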
@@ -28,13 +28,7 @@
#define __CRYPTONIGHT_ARM_H__


#if defined(XMRIG_ARM) && !defined(__clang__)
#   include "aligned_malloc.h"
#else
#   include <mm_malloc.h>
#endif

#include "common/utils/mm_malloc.h"
#include "crypto/CryptoNight.h"
#include "crypto/CryptoNight_constants.h"
#include "crypto/CryptoNight_monero.h"

@@ -80,6 +74,19 @@ static inline __attribute__((always_inline)) __m128i _mm_set_epi64x(const uint64
}


#ifdef XMRIG_ARMv8
static inline __attribute__((always_inline)) __m128i _mm_aesenc_si128(__m128i v, __m128i rkey)
{
    alignas(16) const __m128i zero = { 0 };
    return veorq_u8(vaesmcq_u8(vaeseq_u8(v, zero)), rkey);
}
#else
static inline __attribute__((always_inline)) __m128i _mm_aesenc_si128(__m128i v, __m128i rkey)
{
}
#endif


/* this one was not implemented yet so here it is */
static inline __attribute__((always_inline)) uint64_t _mm_cvtsi128_si64(__m128i a)
{
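
The ARMv8 shim works because NEON factors the AES round differently from x86: vaeseq_u8(v, key) computes SubBytes(ShiftRows(v ^ key)), so passing a zero key makes the initial XOR a no-op, vaesmcq_u8 supplies MixColumns, and the trailing veorq_u8 applies the round key last, exactly as AESENC does. The empty ARMv7 stub is safe only because every caller guards the hardware path with #ifndef XMRIG_ARMv7. A standalone restatement of the identity (ARMv8 with crypto extensions only):

#include <arm_neon.h>

// x86:  AESENC(v, rkey) = MixColumns(ShiftRows(SubBytes(v))) ^ rkey
// NEON: vaeseq_u8(v, k) = SubBytes(ShiftRows(v ^ k)), so k = 0 leaves
// only SubBytes/ShiftRows; vaesmcq_u8 adds MixColumns; veorq_u8 adds rkey.
static inline uint8x16_t aesenc_equiv(uint8x16_t v, uint8x16_t rkey)
{
    return veorq_u8(vaesmcq_u8(vaeseq_u8(v, vdupq_n_u8(0))), rkey);
}
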
@@ -162,19 +169,19 @@ static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, _
    *k0 = xout0;
    *k1 = xout2;

    SOFT_AES ? soft_aes_genkey_sub<0x01>(&xout0, &xout2) : soft_aes_genkey_sub<0x01>(&xout0, &xout2);
    soft_aes_genkey_sub<0x01>(&xout0, &xout2);
    *k2 = xout0;
    *k3 = xout2;

    SOFT_AES ? soft_aes_genkey_sub<0x02>(&xout0, &xout2) : soft_aes_genkey_sub<0x02>(&xout0, &xout2);
    soft_aes_genkey_sub<0x02>(&xout0, &xout2);
    *k4 = xout0;
    *k5 = xout2;

    SOFT_AES ? soft_aes_genkey_sub<0x04>(&xout0, &xout2) : soft_aes_genkey_sub<0x04>(&xout0, &xout2);
    soft_aes_genkey_sub<0x04>(&xout0, &xout2);
    *k6 = xout0;
    *k7 = xout2;

    SOFT_AES ? soft_aes_genkey_sub<0x08>(&xout0, &xout2) : soft_aes_genkey_sub<0x08>(&xout0, &xout2);
    soft_aes_genkey_sub<0x08>(&xout0, &xout2);
    *k8 = xout0;
    *k9 = xout2;
}
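
Both arms of the removed ternaries called the identical soft_aes_genkey_sub instantiation, so the merge collapses them to a plain call: key expansion is always done in software here, and only the bulk rounds choose between soft and hardware AES. Assuming soft_aes_genkey_sub follows the standard AES-256 schedule, one step expands the two 128-bit halves roughly as below (a scalar sketch with a hypothetical name, reusing the sub_word helper from soft_aes.h shown later in this diff, not the project's actual implementation):

static inline uint32_t rotr32(uint32_t x, int r) { return (x >> r) | (x << (32 - r)); }

// One AES-256 expansion step: RotWord+SubWord+Rcon feeds the first half,
// SubWord alone feeds the second, each chaining XORs across its four words.
template<uint8_t RCON>
static inline void genkey_step_sketch(uint32_t k0[4], uint32_t k2[4])
{
    uint32_t t = sub_word(rotr32(k2[3], 8)) ^ RCON;
    k0[0] ^= t;  k0[1] ^= k0[0];  k0[2] ^= k0[1];  k0[3] ^= k0[2];
    t = sub_word(k0[3]);
    k2[0] ^= t;  k2[1] ^= k2[0];  k2[2] ^= k2[1];  k2[3] ^= k2[2];
}
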
@@ -193,18 +200,16 @@ static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2,
        *x6 = soft_aesenc((uint32_t*)x6, key);
        *x7 = soft_aesenc((uint32_t*)x7, key);
    }
# ifndef XMRIG_ARMv7
    else {
        *x0 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x0), key));
        *x1 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x1), key));
        *x2 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x2), key));
        *x3 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x3), key));
        *x4 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x4), key));
        *x5 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x5), key));
        *x6 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x6), key));
        *x7 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x7), key));
        *x0 = _mm_aesenc_si128(*x0, key);
        *x1 = _mm_aesenc_si128(*x1, key);
        *x2 = _mm_aesenc_si128(*x2, key);
        *x3 = _mm_aesenc_si128(*x3, key);
        *x4 = _mm_aesenc_si128(*x4, key);
        *x5 = _mm_aesenc_si128(*x5, key);
        *x6 = _mm_aesenc_si128(*x6, key);
        *x7 = _mm_aesenc_si128(*x7, key);
    }
# endif
}


@@ -241,10 +246,6 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)

    if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
        for (size_t i = 0; i < 16; i++) {
            if (!SOFT_AES) {
                aes_round<SOFT_AES>(_mm_setzero_si128(), &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
            }

            aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
            aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
            aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
@@ -254,20 +255,7 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
            aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
            aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
            aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);

            if (!SOFT_AES) {
                xin0 ^= k9;
                xin1 ^= k9;
                xin2 ^= k9;
                xin3 ^= k9;
                xin4 ^= k9;
                xin5 ^= k9;
                xin6 ^= k9;
                xin7 ^= k9;
            }
            else {
                aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
            }
            aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);

            mix_and_propagate(xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
        }
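
The pattern removed here recurs in every scratchpad pass below: the hardware path used to XOR the final key k9 in manually while the software path ran a tenth aes_round, and the merge replaces both with a single aes_round<SOFT_AES>(k9, ...) call for either path. With the keys collected into an array for illustration, the per-block transform reduces to a uniform loop (a sketch only; the real code keeps k0..k9 in separate named variables to stay in registers):

template<bool SOFT_AES>
static inline void ten_rounds_sketch(const __m128i keys[10],
                                     __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3,
                                     __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
{
    // Ten identical rounds over eight blocks; the template parameter picks
    // the soft or hardware implementation inside aes_round itself.
    for (int r = 0; r < 10; ++r) {
        aes_round<SOFT_AES>(keys[r], x0, x1, x2, x3, x4, x5, x6, x7);
    }
}
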
@@ -289,20 +277,7 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
        aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
        aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
        aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);

        if (!SOFT_AES) {
            xin0 ^= k9;
            xin1 ^= k9;
            xin2 ^= k9;
            xin3 ^= k9;
            xin4 ^= k9;
            xin5 ^= k9;
            xin6 ^= k9;
            xin7 ^= k9;
        }
        else {
            aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
        }
        aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);

        _mm_store_si128(outputTmp, xin0);
        _mm_store_si128(outputTmp + 1, xin1);
@@ -346,10 +321,6 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
        xout6 = _mm_xor_si128(_mm_load_si128(inputTmp + 6), xout6);
        xout7 = _mm_xor_si128(_mm_load_si128(inputTmp + 7), xout7);

        if (!SOFT_AES) {
            aes_round<SOFT_AES>(_mm_setzero_si128(), &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
        }

        aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
        aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
        aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
@@ -359,20 +330,7 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
        aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
        aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
        aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);

        if (!SOFT_AES) {
            xout0 ^= k9;
            xout1 ^= k9;
            xout2 ^= k9;
            xout3 ^= k9;
            xout4 ^= k9;
            xout5 ^= k9;
            xout6 ^= k9;
            xout7 ^= k9;
        }
        else {
            aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
        }
        aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);

        if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
            mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
@@ -390,10 +348,6 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
            xout6 = _mm_xor_si128(_mm_load_si128(inputTmp + 6), xout6);
            xout7 = _mm_xor_si128(_mm_load_si128(inputTmp + 7), xout7);

            if (!SOFT_AES) {
                aes_round<SOFT_AES>(_mm_setzero_si128(), &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
            }

            aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
            aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
            aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
@@ -403,29 +357,12 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
            aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
            aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
            aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);

            if (!SOFT_AES) {
                xout0 ^= k9;
                xout1 ^= k9;
                xout2 ^= k9;
                xout3 ^= k9;
                xout4 ^= k9;
                xout5 ^= k9;
                xout6 ^= k9;
                xout7 ^= k9;
            }
            else {
                aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
            }
            aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);

            mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
        }

        for (size_t i = 0; i < 16; i++) {
            if (!SOFT_AES) {
                aes_round<SOFT_AES>(_mm_setzero_si128(), &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
            }

            aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
            aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
            aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
@@ -435,20 +372,7 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
            aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
            aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
            aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);

            if (!SOFT_AES) {
                xout0 ^= k9;
                xout1 ^= k9;
                xout2 ^= k9;
                xout3 ^= k9;
                xout4 ^= k9;
                xout5 ^= k9;
                xout6 ^= k9;
                xout7 ^= k9;
            }
            else {
                aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
            }
            aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);

            mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
        }
@@ -465,8 +389,23 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
}


static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
{
    mem_out[0] = EXTRACT64(tmp);

    uint64_t vh = vgetq_lane_u64(tmp, 1);

    uint8_t x = vh >> 24;
    static const uint16_t table = 0x7531;
    const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
    vh ^= ((table >> index) & 0x3) << 28;

    mem_out[1] = vh;
}
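
The variant-1 tweak touches only byte 11 of each 16-byte block written back to the scratchpad: vh >> 24 pulls that byte out of the high 64-bit lane, the packed constant 0x7531 holds four 2-bit entries selected by bits 0, 4 and 5 of the byte, and the << 28 plants the looked-up value on bits 4-5 of the same byte. A scalar restatement (hypothetical helper name; the shift drops from 28 to 4 because it operates on the byte directly):

static inline uint8_t tweak_byte_sketch(uint8_t x)
{
    static const uint16_t table = 0x7531;                  // four 2-bit entries
    const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1; // keyed by bits 0, 4, 5
    return x ^ (((table >> index) & 0x3) << 4);            // flips bits 4-5 only
    // e.g. x = 0x00 selects entry 0 (value 0x1) and yields 0x10.
}
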

template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx *__restrict__ ctx)
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
{
    constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
    constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO>();
@@ -477,14 +416,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
        return;
    }

    keccak(input, (int) size, ctx->state0, 200);
    keccak(input, (int) size, ctx[0]->state, 200);

    VARIANT1_INIT(0)

    cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx->state0, (__m128i*) ctx->memory);
    cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);

    const uint8_t* l0 = ctx->memory;
    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
    const uint8_t* l0 = ctx[0]->memory;
    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);

    uint64_t al0 = h0[0] ^ h0[4];
    uint64_t ah0 = h0[1] ^ h0[5];
@@ -536,14 +475,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
        }
    }

    cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx->memory, (__m128i*) ctx->state0);
    cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);

    keccakf(h0, 24);
    extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
    extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}

template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx)
{
    constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
    constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO>();
@@ -554,16 +493,16 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
        return;
    }

    keccak(input, (int) size, ctx->state0, 200);
    keccak(input + size, (int) size, ctx->state1, 200);
    keccak(input, (int) size, ctx[0]->state, 200);
    keccak(input + size, (int) size, ctx[1]->state, 200);

    VARIANT1_INIT(0);
    VARIANT1_INIT(1);

    const uint8_t* l0 = ctx->memory;
    const uint8_t* l1 = ctx->memory + MEM;
    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
    uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state1);
    const uint8_t* l0 = ctx[0]->memory;
    const uint8_t* l1 = ctx[1]->memory;
    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
    uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);

    cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
    cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
@@ -589,16 +528,17 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
        else {
            cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
            cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
# ifndef XMRIG_ARMv7
            cx0 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx0, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
            cx1 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx1, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah1, al1);
# endif
            cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
            cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
        }

        _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
        _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
        VARIANT1_1(&l0[idx0 & MASK]);
        VARIANT1_1(&l1[idx1 & MASK]);
        if (VARIANT > 0) {
            cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
            cryptonight_monero_tweak((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
        } else {
            _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
            _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
        };

        idx0 = EXTRACT64(cx0);
        idx1 = EXTRACT64(cx1);
@@ -664,25 +604,25 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
    keccakf(h0, 24);
    keccakf(h1, 24);

    extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
    extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, output + 32);
    extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
    extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
}


template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx)
{
}


template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx)
{
}


template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx)
{
}

@@ -29,7 +29,7 @@
#include <stdint.h>


#include "xmrig.h"
#include "common/xmrig.h"


namespace xmrig

@@ -32,14 +32,14 @@
    uint64_t tweak1_2_##part = 0; \
    if (VARIANT > 0) { \
        tweak1_2_##part = (*reinterpret_cast<const uint64_t*>(input + 35 + part * size) ^ \
                          *(reinterpret_cast<const uint64_t*>(ctx->state##part) + 24)); \
                          *(reinterpret_cast<const uint64_t*>(ctx[part]->state) + 24)); \
    }
#else
# define VARIANT1_INIT(part) \
    uint64_t tweak1_2_##part = 0; \
    if (VARIANT > 0) { \
        volatile const uint64_t a = *reinterpret_cast<const uint64_t*>(input + 35 + part * size); \
        volatile const uint64_t b = *(reinterpret_cast<const uint64_t*>(ctx->state##part) + 24); \
        volatile const uint64_t b = *(reinterpret_cast<const uint64_t*>(ctx[part]->state) + 24); \
        tweak1_2_##part = a ^ b; \
    }
#endif
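
Both branches of the macro derive the same per-lane constant: the eight input bytes starting at offset 35 of lane `part`'s blob (inside the nonce area) XORed with the 25th 64-bit word of that lane's fresh Keccak state; the volatile variant exists only to keep the compiler from fusing the unaligned read on ARM. Written out as a standalone helper (hypothetical name; memcpy makes the unaligned loads explicit):

#include <cstring>

static inline uint64_t variant1_constant(const uint8_t* input, size_t size,
                                         size_t part, const uint8_t* state)
{
    uint64_t blob_bits, state_word;
    memcpy(&blob_bits,  input + 35 + part * size, sizeof blob_bits);     // bytes 35..42
    memcpy(&state_word, state + 24 * sizeof(uint64_t), sizeof state_word);
    return blob_bits ^ state_word;
}
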
@@ -26,62 +26,107 @@
#define __CRYPTONIGHT_TEST_H__


const static uint8_t test_input[152] = {
const static uint8_t test_input[380] = {
    0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
    0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
    0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
    0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
    0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01,
    0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
    0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
    0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
    0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46,
    0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02,
    0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
    0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
    0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
    0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
    0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01
    0x07, 0x07, 0xB4, 0x87, 0xD0, 0xD6, 0x05, 0x26, 0xE0, 0xC6, 0xDD, 0x9B, 0xC7, 0x18, 0xC3, 0xCF,
    0x52, 0x04, 0xBD, 0x4F, 0x9B, 0x27, 0xF6, 0x73, 0xB9, 0x3F, 0xEF, 0x7B, 0xB2, 0xF7, 0x2B, 0xBB,
    0x3F, 0x3E, 0x9C, 0x3E, 0x9D, 0x33, 0x1E, 0xDE, 0xAD, 0xBE, 0xEF, 0x4E, 0x00, 0x91, 0x81, 0x29,
    0x74, 0xB2, 0x70, 0xE7, 0x6D, 0xD2, 0x2A, 0x5F, 0x52, 0x04, 0x93, 0xE6, 0x18, 0x89, 0x40, 0xD8,
    0xC6, 0xE3, 0x90, 0x6E, 0xAA, 0x6A, 0xB7, 0xE2, 0x08, 0x7E, 0x78, 0x0E,
    0x01, 0x00, 0xEE, 0xB2, 0xD1, 0xD6, 0x05, 0xFF, 0x27, 0x7F, 0x26, 0xDB, 0xAA, 0xB2, 0xC9, 0x26,
    0x30, 0xC6, 0xCF, 0x11, 0x64, 0xEA, 0x6C, 0x8A, 0xE0, 0x98, 0x01, 0xF8, 0x75, 0x4B, 0x49, 0xAF,
    0x79, 0x70, 0xAE, 0xEE, 0xA7, 0x62, 0x2C, 0x00, 0x00, 0x00, 0x00, 0x47, 0x8C, 0x63, 0xE7, 0xD8,
    0x40, 0x02, 0x3C, 0xDA, 0xEA, 0x92, 0x52, 0x53, 0xAC, 0xFD, 0xC7, 0x8A, 0x4C, 0x31, 0xB2, 0xF2,
    0xEC, 0x72, 0x7B, 0xFF, 0xCE, 0xC0, 0xE7, 0x12, 0xD4, 0xE9, 0x2A, 0x01,
    0x07, 0x07, 0xA9, 0xB7, 0xD1, 0xD6, 0x05, 0x3F, 0x0D, 0x5E, 0xFD, 0xC7, 0x03, 0xFC, 0xFC, 0xD2,
    0xCE, 0xBC, 0x44, 0xD8, 0xAB, 0x44, 0xA6, 0xA0, 0x3A, 0xE4, 0x4D, 0x8F, 0x15, 0xAF, 0x62, 0x17,
    0xD1, 0xE0, 0x92, 0x85, 0xE4, 0x73, 0xF9, 0x00, 0x00, 0x00, 0xA0, 0xFC, 0x09, 0xDE, 0xAB, 0xF5,
    0x8B, 0x6F, 0x1D, 0xCA, 0xA8, 0xBA, 0xAC, 0x74, 0xDD, 0x74, 0x19, 0xD5, 0xD6, 0x10, 0xEC, 0x38,
    0xCF, 0x50, 0x29, 0x6A, 0x07, 0x0B, 0x93, 0x8F, 0x8F, 0xA8, 0x10, 0x04
};


const static uint8_t test_output_v0[64] = {
const static uint8_t test_output_v0[160] = {
    0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
    0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00,
    0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
    0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F,
    0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
    0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00
    0xA1, 0xB4, 0xFA, 0xE3, 0xE5, 0x76, 0xCE, 0xCF, 0xB7, 0x9C, 0xAF, 0x3E, 0x29, 0x92, 0xE4, 0xE0,
    0x31, 0x24, 0x05, 0x48, 0xBF, 0x8D, 0x5F, 0x7B, 0x11, 0x03, 0x60, 0xAA, 0xD7, 0x50, 0x3F, 0x0C,
    0x2D, 0x30, 0xF3, 0x87, 0x4F, 0x86, 0xA1, 0x4A, 0xB5, 0xA2, 0x1A, 0x08, 0xD0, 0x44, 0x2C, 0x9D,
    0x16, 0xE9, 0x28, 0x49, 0xA1, 0xFF, 0x85, 0x6F, 0x12, 0xBB, 0x7D, 0xAB, 0x11, 0x1C, 0xE7, 0xF7,
    0x2D, 0x9D, 0x19, 0xE4, 0xD2, 0x26, 0x44, 0x1E, 0xCD, 0x22, 0x08, 0x24, 0xA8, 0x97, 0x46, 0x62,
    0x04, 0x84, 0x90, 0x4A, 0xEE, 0x99, 0x14, 0xED, 0xB8, 0xC6, 0x0D, 0x37, 0xA1, 0x66, 0x17, 0xB0
};


// Monero v7
const static uint8_t test_output_v1[64] = {
    0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D,
    0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22,
const static uint8_t test_output_v1[160] = {
    0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9,
    0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9
    0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9,
    0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D,
    0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22,
    0xE7, 0x8C, 0x5A, 0x6E, 0x38, 0x30, 0x68, 0x4A, 0x73, 0xFC, 0x1B, 0xC6, 0x6D, 0xFC, 0x8D, 0x98,
    0xB4, 0xC2, 0x23, 0x39, 0xAD, 0xE0, 0x9D, 0xF6, 0x6D, 0x8C, 0x6A, 0xAA, 0xF9, 0xB2, 0xE3, 0x4C,
    0xB6, 0x90, 0x6C, 0xE6, 0x15, 0x5E, 0x46, 0x07, 0x9C, 0xB2, 0x6B, 0xAC, 0x3B, 0xAC, 0x1A, 0xDE,
    0x92, 0x2C, 0xD6, 0x0C, 0x46, 0x9D, 0x9B, 0xC2, 0x84, 0x52, 0x65, 0xF6, 0xBD, 0xFA, 0x0D, 0x74,
    0x00, 0x66, 0x10, 0x07, 0xF1, 0x19, 0x06, 0x3A, 0x6C, 0xFF, 0xEE, 0xB2, 0x40, 0xE5, 0x88, 0x2B,
    0x6C, 0xAB, 0x6B, 0x1D, 0x88, 0xB8, 0x44, 0x25, 0xF4, 0xEA, 0xB7, 0xEC, 0xBA, 0x12, 0x8A, 0x24
};


#ifndef XMRIG_NO_AEON
const static uint8_t test_output_v0_lite[64] = {
const static uint8_t test_output_v0_lite[160] = {
    0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
    0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
    0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
    0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD,
    0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
    0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88
    0x38, 0x08, 0xE1, 0x17, 0x0B, 0x99, 0x8D, 0x1A, 0x3C, 0xCE, 0x35, 0xC5, 0xC7, 0x3A, 0x00, 0x2E,
    0xCB, 0x54, 0xF0, 0x78, 0x2E, 0x9E, 0xDB, 0xC7, 0xDF, 0x2E, 0x71, 0x9A, 0x16, 0x97, 0xC4, 0x18,
    0x4B, 0x97, 0x07, 0xFE, 0x5D, 0x98, 0x9A, 0xD6, 0xD8, 0xE5, 0x92, 0x66, 0x87, 0x7F, 0x19, 0x37,
    0xA2, 0x5E, 0xE6, 0x96, 0xB5, 0x97, 0x33, 0x89, 0xE0, 0xA7, 0xC9, 0xDD, 0x4A, 0x7E, 0x9E, 0x53,
    0xBE, 0x91, 0x2B, 0xF5, 0xF5, 0xAF, 0xDD, 0x09, 0xA2, 0xF4, 0xA4, 0x56, 0xEB, 0x96, 0x22, 0xC9,
    0x94, 0xFB, 0x7B, 0x28, 0xC9, 0x97, 0x65, 0x04, 0xAC, 0x4F, 0x84, 0x71, 0xDA, 0x6E, 0xD8, 0xC5
};


// AEON v7
const static uint8_t test_output_v1_lite[64] = {
const static uint8_t test_output_v1_lite[160] = {
    0x6D, 0x8C, 0xDC, 0x44, 0x4E, 0x9B, 0xBB, 0xFD, 0x68, 0xFC, 0x43, 0xFC, 0xD4, 0x85, 0x5B, 0x22,
    0x8C, 0x8A, 0x1B, 0xD9, 0x1D, 0x9D, 0x00, 0x28, 0x5B, 0xEC, 0x02, 0xB7, 0xCA, 0x2D, 0x67, 0x41,
    0x87, 0xC4, 0xE5, 0x70, 0x65, 0x3E, 0xB4, 0xC2, 0xB4, 0x2B, 0x7A, 0x0D, 0x54, 0x65, 0x59, 0x45,
    0x2D, 0xFA, 0xB5, 0x73, 0xB8, 0x2E, 0xC5, 0x2F, 0x15, 0x2B, 0x7F, 0xF9, 0x8E, 0x79, 0x44, 0x6F,
    0x6D, 0x8C, 0xDC, 0x44, 0x4E, 0x9B, 0xBB, 0xFD, 0x68, 0xFC, 0x43, 0xFC, 0xD4, 0x85, 0x5B, 0x22,
    0x8C, 0x8A, 0x1B, 0xD9, 0x1D, 0x9D, 0x00, 0x28, 0x5B, 0xEC, 0x02, 0xB7, 0xCA, 0x2D, 0x67, 0x41
    0x16, 0x08, 0x74, 0xC7, 0xA2, 0xD2, 0xA3, 0x97, 0x95, 0x76, 0xCA, 0x4D, 0x06, 0x39, 0x7A, 0xAB,
    0x6C, 0x87, 0x58, 0x33, 0x4D, 0xC8, 0x5A, 0xAB, 0x04, 0x27, 0xFE, 0x8B, 0x1C, 0x23, 0x2F, 0x32,
    0xC0, 0x44, 0xFF, 0x0D, 0xB5, 0x3B, 0x27, 0x96, 0x06, 0x89, 0x7B, 0xA3, 0x0B, 0xD0, 0xCE, 0x9E,
    0x90, 0x22, 0x77, 0x5A, 0xAD, 0xA1, 0xE5, 0xB6, 0xFC, 0xCB, 0x39, 0x7E, 0x2B, 0x10, 0xEE, 0xB4,
    0x8C, 0x2B, 0xA4, 0x1F, 0x60, 0x76, 0x39, 0xD7, 0xF6, 0x46, 0x77, 0x18, 0x20, 0xAD, 0xD4, 0xC9,
    0x87, 0xF7, 0x37, 0xDA, 0xFD, 0xBA, 0xBA, 0xD2, 0xF2, 0x68, 0xDC, 0x26, 0x8D, 0x1B, 0x08, 0xC6
};
#endif


#ifndef XMRIG_NO_SUMO
const static uint8_t test_output_heavy[64] = {
const static uint8_t test_output_heavy[160] = {
    0x99, 0x83, 0xF2, 0x1B, 0xDF, 0x20, 0x10, 0xA8, 0xD7, 0x07, 0xBB, 0x2F, 0x14, 0xD7, 0x86, 0x64,
    0xBB, 0xE1, 0x18, 0x7F, 0x55, 0x01, 0x4B, 0x39, 0xE5, 0xF3, 0xD6, 0x93, 0x28, 0xE4, 0x8F, 0xC2,
    0x4D, 0x94, 0x7D, 0xD6, 0xDB, 0x6E, 0x07, 0x48, 0x26, 0x4A, 0x51, 0x2E, 0xAC, 0xF3, 0x25, 0x4A,
    0x1F, 0x1A, 0xA2, 0x5B, 0xFC, 0x0A, 0xAD, 0x82, 0xDE, 0xA8, 0x99, 0x96, 0x88, 0x52, 0xD2, 0x7D,
    0x99, 0x83, 0xF2, 0x1B, 0xDF, 0x20, 0x10, 0xA8, 0xD7, 0x07, 0xBB, 0x2F, 0x14, 0xD7, 0x86, 0x64,
    0xBB, 0xE1, 0x18, 0x7F, 0x55, 0x01, 0x4B, 0x39, 0xE5, 0xF3, 0xD6, 0x93, 0x28, 0xE4, 0x8F, 0xC2
    0x3E, 0xE1, 0x23, 0x03, 0x5A, 0x63, 0x7B, 0x66, 0xF6, 0xD7, 0xC2, 0x2A, 0x34, 0x5E, 0x88, 0xE7,
    0xFA, 0xC4, 0x25, 0x36, 0x54, 0xCB, 0xD2, 0x5C, 0x2F, 0x80, 0x2A, 0xF9, 0xCC, 0x43, 0xF7, 0xCD,
    0xE5, 0x18, 0xA8, 0x05, 0x60, 0x18, 0xA5, 0x73, 0x72, 0x9B, 0x32, 0xDC, 0x69, 0x83, 0xC1, 0xE1,
    0x1F, 0xDB, 0xDA, 0x6B, 0xAC, 0xEC, 0x9F, 0x67, 0xF8, 0x27, 0x1D, 0xC7, 0xE6, 0x46, 0x42, 0xF9,
    0x53, 0x62, 0x0A, 0x54, 0x7D, 0x43, 0xEA, 0x18, 0x94, 0xED, 0xD8, 0x92, 0x06, 0x6A, 0xA1, 0x51,
    0xAD, 0xB1, 0xFD, 0x89, 0xFB, 0x5C, 0xB4, 0x25, 0x6A, 0xDD, 0xB0, 0x09, 0xC5, 0x72, 0x87, 0xEB
};
#endif

@@ -405,7 +405,7 @@ static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)


template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx *__restrict__ ctx)
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
{
    constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
    constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO>();
@@ -416,14 +416,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
        return;
    }

    keccak(input, (int) size, ctx->state0, 200);
    keccak(input, (int) size, ctx[0]->state, 200);

    VARIANT1_INIT(0)

    cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx->state0, (__m128i*) ctx->memory);
    cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);

    const uint8_t* l0 = ctx->memory;
    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
    const uint8_t* l0 = ctx[0]->memory;
    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);

    uint64_t al0 = h0[0] ^ h0[4];
    uint64_t ah0 = h0[1] ^ h0[5];
@@ -472,15 +472,15 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
        }
    }

    cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx->memory, (__m128i*) ctx->state0);
    cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);

    keccakf(h0, 24);
    extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
    extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}


template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
{
    constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
    constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO>();
@@ -491,16 +491,16 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
        return;
    }

    keccak(input, (int) size, ctx->state0, 200);
    keccak(input + size, (int) size, ctx->state1, 200);
    keccak(input, (int) size, ctx[0]->state, 200);
    keccak(input + size, (int) size, ctx[1]->state, 200);

    VARIANT1_INIT(0);
    VARIANT1_INIT(1);

    const uint8_t* l0 = ctx->memory;
    const uint8_t* l1 = ctx->memory + MEM;
    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
    uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state1);
    const uint8_t* l0 = ctx[0]->memory;
    const uint8_t* l1 = ctx[1]->memory;
    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
    uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);

    cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
    cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
@@ -513,8 +513,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
    __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
    __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);

    uint64_t idx0 = h0[0] ^ h0[4];
    uint64_t idx1 = h1[0] ^ h1[4];
    uint64_t idx0 = al0;
    uint64_t idx1 = al1;

    void* mp0 = ((uint8_t*) l0) + ((idx0) & MASK);
    void* mp1 = ((uint8_t*) l1) + ((idx1) & MASK);
@@ -602,26 +602,382 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
    keccakf(h0, 24);
    keccakf(h1, 24);

    extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
    extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, output + 32);
    extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
    extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
}


#define CN_STEP1(a, b, c, l, ptr, idx) \
    ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
    c = _mm_load_si128(ptr);


#define CN_STEP2(a, b, c, l, ptr, idx) \
    if (SOFT_AES) { \
        c = soft_aesenc(c, a); \
    } else { \
        c = _mm_aesenc_si128(c, a); \
    } \
    \
    b = _mm_xor_si128(b, c); \
    \
    if (VARIANT > 0) { \
        cryptonight_monero_tweak(reinterpret_cast<uint64_t*>(ptr), b); \
    } else { \
        _mm_store_si128(ptr, b); \
    }


#define CN_STEP3(a, b, c, l, ptr, idx) \
    idx = EXTRACT64(c); \
    ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
    b = _mm_load_si128(ptr);


#define CN_STEP4(a, b, c, l, mc, ptr, idx) \
    lo = __umul128(idx, EXTRACT64(b), &hi); \
    a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \
    \
    if (VARIANT > 0) { \
        _mm_store_si128(ptr, _mm_xor_si128(a, mc)); \
    } else { \
        _mm_store_si128(ptr, a); \
    } \
    \
    a = _mm_xor_si128(a, b); \
    idx = EXTRACT64(a); \
    \
    if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { \
        int64_t n = ((int64_t*)&l[idx & MASK])[0]; \
        int32_t d = ((int32_t*)&l[idx & MASK])[2]; \
        int64_t q = n / (d | 0x5); \
        ((int64_t*)&l[idx & MASK])[0] = n ^ q; \
        idx = d ^ q; \
    }
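
CN_STEP1 through CN_STEP4 split one iteration of the inner loop into load, AES round plus write-back (with the variant-1 tweak), reload, and the 64x64 multiply-add, so the triple/quad/penta kernels below can interleave several lanes and hide scratchpad latency. The CRYPTONIGHT_HEAVY tail of CN_STEP4 is the only non-obvious part: a signed 64/32 division over the just-visited line, with the divisor ORed with 0x5 so it can never be zero. A scalar sketch of that tail (hypothetical helper name; idx is assumed already masked):

static inline uint64_t heavy_div_sketch(uint8_t* l, uint64_t idx)
{
    int64_t n = ((int64_t*)&l[idx])[0];   // first 8 bytes of the line
    int32_t d = ((int32_t*)&l[idx])[2];   // following 4 bytes
    int64_t q = n / (d | 0x5);            // | 0x5 rules out a zero divisor
    ((int64_t*)&l[idx])[0] = n ^ q;       // scramble the line in place
    return d ^ q;                         // seeds the next scratchpad index
}
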
#define CONST_INIT(ctx, n) \
    __m128i mc##n; \
    if (VARIANT > 0) { \
        mc##n = _mm_set_epi64x(*reinterpret_cast<const uint64_t*>(input + n * size + 35) ^ \
                               *(reinterpret_cast<const uint64_t*>((ctx)->state) + 24), 0); \
    }

template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
{
    constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
    constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO>();
    constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();

    if (VARIANT > 0 && size < 43) {
        memset(output, 0, 32 * 3);
        return;
    }

    for (size_t i = 0; i < 3; i++) {
        keccak(input + size * i, static_cast<int>(size), ctx[i]->state, 200);
        cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
    }

    CONST_INIT(ctx[0], 0);
    CONST_INIT(ctx[1], 1);
    CONST_INIT(ctx[2], 2);

    uint8_t* l0 = ctx[0]->memory;
    uint8_t* l1 = ctx[1]->memory;
    uint8_t* l2 = ctx[2]->memory;
    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
    uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
    uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);

    __m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]);
    __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
    __m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]);
    __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
    __m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]);
    __m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
    __m128i cx0 = _mm_set_epi64x(0, 0);
    __m128i cx1 = _mm_set_epi64x(0, 0);
    __m128i cx2 = _mm_set_epi64x(0, 0);

    uint64_t idx0, idx1, idx2;
    idx0 = EXTRACT64(ax0);
    idx1 = EXTRACT64(ax1);
    idx2 = EXTRACT64(ax2);

    for (size_t i = 0; i < ITERATIONS / 2; i++) {
        uint64_t hi, lo;
        __m128i *ptr0, *ptr1, *ptr2;

        // EVEN ROUND
        CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0);
        CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1);
        CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2);

        CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0);
        CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1);
        CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2);

        CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0);
        CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1);
        CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2);

        CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
        CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
        CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);

        // ODD ROUND
        CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
        CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1);
        CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2);

        CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0);
        CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1);
        CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2);

        CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0);
        CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
        CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);

        CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
        CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
        CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
    }

    for (size_t i = 0; i < 3; i++) {
        cn_implode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
        keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
        extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
    }
}
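
All three wide kernels share the same shape: per-lane keccak and explode, CONST_INIT per lane, the interleaved CN_STEP loop over ITERATIONS / 2 double-rounds, then per-lane implode, keccakf and the extra hash. A hedged usage sketch (hypothetical call site; assumes three contexts prepared as above and that xmrig::CRYPTONIGHT names the base algorithm, consistent with the enum values used elsewhere in this diff):

// Three blobs of blob_size bytes concatenated in `input`, hardware AES,
// CryptoNight variant 1, writing 3 x 32 bytes of hashes to `out`.
static void triple_hash_example(const uint8_t* input, size_t blob_size,
                                uint8_t out[32 * 3], cryptonight_ctx** ctx)
{
    cryptonight_triple_hash<xmrig::CRYPTONIGHT, false, 1>(input, blob_size, out, ctx);
}
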
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
{
    constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
    constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO>();
    constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();

    if (VARIANT > 0 && size < 43) {
        memset(output, 0, 32 * 4);
        return;
    }

    for (size_t i = 0; i < 4; i++) {
        keccak(input + size * i, static_cast<int>(size), ctx[i]->state, 200);
        cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
    }

    CONST_INIT(ctx[0], 0);
    CONST_INIT(ctx[1], 1);
    CONST_INIT(ctx[2], 2);
    CONST_INIT(ctx[3], 3);

    uint8_t* l0 = ctx[0]->memory;
    uint8_t* l1 = ctx[1]->memory;
    uint8_t* l2 = ctx[2]->memory;
    uint8_t* l3 = ctx[3]->memory;
    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
    uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
    uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);
    uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx[3]->state);

    __m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]);
    __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
    __m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]);
    __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
    __m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]);
    __m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
    __m128i ax3 = _mm_set_epi64x(h3[1] ^ h3[5], h3[0] ^ h3[4]);
    __m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
    __m128i cx0 = _mm_set_epi64x(0, 0);
    __m128i cx1 = _mm_set_epi64x(0, 0);
    __m128i cx2 = _mm_set_epi64x(0, 0);
    __m128i cx3 = _mm_set_epi64x(0, 0);

    uint64_t idx0, idx1, idx2, idx3;
    idx0 = EXTRACT64(ax0);
    idx1 = EXTRACT64(ax1);
    idx2 = EXTRACT64(ax2);
    idx3 = EXTRACT64(ax3);

    for (size_t i = 0; i < ITERATIONS / 2; i++)
    {
        uint64_t hi, lo;
        __m128i *ptr0, *ptr1, *ptr2, *ptr3;

        // EVEN ROUND
        CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0);
        CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1);
        CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2);
        CN_STEP1(ax3, bx3, cx3, l3, ptr3, idx3);

        CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0);
        CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1);
        CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2);
        CN_STEP2(ax3, bx3, cx3, l3, ptr3, idx3);

        CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0);
        CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1);
        CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2);
        CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3);

        CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
        CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
        CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
        CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3);

        // ODD ROUND
        CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
        CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1);
        CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2);
        CN_STEP1(ax3, cx3, bx3, l3, ptr3, idx3);

        CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0);
        CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1);
        CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2);
        CN_STEP2(ax3, cx3, bx3, l3, ptr3, idx3);

        CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0);
        CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
        CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
        CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3);

        CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
        CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
        CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
        CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
    }

    for (size_t i = 0; i < 4; i++) {
        cn_implode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
        keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
        extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
    }
}
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
{
}
    constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
    constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO>();
    constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();

    if (VARIANT > 0 && size < 43) {
        memset(output, 0, 32 * 5);
        return;
    }

template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
{
    for (size_t i = 0; i < 5; i++) {
        keccak(input + size * i, static_cast<int>(size), ctx[i]->state, 200);
        cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
    }

    CONST_INIT(ctx[0], 0);
    CONST_INIT(ctx[1], 1);
    CONST_INIT(ctx[2], 2);
    CONST_INIT(ctx[3], 3);
    CONST_INIT(ctx[4], 4);

    uint8_t* l0 = ctx[0]->memory;
    uint8_t* l1 = ctx[1]->memory;
    uint8_t* l2 = ctx[2]->memory;
    uint8_t* l3 = ctx[3]->memory;
    uint8_t* l4 = ctx[4]->memory;
    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
    uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
    uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);
    uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx[3]->state);
    uint64_t* h4 = reinterpret_cast<uint64_t*>(ctx[4]->state);

    __m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]);
    __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
    __m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]);
    __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
    __m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]);
    __m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
    __m128i ax3 = _mm_set_epi64x(h3[1] ^ h3[5], h3[0] ^ h3[4]);
    __m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
    __m128i ax4 = _mm_set_epi64x(h4[1] ^ h4[5], h4[0] ^ h4[4]);
    __m128i bx4 = _mm_set_epi64x(h4[3] ^ h4[7], h4[2] ^ h4[6]);
    __m128i cx0 = _mm_set_epi64x(0, 0);
    __m128i cx1 = _mm_set_epi64x(0, 0);
    __m128i cx2 = _mm_set_epi64x(0, 0);
    __m128i cx3 = _mm_set_epi64x(0, 0);
    __m128i cx4 = _mm_set_epi64x(0, 0);

    uint64_t idx0, idx1, idx2, idx3, idx4;
    idx0 = EXTRACT64(ax0);
    idx1 = EXTRACT64(ax1);
    idx2 = EXTRACT64(ax2);
    idx3 = EXTRACT64(ax3);
    idx4 = EXTRACT64(ax4);

    for (size_t i = 0; i < ITERATIONS / 2; i++)
    {
        uint64_t hi, lo;
        __m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4;

        // EVEN ROUND
        CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0);
        CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1);
        CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2);
        CN_STEP1(ax3, bx3, cx3, l3, ptr3, idx3);
        CN_STEP1(ax4, bx4, cx4, l4, ptr4, idx4);

        CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0);
        CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1);
        CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2);
        CN_STEP2(ax3, bx3, cx3, l3, ptr3, idx3);
        CN_STEP2(ax4, bx4, cx4, l4, ptr4, idx4);

        CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0);
        CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1);
        CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2);
        CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3);
        CN_STEP3(ax4, bx4, cx4, l4, ptr4, idx4);

        CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
        CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
        CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
        CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3);
        CN_STEP4(ax4, bx4, cx4, l4, mc4, ptr4, idx4);

        // ODD ROUND
        CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
        CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1);
        CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2);
        CN_STEP1(ax3, cx3, bx3, l3, ptr3, idx3);
        CN_STEP1(ax4, cx4, bx4, l4, ptr4, idx4);

        CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0);
        CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1);
        CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2);
        CN_STEP2(ax3, cx3, bx3, l3, ptr3, idx3);
        CN_STEP2(ax4, cx4, bx4, l4, ptr4, idx4);

        CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0);
        CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
        CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
        CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3);
        CN_STEP3(ax4, cx4, bx4, l4, ptr4, idx4);

        CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
        CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
        CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
        CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
        CN_STEP4(ax4, cx4, bx4, l4, mc4, ptr4, idx4);
    }

    for (size_t i = 0; i < 5; i++) {
        cn_implode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
        keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
        extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
    }
}

#endif /* __CRYPTONIGHT_X86_H__ */

@@ -105,6 +105,23 @@ static inline __m128i soft_aesenc(const uint32_t* in, __m128i key)
    return _mm_xor_si128(out, key);
}

static inline __m128i soft_aesenc(__m128i in, __m128i key)
{
    uint32_t x0, x1, x2, x3;
    x0 = _mm_cvtsi128_si32(in);
    x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55));
    x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA));
    x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF));

    __m128i out = _mm_set_epi32(
        (saes_table[0][x3 & 0xff] ^ saes_table[1][(x0 >> 8) & 0xff] ^ saes_table[2][(x1 >> 16) & 0xff] ^ saes_table[3][x2 >> 24]),
        (saes_table[0][x2 & 0xff] ^ saes_table[1][(x3 >> 8) & 0xff] ^ saes_table[2][(x0 >> 16) & 0xff] ^ saes_table[3][x1 >> 24]),
        (saes_table[0][x1 & 0xff] ^ saes_table[1][(x2 >> 8) & 0xff] ^ saes_table[2][(x3 >> 16) & 0xff] ^ saes_table[3][x0 >> 24]),
        (saes_table[0][x0 & 0xff] ^ saes_table[1][(x1 >> 8) & 0xff] ^ saes_table[2][(x2 >> 16) & 0xff] ^ saes_table[3][x3 >> 24]));

    return _mm_xor_si128(out, key);
}

static inline uint32_t sub_word(uint32_t key)
{
    return (saes_sbox[key >> 24 ] << 24) |
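
The new soft_aesenc overload is the usual T-table formulation of one AES round: each saes_table[i] folds SubBytes and one column of MixColumns into a 256-entry lookup, and the rotated byte indices across x0..x3 implement ShiftRows. The sub_word helper is cut off above; it simply applies saes_sbox to each byte of a word. A sketch of how such tables are conventionally built from the S-box and the MixColumns coefficients {2, 1, 1, 3} (the byte packing shown is one common convention and may differ from the actual layout in soft_aes.h):

static inline uint8_t xtime(uint8_t b)              // doubling in GF(2^8)
{
    return (uint8_t)((b << 1) ^ ((b >> 7) * 0x1B));
}

static uint32_t make_t0_sketch(uint8_t x)
{
    const uint8_t s = saes_sbox[x];
    return (uint32_t)xtime(s)                              // 2 * S(x)
         | ((uint32_t)s << 8)                              // 1 * S(x)
         | ((uint32_t)s << 16)                             // 1 * S(x)
         | ((uint32_t)(uint8_t)(xtime(s) ^ s) << 24);      // 3 * S(x)
    // saes_table[1..3] are byte rotations of saes_table[0].
}
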