Merge conflicts resolved

aegroto 2018-04-22 11:58:36 +02:00
commit bc6a6c8028
84 changed files with 1750 additions and 1087 deletions

View file

@@ -30,18 +30,8 @@
#include <stdint.h>
#define AEON_MEMORY 1048576
#define AEON_MASK 0xFFFF0
#define AEON_ITER 0x40000
#define MONERO_MEMORY 2097152
#define MONERO_MASK 0x1FFFF0
#define MONERO_ITER 0x80000
struct cryptonight_ctx {
alignas(16) uint8_t state0[200];
alignas(16) uint8_t state1[200];
alignas(16) uint8_t state[200];
alignas(16) uint8_t* memory;
};
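
With state0 and state1 collapsed into a single state buffer, each concurrent hash lane now carries its own cryptonight_ctx, and the hash functions below take cryptonight_ctx** and index it per lane. A minimal allocation sketch (not part of this diff; _mm_malloc and the 2 MiB scratchpad size are taken from the context above, error handling omitted):

#include <cstdint>
#include <mm_malloc.h>

struct cryptonight_ctx {
    alignas(16) uint8_t state[200];   // 200-byte Keccak state, one per lane
    alignas(16) uint8_t* memory;      // per-lane scratchpad
};

static cryptonight_ctx** alloc_ctx(size_t lanes, size_t scratchpad = 2097152)
{
    cryptonight_ctx** ctx = new cryptonight_ctx*[lanes];
    for (size_t i = 0; i < lanes; i++) {
        ctx[i] = static_cast<cryptonight_ctx*>(_mm_malloc(sizeof(cryptonight_ctx), 16));
        ctx[i]->memory = static_cast<uint8_t*>(_mm_malloc(scratchpad, 16));
    }
    return ctx;
}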

View file

@@ -28,13 +28,7 @@
#define __CRYPTONIGHT_ARM_H__
#if defined(XMRIG_ARM) && !defined(__clang__)
# include "aligned_malloc.h"
#else
# include <mm_malloc.h>
#endif
#include "common/utils/mm_malloc.h"
#include "crypto/CryptoNight.h"
#include "crypto/CryptoNight_constants.h"
#include "crypto/CryptoNight_monero.h"
@@ -80,6 +74,19 @@ static inline __attribute__((always_inline)) __m128i _mm_set_epi64x(const uint64
}
#ifdef XMRIG_ARMv8
static inline __attribute__((always_inline)) __m128i _mm_aesenc_si128(__m128i v, __m128i rkey)
{
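// ARM's vaeseq_u8 performs AddRoundKey -> SubBytes -> ShiftRows, while x86
// AESENC performs ShiftRows -> SubBytes -> MixColumns -> AddRoundKey.
// Passing a zero key cancels vaeseq_u8's AddRoundKey; the real round key is
// then XORed in after vaesmcq_u8 (MixColumns), matching AESENC's result.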
alignas(16) const __m128i zero = { 0 };
return veorq_u8(vaesmcq_u8(vaeseq_u8(v, zero)), rkey);
}
#else
static inline __attribute__((always_inline)) __m128i _mm_aesenc_si128(__m128i v, __m128i rkey)
{
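// Empty stub: ARMv7 builds take the SOFT_AES code paths, so this function is
// never called there and only needs to exist for the code to compile.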
}
#endif
/* _mm_cvtsi128_si64 is not provided by this compatibility layer yet, so it is implemented here */
static inline __attribute__((always_inline)) uint64_t _mm_cvtsi128_si64(__m128i a)
{
@@ -162,19 +169,19 @@ static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, _
*k0 = xout0;
*k1 = xout2;
SOFT_AES ? soft_aes_genkey_sub<0x01>(&xout0, &xout2) : soft_aes_genkey_sub<0x01>(&xout0, &xout2);
soft_aes_genkey_sub<0x01>(&xout0, &xout2);
*k2 = xout0;
*k3 = xout2;
SOFT_AES ? soft_aes_genkey_sub<0x02>(&xout0, &xout2) : soft_aes_genkey_sub<0x02>(&xout0, &xout2);
soft_aes_genkey_sub<0x02>(&xout0, &xout2);
*k4 = xout0;
*k5 = xout2;
SOFT_AES ? soft_aes_genkey_sub<0x04>(&xout0, &xout2) : soft_aes_genkey_sub<0x04>(&xout0, &xout2);
soft_aes_genkey_sub<0x04>(&xout0, &xout2);
*k6 = xout0;
*k7 = xout2;
SOFT_AES ? soft_aes_genkey_sub<0x08>(&xout0, &xout2) : soft_aes_genkey_sub<0x08>(&xout0, &xout2);
soft_aes_genkey_sub<0x08>(&xout0, &xout2);
*k8 = xout0;
*k9 = xout2;
}
@@ -193,18 +200,16 @@ static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2,
*x6 = soft_aesenc((uint32_t*)x6, key);
*x7 = soft_aesenc((uint32_t*)x7, key);
}
# ifndef XMRIG_ARMv7
else {
*x0 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x0), key));
*x1 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x1), key));
*x2 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x2), key));
*x3 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x3), key));
*x4 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x4), key));
*x5 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x5), key));
*x6 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x6), key));
*x7 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x7), key));
*x0 = _mm_aesenc_si128(*x0, key);
*x1 = _mm_aesenc_si128(*x1, key);
*x2 = _mm_aesenc_si128(*x2, key);
*x3 = _mm_aesenc_si128(*x3, key);
*x4 = _mm_aesenc_si128(*x4, key);
*x5 = _mm_aesenc_si128(*x5, key);
*x6 = _mm_aesenc_si128(*x6, key);
*x7 = _mm_aesenc_si128(*x7, key);
}
# endif
}
@@ -241,10 +246,6 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
for (size_t i = 0; i < 16; i++) {
if (!SOFT_AES) {
aes_round<SOFT_AES>(_mm_setzero_si128(), &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
}
aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
@@ -254,20 +255,7 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
if (!SOFT_AES) {
xin0 ^= k9;
xin1 ^= k9;
xin2 ^= k9;
xin3 ^= k9;
xin4 ^= k9;
xin5 ^= k9;
xin6 ^= k9;
xin7 ^= k9;
}
else {
aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
}
aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
mix_and_propagate(xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
}
@@ -289,20 +277,7 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
if (!SOFT_AES) {
xin0 ^= k9;
xin1 ^= k9;
xin2 ^= k9;
xin3 ^= k9;
xin4 ^= k9;
xin5 ^= k9;
xin6 ^= k9;
xin7 ^= k9;
}
else {
aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
}
aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
_mm_store_si128(outputTmp, xin0);
_mm_store_si128(outputTmp + 1, xin1);
@@ -346,10 +321,6 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
xout6 = _mm_xor_si128(_mm_load_si128(inputTmp + 6), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(inputTmp + 7), xout7);
if (!SOFT_AES) {
aes_round<SOFT_AES>(_mm_setzero_si128(), &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
}
aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
@@ -359,20 +330,7 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
if (!SOFT_AES) {
xout0 ^= k9;
xout1 ^= k9;
xout2 ^= k9;
xout3 ^= k9;
xout4 ^= k9;
xout5 ^= k9;
xout6 ^= k9;
xout7 ^= k9;
}
else {
aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
}
aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
@@ -390,10 +348,6 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
xout6 = _mm_xor_si128(_mm_load_si128(inputTmp + 6), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(inputTmp + 7), xout7);
if (!SOFT_AES) {
aes_round<SOFT_AES>(_mm_setzero_si128(), &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
}
aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
@@ -403,29 +357,12 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
if (!SOFT_AES) {
xout0 ^= k9;
xout1 ^= k9;
xout2 ^= k9;
xout3 ^= k9;
xout4 ^= k9;
xout5 ^= k9;
xout6 ^= k9;
xout7 ^= k9;
}
else {
aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
}
aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
}
for (size_t i = 0; i < 16; i++) {
if (!SOFT_AES) {
aes_round<SOFT_AES>(_mm_setzero_si128(), &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
}
aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
@@ -435,20 +372,7 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
if (!SOFT_AES) {
xout0 ^= k9;
xout1 ^= k9;
xout2 ^= k9;
xout3 ^= k9;
xout4 ^= k9;
xout5 ^= k9;
xout6 ^= k9;
xout7 ^= k9;
}
else {
aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
}
aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
}
@@ -465,8 +389,23 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
}
static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
{
mem_out[0] = EXTRACT64(tmp);
uint64_t vh = vgetq_lane_u64(tmp, 1);
uint8_t x = vh >> 24;
static const uint16_t table = 0x7531;
const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
vh ^= ((table >> index) & 0x3) << 28;
mem_out[1] = vh;
}
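
The tweak above implements the variant-1 block tweak: it flips up to two bits of byte 11 of the stored 16-byte block, with the flip patterns packed two bits apiece into the constant 0x7531 and selected by bits of byte 11 itself. A standalone scalar restatement (illustrative, not part of this diff):

#include <cstdint>
#include <cstdio>

static void monero_tweak_scalar(uint8_t* block16)
{
    static const uint16_t table = 0x7531;
    const uint8_t x = block16[11];
    const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
    block16[11] = x ^ (uint8_t)(((table >> index) & 0x3) << 4);
}

int main()
{
    uint8_t block[16] = { 0 };
    monero_tweak_scalar(block);
    std::printf("byte 11: 0x%02X\n", block[11]);   // index 0 selects 0x1 -> flips bit 4
    return 0;
}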
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx *__restrict__ ctx)
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
{
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO>();
@@ -477,14 +416,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
return;
}
keccak(input, (int) size, ctx->state0, 200);
keccak(input, (int) size, ctx[0]->state, 200);
VARIANT1_INIT(0)
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx->state0, (__m128i*) ctx->memory);
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
const uint8_t* l0 = ctx->memory;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
const uint8_t* l0 = ctx[0]->memory;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
@@ -536,14 +475,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
}
}
cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx->memory, (__m128i*) ctx->state0);
cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf(h0, 24);
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
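
Every variant of the single-hash path ends the same way: the scratchpad is imploded back into the Keccak state, keccakf finishes the permutation, and the low two bits of the first state byte pick one of CryptoNight's four finalizers (BLAKE-256, Groestl-256, JH-256, Skein-256). A dispatch sketch with stub finalizers standing in for the real implementations:

#include <cstdint>
#include <cstdio>

static void h_blake  (const uint8_t*, size_t, uint8_t*) { std::puts("blake");   }
static void h_groestl(const uint8_t*, size_t, uint8_t*) { std::puts("groestl"); }
static void h_jh     (const uint8_t*, size_t, uint8_t*) { std::puts("jh");      }
static void h_skein  (const uint8_t*, size_t, uint8_t*) { std::puts("skein");   }

static void (*const extra_hashes_demo[4])(const uint8_t*, size_t, uint8_t*) = {
    h_blake, h_groestl, h_jh, h_skein
};

int main()
{
    uint8_t state[200] = { 0x02 };                    // state[0] & 3 == 2 -> jh
    uint8_t out[32];
    extra_hashes_demo[state[0] & 3](state, 200, out);
    return 0;
}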
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx)
{
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO>();
@@ -554,16 +493,16 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
return;
}
keccak(input, (int) size, ctx->state0, 200);
keccak(input + size, (int) size, ctx->state1, 200);
keccak(input, (int) size, ctx[0]->state, 200);
keccak(input + size, (int) size, ctx[1]->state, 200);
VARIANT1_INIT(0);
VARIANT1_INIT(1);
const uint8_t* l0 = ctx->memory;
const uint8_t* l1 = ctx->memory + MEM;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state1);
const uint8_t* l0 = ctx[0]->memory;
const uint8_t* l1 = ctx[1]->memory;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
@@ -589,16 +528,17 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
else {
cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
# ifndef XMRIG_ARMv7
cx0 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx0, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
cx1 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx1, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah1, al1);
# endif
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
}
_mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
VARIANT1_1(&l0[idx0 & MASK]);
VARIANT1_1(&l1[idx1 & MASK]);
if (VARIANT > 0) {
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
cryptonight_monero_tweak((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
} else {
_mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
};
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
@@ -664,25 +604,25 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, output + 32);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
}
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx)
{
}
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx)
{
}
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx)
{
}

View file

@@ -29,7 +29,7 @@
#include <stdint.h>
#include "xmrig.h"
#include "common/xmrig.h"
namespace xmrig

View file

@@ -32,14 +32,14 @@
uint64_t tweak1_2_##part = 0; \
if (VARIANT > 0) { \
tweak1_2_##part = (*reinterpret_cast<const uint64_t*>(input + 35 + part * size) ^ \
*(reinterpret_cast<const uint64_t*>(ctx->state##part) + 24)); \
*(reinterpret_cast<const uint64_t*>(ctx[part]->state) + 24)); \
}
#else
# define VARIANT1_INIT(part) \
uint64_t tweak1_2_##part = 0; \
if (VARIANT > 0) { \
volatile const uint64_t a = *reinterpret_cast<const uint64_t*>(input + 35 + part * size); \
volatile const uint64_t b = *(reinterpret_cast<const uint64_t*>(ctx->state##part) + 24); \
volatile const uint64_t b = *(reinterpret_cast<const uint64_t*>(ctx[part]->state) + 24); \
tweak1_2_##part = a ^ b; \
}
#endif
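
Both branches compute the same value: tweak1_2 for lane "part" is the 8 input bytes at offset 35 + part * size XORed with bytes 192..199 of that lane's Keccak state (the + 24 is counted in 64-bit words); the volatile copies in the second branch presumably work around a compiler optimization issue. A scalar restatement (illustrative, the helper name is not from the source):

#include <cstdint>
#include <cstring>

static uint64_t variant1_tweak(const uint8_t* input, size_t size,
                               const uint8_t* state, size_t part)
{
    uint64_t a, b;
    std::memcpy(&a, input + 35 + part * size, sizeof a);
    std::memcpy(&b, state + 24 * sizeof(uint64_t), sizeof b);   // byte offset 192
    return a ^ b;
}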

View file

@@ -26,62 +26,107 @@
#define __CRYPTONIGHT_TEST_H__
const static uint8_t test_input[152] = {
const static uint8_t test_input[380] = {
0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01,
0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46,
0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02,
0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01
0x07, 0x07, 0xB4, 0x87, 0xD0, 0xD6, 0x05, 0x26, 0xE0, 0xC6, 0xDD, 0x9B, 0xC7, 0x18, 0xC3, 0xCF,
0x52, 0x04, 0xBD, 0x4F, 0x9B, 0x27, 0xF6, 0x73, 0xB9, 0x3F, 0xEF, 0x7B, 0xB2, 0xF7, 0x2B, 0xBB,
0x3F, 0x3E, 0x9C, 0x3E, 0x9D, 0x33, 0x1E, 0xDE, 0xAD, 0xBE, 0xEF, 0x4E, 0x00, 0x91, 0x81, 0x29,
0x74, 0xB2, 0x70, 0xE7, 0x6D, 0xD2, 0x2A, 0x5F, 0x52, 0x04, 0x93, 0xE6, 0x18, 0x89, 0x40, 0xD8,
0xC6, 0xE3, 0x90, 0x6E, 0xAA, 0x6A, 0xB7, 0xE2, 0x08, 0x7E, 0x78, 0x0E,
0x01, 0x00, 0xEE, 0xB2, 0xD1, 0xD6, 0x05, 0xFF, 0x27, 0x7F, 0x26, 0xDB, 0xAA, 0xB2, 0xC9, 0x26,
0x30, 0xC6, 0xCF, 0x11, 0x64, 0xEA, 0x6C, 0x8A, 0xE0, 0x98, 0x01, 0xF8, 0x75, 0x4B, 0x49, 0xAF,
0x79, 0x70, 0xAE, 0xEE, 0xA7, 0x62, 0x2C, 0x00, 0x00, 0x00, 0x00, 0x47, 0x8C, 0x63, 0xE7, 0xD8,
0x40, 0x02, 0x3C, 0xDA, 0xEA, 0x92, 0x52, 0x53, 0xAC, 0xFD, 0xC7, 0x8A, 0x4C, 0x31, 0xB2, 0xF2,
0xEC, 0x72, 0x7B, 0xFF, 0xCE, 0xC0, 0xE7, 0x12, 0xD4, 0xE9, 0x2A, 0x01,
0x07, 0x07, 0xA9, 0xB7, 0xD1, 0xD6, 0x05, 0x3F, 0x0D, 0x5E, 0xFD, 0xC7, 0x03, 0xFC, 0xFC, 0xD2,
0xCE, 0xBC, 0x44, 0xD8, 0xAB, 0x44, 0xA6, 0xA0, 0x3A, 0xE4, 0x4D, 0x8F, 0x15, 0xAF, 0x62, 0x17,
0xD1, 0xE0, 0x92, 0x85, 0xE4, 0x73, 0xF9, 0x00, 0x00, 0x00, 0xA0, 0xFC, 0x09, 0xDE, 0xAB, 0xF5,
0x8B, 0x6F, 0x1D, 0xCA, 0xA8, 0xBA, 0xAC, 0x74, 0xDD, 0x74, 0x19, 0xD5, 0xD6, 0x10, 0xEC, 0x38,
0xCF, 0x50, 0x29, 0x6A, 0x07, 0x0B, 0x93, 0x8F, 0x8F, 0xA8, 0x10, 0x04
};
const static uint8_t test_output_v0[64] = {
const static uint8_t test_output_v0[160] = {
0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00,
0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F,
0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00
0xA1, 0xB4, 0xFA, 0xE3, 0xE5, 0x76, 0xCE, 0xCF, 0xB7, 0x9C, 0xAF, 0x3E, 0x29, 0x92, 0xE4, 0xE0,
0x31, 0x24, 0x05, 0x48, 0xBF, 0x8D, 0x5F, 0x7B, 0x11, 0x03, 0x60, 0xAA, 0xD7, 0x50, 0x3F, 0x0C,
0x2D, 0x30, 0xF3, 0x87, 0x4F, 0x86, 0xA1, 0x4A, 0xB5, 0xA2, 0x1A, 0x08, 0xD0, 0x44, 0x2C, 0x9D,
0x16, 0xE9, 0x28, 0x49, 0xA1, 0xFF, 0x85, 0x6F, 0x12, 0xBB, 0x7D, 0xAB, 0x11, 0x1C, 0xE7, 0xF7,
0x2D, 0x9D, 0x19, 0xE4, 0xD2, 0x26, 0x44, 0x1E, 0xCD, 0x22, 0x08, 0x24, 0xA8, 0x97, 0x46, 0x62,
0x04, 0x84, 0x90, 0x4A, 0xEE, 0x99, 0x14, 0xED, 0xB8, 0xC6, 0x0D, 0x37, 0xA1, 0x66, 0x17, 0xB0
};
// Monero v7
const static uint8_t test_output_v1[64] = {
0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D,
0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22,
const static uint8_t test_output_v1[160] = {
0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9,
0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9
0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9,
0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D,
0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22,
0xE7, 0x8C, 0x5A, 0x6E, 0x38, 0x30, 0x68, 0x4A, 0x73, 0xFC, 0x1B, 0xC6, 0x6D, 0xFC, 0x8D, 0x98,
0xB4, 0xC2, 0x23, 0x39, 0xAD, 0xE0, 0x9D, 0xF6, 0x6D, 0x8C, 0x6A, 0xAA, 0xF9, 0xB2, 0xE3, 0x4C,
0xB6, 0x90, 0x6C, 0xE6, 0x15, 0x5E, 0x46, 0x07, 0x9C, 0xB2, 0x6B, 0xAC, 0x3B, 0xAC, 0x1A, 0xDE,
0x92, 0x2C, 0xD6, 0x0C, 0x46, 0x9D, 0x9B, 0xC2, 0x84, 0x52, 0x65, 0xF6, 0xBD, 0xFA, 0x0D, 0x74,
0x00, 0x66, 0x10, 0x07, 0xF1, 0x19, 0x06, 0x3A, 0x6C, 0xFF, 0xEE, 0xB2, 0x40, 0xE5, 0x88, 0x2B,
0x6C, 0xAB, 0x6B, 0x1D, 0x88, 0xB8, 0x44, 0x25, 0xF4, 0xEA, 0xB7, 0xEC, 0xBA, 0x12, 0x8A, 0x24
};
#ifndef XMRIG_NO_AEON
const static uint8_t test_output_v0_lite[64] = {
const static uint8_t test_output_v0_lite[160] = {
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD,
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88
0x38, 0x08, 0xE1, 0x17, 0x0B, 0x99, 0x8D, 0x1A, 0x3C, 0xCE, 0x35, 0xC5, 0xC7, 0x3A, 0x00, 0x2E,
0xCB, 0x54, 0xF0, 0x78, 0x2E, 0x9E, 0xDB, 0xC7, 0xDF, 0x2E, 0x71, 0x9A, 0x16, 0x97, 0xC4, 0x18,
0x4B, 0x97, 0x07, 0xFE, 0x5D, 0x98, 0x9A, 0xD6, 0xD8, 0xE5, 0x92, 0x66, 0x87, 0x7F, 0x19, 0x37,
0xA2, 0x5E, 0xE6, 0x96, 0xB5, 0x97, 0x33, 0x89, 0xE0, 0xA7, 0xC9, 0xDD, 0x4A, 0x7E, 0x9E, 0x53,
0xBE, 0x91, 0x2B, 0xF5, 0xF5, 0xAF, 0xDD, 0x09, 0xA2, 0xF4, 0xA4, 0x56, 0xEB, 0x96, 0x22, 0xC9,
0x94, 0xFB, 0x7B, 0x28, 0xC9, 0x97, 0x65, 0x04, 0xAC, 0x4F, 0x84, 0x71, 0xDA, 0x6E, 0xD8, 0xC5
};
// AEON v7
const static uint8_t test_output_v1_lite[64] = {
const static uint8_t test_output_v1_lite[160] = {
0x6D, 0x8C, 0xDC, 0x44, 0x4E, 0x9B, 0xBB, 0xFD, 0x68, 0xFC, 0x43, 0xFC, 0xD4, 0x85, 0x5B, 0x22,
0x8C, 0x8A, 0x1B, 0xD9, 0x1D, 0x9D, 0x00, 0x28, 0x5B, 0xEC, 0x02, 0xB7, 0xCA, 0x2D, 0x67, 0x41,
0x87, 0xC4, 0xE5, 0x70, 0x65, 0x3E, 0xB4, 0xC2, 0xB4, 0x2B, 0x7A, 0x0D, 0x54, 0x65, 0x59, 0x45,
0x2D, 0xFA, 0xB5, 0x73, 0xB8, 0x2E, 0xC5, 0x2F, 0x15, 0x2B, 0x7F, 0xF9, 0x8E, 0x79, 0x44, 0x6F,
0x6D, 0x8C, 0xDC, 0x44, 0x4E, 0x9B, 0xBB, 0xFD, 0x68, 0xFC, 0x43, 0xFC, 0xD4, 0x85, 0x5B, 0x22,
0x8C, 0x8A, 0x1B, 0xD9, 0x1D, 0x9D, 0x00, 0x28, 0x5B, 0xEC, 0x02, 0xB7, 0xCA, 0x2D, 0x67, 0x41
0x16, 0x08, 0x74, 0xC7, 0xA2, 0xD2, 0xA3, 0x97, 0x95, 0x76, 0xCA, 0x4D, 0x06, 0x39, 0x7A, 0xAB,
0x6C, 0x87, 0x58, 0x33, 0x4D, 0xC8, 0x5A, 0xAB, 0x04, 0x27, 0xFE, 0x8B, 0x1C, 0x23, 0x2F, 0x32,
0xC0, 0x44, 0xFF, 0x0D, 0xB5, 0x3B, 0x27, 0x96, 0x06, 0x89, 0x7B, 0xA3, 0x0B, 0xD0, 0xCE, 0x9E,
0x90, 0x22, 0x77, 0x5A, 0xAD, 0xA1, 0xE5, 0xB6, 0xFC, 0xCB, 0x39, 0x7E, 0x2B, 0x10, 0xEE, 0xB4,
0x8C, 0x2B, 0xA4, 0x1F, 0x60, 0x76, 0x39, 0xD7, 0xF6, 0x46, 0x77, 0x18, 0x20, 0xAD, 0xD4, 0xC9,
0x87, 0xF7, 0x37, 0xDA, 0xFD, 0xBA, 0xBA, 0xD2, 0xF2, 0x68, 0xDC, 0x26, 0x8D, 0x1B, 0x08, 0xC6
};
#endif
#ifndef XMRIG_NO_SUMO
const static uint8_t test_output_heavy[64] = {
const static uint8_t test_output_heavy[160] = {
0x99, 0x83, 0xF2, 0x1B, 0xDF, 0x20, 0x10, 0xA8, 0xD7, 0x07, 0xBB, 0x2F, 0x14, 0xD7, 0x86, 0x64,
0xBB, 0xE1, 0x18, 0x7F, 0x55, 0x01, 0x4B, 0x39, 0xE5, 0xF3, 0xD6, 0x93, 0x28, 0xE4, 0x8F, 0xC2,
0x4D, 0x94, 0x7D, 0xD6, 0xDB, 0x6E, 0x07, 0x48, 0x26, 0x4A, 0x51, 0x2E, 0xAC, 0xF3, 0x25, 0x4A,
0x1F, 0x1A, 0xA2, 0x5B, 0xFC, 0x0A, 0xAD, 0x82, 0xDE, 0xA8, 0x99, 0x96, 0x88, 0x52, 0xD2, 0x7D,
0x99, 0x83, 0xF2, 0x1B, 0xDF, 0x20, 0x10, 0xA8, 0xD7, 0x07, 0xBB, 0x2F, 0x14, 0xD7, 0x86, 0x64,
0xBB, 0xE1, 0x18, 0x7F, 0x55, 0x01, 0x4B, 0x39, 0xE5, 0xF3, 0xD6, 0x93, 0x28, 0xE4, 0x8F, 0xC2
0x3E, 0xE1, 0x23, 0x03, 0x5A, 0x63, 0x7B, 0x66, 0xF6, 0xD7, 0xC2, 0x2A, 0x34, 0x5E, 0x88, 0xE7,
0xFA, 0xC4, 0x25, 0x36, 0x54, 0xCB, 0xD2, 0x5C, 0x2F, 0x80, 0x2A, 0xF9, 0xCC, 0x43, 0xF7, 0xCD,
0xE5, 0x18, 0xA8, 0x05, 0x60, 0x18, 0xA5, 0x73, 0x72, 0x9B, 0x32, 0xDC, 0x69, 0x83, 0xC1, 0xE1,
0x1F, 0xDB, 0xDA, 0x6B, 0xAC, 0xEC, 0x9F, 0x67, 0xF8, 0x27, 0x1D, 0xC7, 0xE6, 0x46, 0x42, 0xF9,
0x53, 0x62, 0x0A, 0x54, 0x7D, 0x43, 0xEA, 0x18, 0x94, 0xED, 0xD8, 0x92, 0x06, 0x6A, 0xA1, 0x51,
0xAD, 0xB1, 0xFD, 0x89, 0xFB, 0x5C, 0xB4, 0x25, 0x6A, 0xDD, 0xB0, 0x09, 0xC5, 0x72, 0x87, 0xEB
};
#endif

View file

@@ -405,7 +405,7 @@ static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx *__restrict__ ctx)
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
{
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO>();
@@ -416,14 +416,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
return;
}
keccak(input, (int) size, ctx->state0, 200);
keccak(input, (int) size, ctx[0]->state, 200);
VARIANT1_INIT(0)
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx->state0, (__m128i*) ctx->memory);
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
const uint8_t* l0 = ctx->memory;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
const uint8_t* l0 = ctx[0]->memory;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
@@ -472,15 +472,15 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
}
}
cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx->memory, (__m128i*) ctx->state0);
cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
keccakf(h0, 24);
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
{
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO>();
@@ -491,16 +491,16 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
return;
}
keccak(input, (int) size, ctx->state0, 200);
keccak(input + size, (int) size, ctx->state1, 200);
keccak(input, (int) size, ctx[0]->state, 200);
keccak(input + size, (int) size, ctx[1]->state, 200);
VARIANT1_INIT(0);
VARIANT1_INIT(1);
const uint8_t* l0 = ctx->memory;
const uint8_t* l1 = ctx->memory + MEM;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state1);
const uint8_t* l0 = ctx[0]->memory;
const uint8_t* l1 = ctx[1]->memory;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
@@ -513,8 +513,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
uint64_t idx0 = h0[0] ^ h0[4];
uint64_t idx1 = h1[0] ^ h1[4];
uint64_t idx0 = al0;
uint64_t idx1 = al1;
void* mp0 = ((uint8_t*) l0) + ((idx0) & MASK);
void* mp1 = ((uint8_t*) l1) + ((idx1) & MASK);
@@ -602,26 +602,382 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, output + 32);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
}
#define CN_STEP1(a, b, c, l, ptr, idx) \
ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
c = _mm_load_si128(ptr);
#define CN_STEP2(a, b, c, l, ptr, idx) \
if (SOFT_AES) { \
c = soft_aesenc(c, a); \
} else { \
c = _mm_aesenc_si128(c, a); \
} \
\
b = _mm_xor_si128(b, c); \
\
if (VARIANT > 0) { \
cryptonight_monero_tweak(reinterpret_cast<uint64_t*>(ptr), b); \
} else { \
_mm_store_si128(ptr, b); \
}
#define CN_STEP3(a, b, c, l, ptr, idx) \
idx = EXTRACT64(c); \
ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
b = _mm_load_si128(ptr);
#define CN_STEP4(a, b, c, l, mc, ptr, idx) \
lo = __umul128(idx, EXTRACT64(b), &hi); \
a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \
\
if (VARIANT > 0) { \
_mm_store_si128(ptr, _mm_xor_si128(a, mc)); \
} else { \
_mm_store_si128(ptr, a); \
} \
\
a = _mm_xor_si128(a, b); \
idx = EXTRACT64(a); \
\
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { \
int64_t n = ((int64_t*)&l[idx & MASK])[0]; \
int32_t d = ((int32_t*)&l[idx & MASK])[2]; \
int64_t q = n / (d | 0x5); \
((int64_t*)&l[idx & MASK])[0] = n ^ q; \
idx = d ^ q; \
}
#define CONST_INIT(ctx, n) \
__m128i mc##n; \
if (VARIANT > 0) { \
mc##n = _mm_set_epi64x(*reinterpret_cast<const uint64_t*>(input + n * size + 35) ^ \
*(reinterpret_cast<const uint64_t*>((ctx)->state) + 24), 0); \
}
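
CN_STEP1..CN_STEP4 split one CryptoNight round into four phases so that the three, four, or five independent lanes below can be interleaved, letting each lane's scratchpad access overlap the other lanes' arithmetic. The loops then run ITERATIONS / 2 times because each body performs two rounds, swapping the roles of b and c instead of copying the fresh AES output back into b. A toy model of that unrolling (illustrative only; mix stands in for the AES and multiply-add work):

#include <cstdint>
#include <cstdio>

static uint64_t mix(uint64_t a, uint64_t b) { return a * 6364136223846793005ULL + b; }

int main()
{
    uint64_t a = 1, b = 2, c = 0;
    for (int i = 0; i < 4; i++) {   // 8 logical rounds in 4 iterations
        c = mix(a, b);              // even round: produces c from b
        b = mix(a, c);              // odd round: same step with b and c swapped
    }
    std::printf("%llu %llu\n", (unsigned long long) b, (unsigned long long) c);
    return 0;
}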
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
{
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
if (VARIANT > 0 && size < 43) {
memset(output, 0, 32 * 3);
return;
}
for (size_t i = 0; i < 3; i++) {
keccak(input + size * i, static_cast<int>(size), ctx[i]->state, 200);
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
}
CONST_INIT(ctx[0], 0);
CONST_INIT(ctx[1], 1);
CONST_INIT(ctx[2], 2);
uint8_t* l0 = ctx[0]->memory;
uint8_t* l1 = ctx[1]->memory;
uint8_t* l2 = ctx[2]->memory;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);
__m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]);
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]);
__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
__m128i cx0 = _mm_set_epi64x(0, 0);
__m128i cx1 = _mm_set_epi64x(0, 0);
__m128i cx2 = _mm_set_epi64x(0, 0);
uint64_t idx0, idx1, idx2;
idx0 = EXTRACT64(ax0);
idx1 = EXTRACT64(ax1);
idx2 = EXTRACT64(ax2);
for (size_t i = 0; i < ITERATIONS / 2; i++) {
uint64_t hi, lo;
__m128i *ptr0, *ptr1, *ptr2;
// EVEN ROUND
CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0);
CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1);
CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2);
CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0);
CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1);
CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2);
CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0);
CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1);
CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2);
CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
// ODD ROUND
CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1);
CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2);
CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0);
CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1);
CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2);
CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0);
CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
}
for (size_t i = 0; i < 3; i++) {
cn_implode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
}
}
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
{
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
if (VARIANT > 0 && size < 43) {
memset(output, 0, 32 * 4);
return;
}
for (size_t i = 0; i < 4; i++) {
keccak(input + size * i, static_cast<int>(size), ctx[i]->state, 200);
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
}
CONST_INIT(ctx[0], 0);
CONST_INIT(ctx[1], 1);
CONST_INIT(ctx[2], 2);
CONST_INIT(ctx[3], 3);
uint8_t* l0 = ctx[0]->memory;
uint8_t* l1 = ctx[1]->memory;
uint8_t* l2 = ctx[2]->memory;
uint8_t* l3 = ctx[3]->memory;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);
uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx[3]->state);
__m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]);
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]);
__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
__m128i ax3 = _mm_set_epi64x(h3[1] ^ h3[5], h3[0] ^ h3[4]);
__m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
__m128i cx0 = _mm_set_epi64x(0, 0);
__m128i cx1 = _mm_set_epi64x(0, 0);
__m128i cx2 = _mm_set_epi64x(0, 0);
__m128i cx3 = _mm_set_epi64x(0, 0);
uint64_t idx0, idx1, idx2, idx3;
idx0 = EXTRACT64(ax0);
idx1 = EXTRACT64(ax1);
idx2 = EXTRACT64(ax2);
idx3 = EXTRACT64(ax3);
for (size_t i = 0; i < ITERATIONS / 2; i++)
{
uint64_t hi, lo;
__m128i *ptr0, *ptr1, *ptr2, *ptr3;
// EVEN ROUND
CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0);
CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1);
CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2);
CN_STEP1(ax3, bx3, cx3, l3, ptr3, idx3);
CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0);
CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1);
CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2);
CN_STEP2(ax3, bx3, cx3, l3, ptr3, idx3);
CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0);
CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1);
CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2);
CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3);
CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3);
// ODD ROUND
CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1);
CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2);
CN_STEP1(ax3, cx3, bx3, l3, ptr3, idx3);
CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0);
CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1);
CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2);
CN_STEP2(ax3, cx3, bx3, l3, ptr3, idx3);
CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0);
CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3);
CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
}
for (size_t i = 0; i < 4; i++) {
cn_implode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
}
}
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
{
}
constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO>();
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
if (VARIANT > 0 && size < 43) {
memset(output, 0, 32 * 5);
return;
}
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
{
for (size_t i = 0; i < 5; i++) {
keccak(input + size * i, static_cast<int>(size), ctx[i]->state, 200);
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
}
CONST_INIT(ctx[0], 0);
CONST_INIT(ctx[1], 1);
CONST_INIT(ctx[2], 2);
CONST_INIT(ctx[3], 3);
CONST_INIT(ctx[4], 4);
uint8_t* l0 = ctx[0]->memory;
uint8_t* l1 = ctx[1]->memory;
uint8_t* l2 = ctx[2]->memory;
uint8_t* l3 = ctx[3]->memory;
uint8_t* l4 = ctx[4]->memory;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);
uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx[3]->state);
uint64_t* h4 = reinterpret_cast<uint64_t*>(ctx[4]->state);
__m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]);
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]);
__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
__m128i ax3 = _mm_set_epi64x(h3[1] ^ h3[5], h3[0] ^ h3[4]);
__m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
__m128i ax4 = _mm_set_epi64x(h4[1] ^ h4[5], h4[0] ^ h4[4]);
__m128i bx4 = _mm_set_epi64x(h4[3] ^ h4[7], h4[2] ^ h4[6]);
__m128i cx0 = _mm_set_epi64x(0, 0);
__m128i cx1 = _mm_set_epi64x(0, 0);
__m128i cx2 = _mm_set_epi64x(0, 0);
__m128i cx3 = _mm_set_epi64x(0, 0);
__m128i cx4 = _mm_set_epi64x(0, 0);
uint64_t idx0, idx1, idx2, idx3, idx4;
idx0 = EXTRACT64(ax0);
idx1 = EXTRACT64(ax1);
idx2 = EXTRACT64(ax2);
idx3 = EXTRACT64(ax3);
idx4 = EXTRACT64(ax4);
for (size_t i = 0; i < ITERATIONS / 2; i++)
{
uint64_t hi, lo;
__m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4;
// EVEN ROUND
CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0);
CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1);
CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2);
CN_STEP1(ax3, bx3, cx3, l3, ptr3, idx3);
CN_STEP1(ax4, bx4, cx4, l4, ptr4, idx4);
CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0);
CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1);
CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2);
CN_STEP2(ax3, bx3, cx3, l3, ptr3, idx3);
CN_STEP2(ax4, bx4, cx4, l4, ptr4, idx4);
CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0);
CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1);
CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2);
CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3);
CN_STEP3(ax4, bx4, cx4, l4, ptr4, idx4);
CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3);
CN_STEP4(ax4, bx4, cx4, l4, mc4, ptr4, idx4);
// ODD ROUND
CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0);
CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1);
CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2);
CN_STEP1(ax3, cx3, bx3, l3, ptr3, idx3);
CN_STEP1(ax4, cx4, bx4, l4, ptr4, idx4);
CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0);
CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1);
CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2);
CN_STEP2(ax3, cx3, bx3, l3, ptr3, idx3);
CN_STEP2(ax4, cx4, bx4, l4, ptr4, idx4);
CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0);
CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1);
CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2);
CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3);
CN_STEP3(ax4, cx4, bx4, l4, ptr4, idx4);
CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
CN_STEP4(ax4, cx4, bx4, l4, mc4, ptr4, idx4);
}
for (size_t i = 0; i < 5; i++) {
cn_implode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
}
}
#endif /* __CRYPTONIGHT_X86_H__ */

View file

@@ -105,6 +105,23 @@ static inline __m128i soft_aesenc(const uint32_t* in, __m128i key)
return _mm_xor_si128(out, key);
}
static inline __m128i soft_aesenc(__m128i in, __m128i key)
{
uint32_t x0, x1, x2, x3;
x0 = _mm_cvtsi128_si32(in);
x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55));
x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA));
x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF));
__m128i out = _mm_set_epi32(
(saes_table[0][x3 & 0xff] ^ saes_table[1][(x0 >> 8) & 0xff] ^ saes_table[2][(x1 >> 16) & 0xff] ^ saes_table[3][x2 >> 24]),
(saes_table[0][x2 & 0xff] ^ saes_table[1][(x3 >> 8) & 0xff] ^ saes_table[2][(x0 >> 16) & 0xff] ^ saes_table[3][x1 >> 24]),
(saes_table[0][x1 & 0xff] ^ saes_table[1][(x2 >> 8) & 0xff] ^ saes_table[2][(x3 >> 16) & 0xff] ^ saes_table[3][x0 >> 24]),
(saes_table[0][x0 & 0xff] ^ saes_table[1][(x1 >> 8) & 0xff] ^ saes_table[2][(x2 >> 16) & 0xff] ^ saes_table[3][x3 >> 24]));
return _mm_xor_si128(out, key);
}
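
soft_aesenc computes each 32-bit column of an AES round as four table lookups and three XORs: every saes_table entry packs an S-box output pre-multiplied by the MixColumns coefficients (2, 1, 1, 3). A sketch of how one such entry is built (illustrative little-endian packing; the repository's tables are macro-generated elsewhere in this file):

#include <cstdint>
#include <cstdio>

// GF(2^8) doubling with the AES reduction polynomial 0x11B.
static uint8_t xtime(uint8_t x) { return (uint8_t)((x << 1) ^ ((x >> 7) * 0x1B)); }

// T-table entry for an S-box output s: bytes (2*s, s, s, 3*s).
static uint32_t t0_entry(uint8_t s)
{
    const uint8_t s2 = xtime(s);            // 2*s
    const uint8_t s3 = (uint8_t)(s2 ^ s);   // 3*s = 2*s ^ s
    return (uint32_t) s2 | ((uint32_t) s << 8) | ((uint32_t) s << 16) | ((uint32_t) s3 << 24);
}

int main()
{
    std::printf("0x%08X\n", t0_entry(0x63));   // sbox[0x00] == 0x63 -> 0xA56363C6
    return 0;
}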
static inline uint32_t sub_word(uint32_t key)
{
return (saes_sbox[key >> 24 ] << 24) |