Merge conflicts resolved

aegroto 2018-04-22 11:58:36 +02:00
commit bc6a6c8028
84 changed files with 1750 additions and 1087 deletions


@@ -28,13 +28,7 @@
 #define __CRYPTONIGHT_ARM_H__
 
-#if defined(XMRIG_ARM) && !defined(__clang__)
-#   include "aligned_malloc.h"
-#else
-#   include <mm_malloc.h>
-#endif
+#include "common/utils/mm_malloc.h"
 
 #include "crypto/CryptoNight.h"
 #include "crypto/CryptoNight_constants.h"
 #include "crypto/CryptoNight_monero.h"
@@ -80,6 +74,19 @@ static inline __attribute__((always_inline)) __m128i _mm_set_epi64x(const uint64
 }
+
+#ifdef XMRIG_ARMv8
+static inline __attribute__((always_inline)) __m128i _mm_aesenc_si128(__m128i v, __m128i rkey)
+{
+    alignas(16) const __m128i zero = { 0 };
+    return veorq_u8(vaesmcq_u8(vaeseq_u8(v, zero)), rkey);
+}
+#else
+static inline __attribute__((always_inline)) __m128i _mm_aesenc_si128(__m128i v, __m128i rkey)
+{
+    /* No AES instructions below ARMv8; this stub should be unreachable because
+       the software AES path is taken instead. Return v to avoid the undefined
+       behaviour of a missing return. */
+    return v;
+}
+#endif
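
Note: x86 _mm_aesenc_si128 computes MixColumns(ShiftRows(SubBytes(state))) ^ rkey, while NEON's AESE instruction computes ShiftRows(SubBytes(state ^ key)) and AESMC applies MixColumns. Passing a zero key to AESE makes its AddRoundKey a no-op, so the composition above reproduces the x86 semantics exactly. A standalone restatement of the same identity (sketch, assuming an AArch64 target with the crypto extension):

    #include <arm_neon.h>

    // AESENC(v, rkey) == MixColumns(ShiftRows(SubBytes(v))) ^ rkey
    static inline uint8x16_t aesenc_emulated(uint8x16_t v, uint8x16_t rkey)
    {
        const uint8x16_t zero = vdupq_n_u8(0);
        // vaeseq_u8(v, 0): AddRoundKey(0) is a no-op, then SubBytes + ShiftRows;
        // vaesmcq_u8 applies MixColumns; the final XOR adds the round key.
        return veorq_u8(vaesmcq_u8(vaeseq_u8(v, zero)), rkey);
    }
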
 /* This one was not implemented yet, so here it is. */
 static inline __attribute__((always_inline)) uint64_t _mm_cvtsi128_si64(__m128i a)
 {
@@ -162,19 +169,19 @@ static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, _
     *k0 = xout0;
     *k1 = xout2;
 
-    SOFT_AES ? soft_aes_genkey_sub<0x01>(&xout0, &xout2) : soft_aes_genkey_sub<0x01>(&xout0, &xout2);
+    soft_aes_genkey_sub<0x01>(&xout0, &xout2);
 
     *k2 = xout0;
     *k3 = xout2;
 
-    SOFT_AES ? soft_aes_genkey_sub<0x02>(&xout0, &xout2) : soft_aes_genkey_sub<0x02>(&xout0, &xout2);
+    soft_aes_genkey_sub<0x02>(&xout0, &xout2);
 
     *k4 = xout0;
     *k5 = xout2;
 
-    SOFT_AES ? soft_aes_genkey_sub<0x04>(&xout0, &xout2) : soft_aes_genkey_sub<0x04>(&xout0, &xout2);
+    soft_aes_genkey_sub<0x04>(&xout0, &xout2);
 
     *k6 = xout0;
     *k7 = xout2;
 
-    SOFT_AES ? soft_aes_genkey_sub<0x08>(&xout0, &xout2) : soft_aes_genkey_sub<0x08>(&xout0, &xout2);
+    soft_aes_genkey_sub<0x08>(&xout0, &xout2);
 
     *k8 = xout0;
     *k9 = xout2;
 }
@@ -193,18 +200,16 @@ static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2,
         *x6 = soft_aesenc((uint32_t*)x6, key);
         *x7 = soft_aesenc((uint32_t*)x7, key);
     }
-#   ifndef XMRIG_ARMv7
     else {
-        *x0 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x0), key));
-        *x1 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x1), key));
-        *x2 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x2), key));
-        *x3 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x3), key));
-        *x4 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x4), key));
-        *x5 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x5), key));
-        *x6 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x6), key));
-        *x7 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x7), key));
+        *x0 = _mm_aesenc_si128(*x0, key);
+        *x1 = _mm_aesenc_si128(*x1, key);
+        *x2 = _mm_aesenc_si128(*x2, key);
+        *x3 = _mm_aesenc_si128(*x3, key);
+        *x4 = _mm_aesenc_si128(*x4, key);
+        *x5 = _mm_aesenc_si128(*x5, key);
+        *x6 = _mm_aesenc_si128(*x6, key);
+        *x7 = _mm_aesenc_si128(*x7, key);
     }
-#   endif
 }
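
Note: SOFT_AES is a template parameter, so this runtime-looking if/else resolves per instantiation and the untaken branch compiles away; the hardware branch now calls the shared _mm_aesenc_si128 instead of open-coded AESE/AESMC pairs, which is also what lets the #ifndef XMRIG_ARMv7 guard disappear. A reduced one-lane sketch of the same compile-time dispatch (hypothetical helper, same identifiers as above):

    // Sketch: with SOFT_AES fixed at instantiation, only one branch survives.
    template<bool SOFT_AES>
    static inline void one_lane_round(__m128i key, __m128i *x)
    {
        if (SOFT_AES) {
            *x = soft_aesenc((uint32_t*) x, key);   // table-driven software AES
        }
        else {
            *x = _mm_aesenc_si128(*x, key);         // NEON-emulated AESENC
        }
    }
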
@@ -241,10 +246,6 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
     if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
         for (size_t i = 0; i < 16; i++) {
-            if (!SOFT_AES) {
-                aes_round<SOFT_AES>(_mm_setzero_si128(), &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            }
-
             aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
             aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
             aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
@@ -254,20 +255,7 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
             aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
             aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
             aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-
-            if (!SOFT_AES) {
-                xin0 ^= k9;
-                xin1 ^= k9;
-                xin2 ^= k9;
-                xin3 ^= k9;
-                xin4 ^= k9;
-                xin5 ^= k9;
-                xin6 ^= k9;
-                xin7 ^= k9;
-            }
-            else {
-                aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            }
+            aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
 
             mix_and_propagate(xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
         }
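
Note: the deleted special case existed because the old hardware round was raw AESE+AESMC, i.e. r(x, k) = MixColumns(ShiftRows(SubBytes(x ^ k))); emulating ten x86 AESENC rounds then required a leading zero-key round and a trailing XOR with k9. Since _mm_aesenc_si128 now folds the round-key XOR into the end of each round, both paths can run the same ten uniform calls. A sketch of the equivalence (hypothetical helper; the real code keeps k0..k9 unrolled in registers):

    // Old emulation:  r(...r(r(x, 0), k0)..., k8) ^ k9
    // New emulation:  ten identical rounds matching x86 AESENC one-for-one.
    static inline __m128i ten_aes_rounds(__m128i x, const __m128i k[10])
    {
        for (int i = 0; i < 10; ++i) {
            x = _mm_aesenc_si128(x, k[i]);   // full round incl. round-key XOR
        }
        return x;
    }
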
@@ -289,20 +277,7 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
         aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
         aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
         aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-
-        if (!SOFT_AES) {
-            xin0 ^= k9;
-            xin1 ^= k9;
-            xin2 ^= k9;
-            xin3 ^= k9;
-            xin4 ^= k9;
-            xin5 ^= k9;
-            xin6 ^= k9;
-            xin7 ^= k9;
-        }
-        else {
-            aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        }
+        aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
 
         _mm_store_si128(outputTmp, xin0);
         _mm_store_si128(outputTmp + 1, xin1);
@@ -346,10 +321,6 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
         xout6 = _mm_xor_si128(_mm_load_si128(inputTmp + 6), xout6);
         xout7 = _mm_xor_si128(_mm_load_si128(inputTmp + 7), xout7);
-
-        if (!SOFT_AES) {
-            aes_round<SOFT_AES>(_mm_setzero_si128(), &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        }
 
         aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
         aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
         aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
@@ -359,20 +330,7 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
         aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
         aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
         aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-
-        if (!SOFT_AES) {
-            xout0 ^= k9;
-            xout1 ^= k9;
-            xout2 ^= k9;
-            xout3 ^= k9;
-            xout4 ^= k9;
-            xout5 ^= k9;
-            xout6 ^= k9;
-            xout7 ^= k9;
-        }
-        else {
-            aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        }
+        aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
 
         if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
             mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
@@ -390,10 +348,6 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
             xout6 = _mm_xor_si128(_mm_load_si128(inputTmp + 6), xout6);
             xout7 = _mm_xor_si128(_mm_load_si128(inputTmp + 7), xout7);
-
-            if (!SOFT_AES) {
-                aes_round<SOFT_AES>(_mm_setzero_si128(), &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            }
 
             aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
             aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
             aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
@@ -403,29 +357,12 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
             aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
             aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
             aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-
-            if (!SOFT_AES) {
-                xout0 ^= k9;
-                xout1 ^= k9;
-                xout2 ^= k9;
-                xout3 ^= k9;
-                xout4 ^= k9;
-                xout5 ^= k9;
-                xout6 ^= k9;
-                xout7 ^= k9;
-            }
-            else {
-                aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            }
+            aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
 
             mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
         }
 
         for (size_t i = 0; i < 16; i++) {
-            if (!SOFT_AES) {
-                aes_round<SOFT_AES>(_mm_setzero_si128(), &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            }
-
             aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
             aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
             aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
@@ -435,20 +372,7 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
             aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
             aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
             aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-
-            if (!SOFT_AES) {
-                xout0 ^= k9;
-                xout1 ^= k9;
-                xout2 ^= k9;
-                xout3 ^= k9;
-                xout4 ^= k9;
-                xout5 ^= k9;
-                xout6 ^= k9;
-                xout7 ^= k9;
-            }
-            else {
-                aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            }
+            aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
 
             mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
         }
@@ -465,8 +389,23 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
 }
 
+
+static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
+{
+    mem_out[0] = EXTRACT64(tmp);
+
+    uint64_t vh = vgetq_lane_u64(tmp, 1);
+
+    uint8_t x = vh >> 24;
+    static const uint16_t table = 0x7531;
+    const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
+    vh ^= ((table >> index) & 0x3) << 28;
+
+    mem_out[1] = vh;
+}
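
Note: this is the CryptoNight variant-1 (Monero v7) tweak. The byte at bits 24..31 of the high quadword derives an index into the packed table 0x7531, read in 2-bit steps, and the selected 2-bit entry is XORed back into bits 28..29. A scalar restatement of the same transform with no NEON types (helper name is illustrative):

    // Scalar equivalent of the high-quadword transform above.
    static inline uint64_t monero_tweak_high(uint64_t vh)
    {
        static const uint16_t table = 0x7531;
        const uint8_t x     = (uint8_t)(vh >> 24);
        const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;  // one of eight 2-bit slots
        vh ^= (uint64_t)((table >> index) & 0x3) << 28;
        return vh;
    }
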
 
 template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
-inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx *__restrict__ ctx)
+inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
 {
     constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
     constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO>();
@@ -477,14 +416,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
         return;
     }
 
-    keccak(input, (int) size, ctx->state0, 200);
+    keccak(input, (int) size, ctx[0]->state, 200);
 
     VARIANT1_INIT(0)
 
-    cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx->state0, (__m128i*) ctx->memory);
+    cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
 
-    const uint8_t* l0 = ctx->memory;
-    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
+    const uint8_t* l0 = ctx[0]->memory;
+    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
 
     uint64_t al0 = h0[0] ^ h0[4];
     uint64_t ah0 = h0[1] ^ h0[5];
@@ -536,14 +475,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
         }
     }
 
-    cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx->memory, (__m128i*) ctx->state0);
+    cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
 
     keccakf(h0, 24);
-    extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
+    extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
 }
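
Note: the signature now takes cryptonight_ctx **, i.e. one context per hashing lane, each with its own state and memory fields, replacing the old single context with numbered state0/state1 members. A hedged caller sketch (allocation details and the cn_select_memory call are assumptions, not shown in this diff):

    // Hypothetical single-lane caller.
    cryptonight_ctx *ctx[1];
    ctx[0] = static_cast<cryptonight_ctx *>(_mm_malloc(sizeof(cryptonight_ctx), 16));
    ctx[0]->memory = static_cast<uint8_t *>(
        _mm_malloc(xmrig::cn_select_memory<xmrig::CRYPTONIGHT>(), 16));

    uint8_t hash[32];
    cryptonight_single_hash<xmrig::CRYPTONIGHT, false, 0>(input, input_size, hash, ctx);
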
 
 template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
-inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
+inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx)
 {
     constexpr size_t MASK = xmrig::cn_select_mask<ALGO>();
     constexpr size_t ITERATIONS = xmrig::cn_select_iter<ALGO>();
@@ -554,16 +493,16 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
         return;
     }
 
-    keccak(input, (int) size, ctx->state0, 200);
-    keccak(input + size, (int) size, ctx->state1, 200);
+    keccak(input, (int) size, ctx[0]->state, 200);
+    keccak(input + size, (int) size, ctx[1]->state, 200);
 
     VARIANT1_INIT(0);
     VARIANT1_INIT(1);
 
-    const uint8_t* l0 = ctx->memory;
-    const uint8_t* l1 = ctx->memory + MEM;
-    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
-    uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state1);
+    const uint8_t* l0 = ctx[0]->memory;
+    const uint8_t* l1 = ctx[1]->memory;
+    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
+    uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
 
     cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
     cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
@@ -589,16 +528,17 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
         else {
             cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
             cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
-# ifndef XMRIG_ARMv7
-            cx0 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx0, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
-            cx1 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx1, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah1, al1);
-# endif
+
+            cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
+            cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
         }
 
-        _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
-        _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
-
-        VARIANT1_1(&l0[idx0 & MASK]);
-        VARIANT1_1(&l1[idx1 & MASK]);
+        if (VARIANT > 0) {
+            cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
+            cryptonight_monero_tweak((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
+        } else {
+            _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
+            _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
+        }
 
         idx0 = EXTRACT64(cx0);
         idx1 = EXTRACT64(cx1);
@@ -664,25 +604,25 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
     keccakf(h0, 24);
     keccakf(h1, 24);
 
-    extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
-    extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, output + 32);
+    extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
+    extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
 }
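
Note: besides the ctx[0]/ctx[1] indexing, the second scratchpad moves from an offset within one shared buffer (ctx->memory + MEM) to its own allocation (ctx[1]->memory). The two lanes are interleaved through the main loop so one lane's memory stalls overlap the other's arithmetic, and the 64-byte output holds lane 0's digest at offset 0 and lane 1's at offset 32. A hedged two-lane caller sketch (buffer sizes illustrative):

    // Hypothetical: two equally sized inputs, back to back.
    uint8_t blobs[2 * 76];     // e.g. two 76-byte hashing blobs
    uint8_t digests[64];       // 32 bytes per lane
    cryptonight_ctx *ctx[2];   // each entry allocated as in the single-lane example
    // ... allocate ctx[0], ctx[1] and their scratchpads ...
    cryptonight_double_hash<xmrig::CRYPTONIGHT, false, 0>(blobs, 76, digests, ctx);
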
 
 template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
-inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
+inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx)
 {
 }
 
 template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
-inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
+inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx)
 {
 }
 
 template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
-inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
+inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx)
 {
 }
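
Note: the triple, quad and penta variants remain empty stubs after this merge; only their signatures were updated to cryptonight_ctx ** so the common dispatch keeps compiling. Purely as an illustration of where this is headed (not code from this commit), an N-way version would presumably repeat the per-lane pattern of the single and double paths:

    // Illustrative shape only: one keccak/explode/implode per lane,
    // with the main loops interleaved across lanes.
    template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT, size_t N>
    inline void cryptonight_multi_hash_sketch(const uint8_t *__restrict__ input, size_t size,
                                              uint8_t *__restrict__ output,
                                              cryptonight_ctx **__restrict__ ctx)
    {
        for (size_t i = 0; i < N; ++i) {
            keccak(input + i * size, (int) size, ctx[i]->state, 200);
        }
        // ... explode scratchpads, interleaved main loop, implode,
        //     then keccakf + extra_hashes per lane, as in the double path ...
    }
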