arm patch fix

This commit is contained in:
aegroto 2018-04-22 10:34:20 +00:00
parent bc6a6c8028
commit 81edb49211

View file

@ -7,7 +7,6 @@
* Copyright 2016 Imran Yusuff <https://github.com/imranyusuff> * Copyright 2016 Imran Yusuff <https://github.com/imranyusuff>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt> * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd> * Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018 aegroto <https://github.com/aegroto>
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
@ -264,10 +263,6 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
const __m128i *outputTmpLimit = output + (MEM / sizeof(__m128i)); const __m128i *outputTmpLimit = output + (MEM / sizeof(__m128i));
for (__m128i *outputTmp = output; outputTmp < outputTmpLimit; outputTmp += 8) { for (__m128i *outputTmp = output; outputTmp < outputTmpLimit; outputTmp += 8) {
if (!SOFT_AES) {
aes_round<SOFT_AES>(_mm_setzero_si128(), &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
}
aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
@ -279,6 +274,7 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
_mm_store_si128(outputTmp, xin0); _mm_store_si128(outputTmp, xin0);
_mm_store_si128(outputTmp + 1, xin1); _mm_store_si128(outputTmp + 1, xin1);
_mm_store_si128(outputTmp + 2, xin2); _mm_store_si128(outputTmp + 2, xin2);
@ -396,9 +392,7 @@ static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
uint64_t vh = vgetq_lane_u64(tmp, 1); uint64_t vh = vgetq_lane_u64(tmp, 1);
uint8_t x = vh >> 24; uint8_t x = vh >> 24;
static const uint16_t table = 0x7531; vh ^= ((0x7531 >> ((((x >> 3) & 6) | (x & 1)) << 1)) & 0x3) << 28;
const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
vh ^= ((table >> index) & 0x3) << 28;
mem_out[1] = vh; mem_out[1] = vh;
} }
@ -418,7 +412,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
keccak(input, (int) size, ctx[0]->state, 200); keccak(input, (int) size, ctx[0]->state, 200);
VARIANT1_INIT(0) VARIANT1_INIT(0);
cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
@ -430,23 +424,25 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
uint64_t idx0 = h0[0] ^ h0[4]; uint64_t idx0 = h0[0] ^ h0[4];
void* mp = ((uint8_t*) l0) + (idx0 & MASK);
void* mp = ((uint8_t*) l0) + ((idx0) & MASK);
for (size_t i = 0; i < ITERATIONS; i++) { for (size_t i = 0; i < ITERATIONS; i++) {
__m128i cx; __m128i cx;
if (SOFT_AES) { if (SOFT_AES) {
cx = soft_aesenc((uint32_t*) mp, _mm_set_epi64x(ah0, al0)); cx = soft_aesenc((uint32_t*) mp, _mm_set_epi64x(ah0, al0));
} else { }
else {
cx = _mm_load_si128((__m128i *) mp); cx = _mm_load_si128((__m128i *) mp);
# ifndef XMRIG_ARMv7 cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
cx = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
# endif
} }
if (VARIANT > 0) {
cryptonight_monero_tweak((uint64_t*) mp, _mm_xor_si128(bx0, cx));
} else {
_mm_store_si128((__m128i *) mp, _mm_xor_si128(bx0, cx)); _mm_store_si128((__m128i *) mp, _mm_xor_si128(bx0, cx));
VARIANT1_1(mp); }
mp = ((uint8_t*) l0) + ((idx0 = EXTRACT64(cx)) & MASK); mp = ((uint8_t*) l0) + ((idx0 = EXTRACT64(cx)) & MASK);
bx0 = cx; bx0 = cx;
@ -465,12 +461,13 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
ah0 ^= ch; ah0 ^= ch;
al0 ^= cl; al0 ^= cl;
mp = ((uint8_t*) l0) + ((al0) & MASK); mp = ((uint8_t*) l0) + (al0 & MASK);
if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
int64_t n = ((int64_t*) mp)[0]; int64_t n = ((int64_t*) mp)[0];
int32_t d = ((int32_t*) mp)[2]; int32_t d = ((int32_t*) mp)[2];
int64_t q = n / (d | 0x5); int64_t q = n / (d | 0x5);
((int64_t*) mp)[0] = n ^ q; ((int64_t*) mp)[0] = n ^ q;
} }
} }
@ -481,6 +478,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
} }
template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT> template<xmrig::Algo ALGO, bool SOFT_AES, int VARIANT>
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx) inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx)
{ {
@ -627,3 +625,4 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
} }
#endif /* __CRYPTONIGHT_ARM_H__ */ #endif /* __CRYPTONIGHT_ARM_H__ */