From 81edb492112ae543e4187fb40f81fc486195bd31 Mon Sep 17 00:00:00 2001 From: aegroto Date: Sun, 22 Apr 2018 10:34:20 +0000 Subject: [PATCH] arm patch fix --- src/crypto/CryptoNight_arm.h | 47 ++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/src/crypto/CryptoNight_arm.h b/src/crypto/CryptoNight_arm.h index 2e24d660..02c2479b 100644 --- a/src/crypto/CryptoNight_arm.h +++ b/src/crypto/CryptoNight_arm.h @@ -7,7 +7,6 @@ * Copyright 2016 Imran Yusuff * Copyright 2017-2018 XMR-Stak , * Copyright 2018 Lee Clagett - * Copyright 2018 aegroto * Copyright 2016-2018 XMRig , * * This program is free software: you can redistribute it and/or modify @@ -264,10 +263,6 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output) const __m128i *outputTmpLimit = output + (MEM / sizeof(__m128i)); for (__m128i *outputTmp = output; outputTmp < outputTmpLimit; outputTmp += 8) { - if (!SOFT_AES) { - aes_round(_mm_setzero_si128(), &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - } - aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); @@ -279,6 +274,7 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output) aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); + _mm_store_si128(outputTmp, xin0); _mm_store_si128(outputTmp + 1, xin1); _mm_store_si128(outputTmp + 2, xin2); @@ -286,7 +282,7 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output) _mm_store_si128(outputTmp + 4, xin4); _mm_store_si128(outputTmp + 5, xin5); _mm_store_si128(outputTmp + 6, xin6); - _mm_store_si128(outputTmp + 7, xin7); + _mm_store_si128(outputTmp + 7, xin7); } } @@ -396,9 +392,7 @@ static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp) uint64_t vh = vgetq_lane_u64(tmp, 1); uint8_t x = vh >> 24; - static const uint16_t table = 0x7531; - const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1; - vh ^= ((table >> index) & 0x3) << 28; + vh ^= ((0x7531 >> ((((x >> 3) & 6) | (x & 1)) << 1)) & 0x3) << 28; mem_out[1] = vh; } @@ -418,7 +412,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si keccak(input, (int) size, ctx[0]->state, 200); - VARIANT1_INIT(0) + VARIANT1_INIT(0); cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); @@ -430,24 +424,26 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); uint64_t idx0 = h0[0] ^ h0[4]; + void* mp = ((uint8_t*) l0) + (idx0 & MASK); - void* mp = ((uint8_t*) l0) + ((idx0) & MASK); - for (size_t i = 0; i < ITERATIONS; i++) { __m128i cx; if (SOFT_AES) { - cx = soft_aesenc((uint32_t*) mp, _mm_set_epi64x(ah0, al0)); - } else { + cx = soft_aesenc((uint32_t*) mp, _mm_set_epi64x(ah0, al0)); + } + else { cx = _mm_load_si128((__m128i *) mp); -# ifndef XMRIG_ARMv7 - cx = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0); -# endif + cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0)); } - _mm_store_si128((__m128i *) mp, _mm_xor_si128(bx0, cx)); - VARIANT1_1(mp); - mp = ((uint8_t*) l0) + ((idx0 = EXTRACT64(cx)) & MASK); + if (VARIANT > 0) { + cryptonight_monero_tweak((uint64_t*) mp, _mm_xor_si128(bx0, cx)); + } else { + _mm_store_si128((__m128i *) mp, _mm_xor_si128(bx0, cx)); + } + + mp = ((uint8_t*) l0) + ((idx0 = EXTRACT64(cx)) & MASK); bx0 = cx; uint64_t hi, lo, cl, ch; @@ -465,13 +461,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si ah0 ^= ch; al0 ^= cl; - mp = ((uint8_t*) l0) + ((al0) & MASK); + mp = ((uint8_t*) l0) + (al0 & MASK); if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - int64_t n = ((int64_t*)mp)[0]; - int32_t d = ((int32_t*)mp)[2]; + int64_t n = ((int64_t*) mp)[0]; + int32_t d = ((int32_t*) mp)[2]; int64_t q = n / (d | 0x5); - ((int64_t*) mp)[0] = n ^ q; + + ((int64_t*) mp)[0] = n ^ q; } } @@ -481,6 +478,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); } + template inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx) { @@ -627,3 +625,4 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz } #endif /* __CRYPTONIGHT_ARM_H__ */ +