diff --git a/src/Options.cpp b/src/Options.cpp
index 9e75a676..d26efecb 100644
--- a/src/Options.cpp
+++ b/src/Options.cpp
@@ -130,6 +130,9 @@ static char const short_options[] = "a:c:khBp:Px:r:R:s:t:T:o:u:O:v:Vl:S";
 static struct option const options[] = {
     { "algo",              1, nullptr, 'a'  },
+    { "api-access-token",  1, nullptr, 4001 },
+    { "api-port",          1, nullptr, 4000 },
+    { "api-worker-id",     1, nullptr, 4002 },
     { "av",                1, nullptr, 'v'  },
     { "aesni",             1, nullptr, 'A'  },
     { "multihash-factor",  1, nullptr, 'm'  },
@@ -532,7 +535,6 @@ bool Options::parseArg(int key, const char *arg)
     case 'S':  /* --syslog */
     case 1005: /* --safe */
     case 1006: /* --nicehash */
-        return parseBoolean(key, true);
 
     case 1002: /* --no-color */
     case 1009: /* --no-huge-pages */
diff --git a/src/api/NetworkState.cpp b/src/api/NetworkState.cpp
index 374d1e1a..82d61c89 100644
--- a/src/api/NetworkState.cpp
+++ b/src/api/NetworkState.cpp
@@ -46,7 +46,7 @@ NetworkState::NetworkState() :
 
 int NetworkState::connectionTime() const
 {
-    return m_active ? ((uv_now(uv_default_loop()) - m_connectionTime) / 1000) : 0;
+    return m_active ? (int)((uv_now(uv_default_loop()) - m_connectionTime) / 1000) : 0;
 }
 
 
@@ -56,7 +56,7 @@ uint32_t NetworkState::avgTime() const
         return 0;
     }
 
-    return (uint32_t) connectionTime() / m_latency.size();
+    return connectionTime() / (uint32_t)m_latency.size();
 }
 
 
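Note on the avgTime() hunk above: the cast in the old expression binds only to connectionTime(), so the division is still performed in size_t and the 64-bit result is narrowed when returned as uint32_t. Moving the cast to the divisor keeps the whole expression 32 bits wide. A minimal standalone sketch of the two conversion paths (variable names here are illustrative, not from the patch):

// avg_time_cast.cpp - shows the implicit-conversion difference behind the
// NetworkState::avgTime() change. Build: g++ -O2 avg_time_cast.cpp
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    int connection_time = 90;            // stand-in for connectionTime()
    std::vector<uint32_t> latency(4);    // stand-in for m_latency

    // Old form: uint32_t / size_t -> the division happens in size_t
    // (64-bit on LP64), and the result is narrowed on return.
    auto old_form = (uint32_t) connection_time / latency.size();

    // New form: int / uint32_t -> both operands convert to uint32_t,
    // so the result already has the return type.
    auto new_form = connection_time / (uint32_t) latency.size();

    printf("old result width: %zu bytes, new result width: %zu bytes\n",
           sizeof old_form, sizeof new_form);   // 8 vs 4 on LP64
    return 0;
}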
diff --git a/src/crypto/CryptoNight_arm.h b/src/crypto/CryptoNight_arm.h
index 77cfd7c3..799d3804 100644
--- a/src/crypto/CryptoNight_arm.h
+++ b/src/crypto/CryptoNight_arm.h
@@ -208,14 +208,14 @@ aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6,
           __m128i* x7)
 {
     if (SOFT_AES) {
-        *x0 = soft_aesenc(*x0, key);
-        *x1 = soft_aesenc(*x1, key);
-        *x2 = soft_aesenc(*x2, key);
-        *x3 = soft_aesenc(*x3, key);
-        *x4 = soft_aesenc(*x4, key);
-        *x5 = soft_aesenc(*x5, key);
-        *x6 = soft_aesenc(*x6, key);
-        *x7 = soft_aesenc(*x7, key);
+        *x0 = soft_aesenc((uint32_t*)x0, key);
+        *x1 = soft_aesenc((uint32_t*)x1, key);
+        *x2 = soft_aesenc((uint32_t*)x2, key);
+        *x3 = soft_aesenc((uint32_t*)x3, key);
+        *x4 = soft_aesenc((uint32_t*)x4, key);
+        *x5 = soft_aesenc((uint32_t*)x5, key);
+        *x6 = soft_aesenc((uint32_t*)x6, key);
+        *x7 = soft_aesenc((uint32_t*)x7, key);
     }
 # ifndef XMRIG_ARMv7
     else {
@@ -392,11 +392,11 @@ public:
     for (size_t i = 0; i < ITERATIONS; i++) {
         for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
             __m128i cx;
-            cx = _mm_load_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK]);
 
             if (SOFT_AES) {
-                cx = soft_aesenc(cx, _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
+                cx = soft_aesenc((uint32_t*)&l[hashBlock][idx[hashBlock] & MASK], _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
             } else {
+                cx = _mm_load_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK]);
                 cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
             }
@@ -460,15 +460,17 @@ public:
     idx = h[0] ^ h[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx = _mm_load_si128((__m128i*) &l[idx & MASK]);
+        __m128i cx;
 
         if (SOFT_AES) {
-            cx = soft_aesenc(cx, _mm_set_epi64x(ah, al));
-        } else {
-# ifndef XMRIG_ARMv7
-            cx = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah, al);
-# endif
+            cx = soft_aesenc((uint32_t*)&l[idx & MASK], _mm_set_epi64x(ah, al));
         }
+        else {
+            cx = _mm_load_si128((__m128i*) &l[idx & MASK]);
+# ifndef XMRIG_ARMv7
+            cx = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah, al);
+# endif
+        }
 
         _mm_store_si128((__m128i*) &l[idx & MASK], _mm_xor_si128(bx, cx));
         idx = EXTRACT64(cx);
@@ -528,13 +530,16 @@ public:
     uint64_t idx1 = h1[0] ^ h1[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
-        __m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+        __m128i cx0;
+        __m128i cx1;
 
         if (SOFT_AES) {
-            cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
-            cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
+            cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
+            cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
         } else {
+            cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+            cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+
 # ifndef XMRIG_ARMv7
             cx0 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx0, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
             cx1 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx1, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah1, al1);
@@ -630,20 +635,24 @@ public:
     uint64_t idx1 = h1[0] ^ h1[4];
     uint64_t idx2 = h2[0] ^ h2[4];
 
-    for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
-        __m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
-        __m128i cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
-
-        if (SOFT_AES) {
-            cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
-            cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
-            cx2 = soft_aesenc(cx2, _mm_set_epi64x(ah2, al2));
-        } else {
-# ifndef XMRIG_ARMv7
-            cx0 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx0, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
-            cx1 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx1, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah1, al1);
-            cx2 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx2, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah2, al2);
+    for (size_t i = 0; i < ITERATIONS; i++) {
+        __m128i cx0;
+        __m128i cx1;
+        __m128i cx2;
+
+        if (SOFT_AES) {
+            cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
+            cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
+            cx2 = soft_aesenc((uint32_t*)&l2[idx2 & MASK], _mm_set_epi64x(ah2, al2));
+        }
+        else {
+            cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+            cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+            cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
+# ifndef XMRIG_ARMv7
+            cx0 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx0, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
+            cx1 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx1, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah1, al1);
+            cx2 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx2, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah2, al2);
 # endif
         }
@@ -768,18 +777,23 @@ public:
     uint64_t idx3 = h3[0] ^ h3[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
-        __m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
-        __m128i cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
-        __m128i cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
+        __m128i cx0;
+        __m128i cx1;
+        __m128i cx2;
+        __m128i cx3;
 
         if (SOFT_AES) {
-            cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
-            cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
-            cx2 = soft_aesenc(cx2, _mm_set_epi64x(ah2, al2));
-            cx3 = soft_aesenc(cx3, _mm_set_epi64x(ah3, al3));
+            cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
+            cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
+            cx2 = soft_aesenc((uint32_t*)&l2[idx2 & MASK], _mm_set_epi64x(ah2, al2));
+            cx3 = soft_aesenc((uint32_t*)&l3[idx3 & MASK], _mm_set_epi64x(ah3, al3));
         } else {
+            cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+            cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+            cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
+            cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
+
 # ifndef XMRIG_ARMv7
             cx0 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx0, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
             cx1 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx1, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah1, al1);
             cx2 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx2, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah2, al2);
@@ -937,20 +951,26 @@ public:
     uint64_t idx4 = h4[0] ^ h4[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
-        __m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
-        __m128i cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
-        __m128i cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
-        __m128i cx4 = _mm_load_si128((__m128i*) &l4[idx4 & MASK]);
+        __m128i cx0;
+        __m128i cx1;
+        __m128i cx2;
+        __m128i cx3;
+        __m128i cx4;
 
         if (SOFT_AES) {
-            cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
-            cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
-            cx2 = soft_aesenc(cx2, _mm_set_epi64x(ah2, al2));
-            cx3 = soft_aesenc(cx3, _mm_set_epi64x(ah3, al3));
-            cx4 = soft_aesenc(cx4, _mm_set_epi64x(ah4, al4));
+            cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
+            cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
+            cx2 = soft_aesenc((uint32_t*)&l2[idx2 & MASK], _mm_set_epi64x(ah2, al2));
+            cx3 = soft_aesenc((uint32_t*)&l3[idx3 & MASK], _mm_set_epi64x(ah3, al3));
+            cx4 = soft_aesenc((uint32_t*)&l4[idx4 & MASK], _mm_set_epi64x(ah4, al4));
        } else {
+            cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+            cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+            cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
+            cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
+            cx4 = _mm_load_si128((__m128i*) &l4[idx4 & MASK]);
+
 # ifndef XMRIG_ARMv7
             cx0 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx0, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
             cx1 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx1, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah1, al1);
             cx2 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx2, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah2, al2);
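The ARM else-branches above lean on the standard NEON emulation of AES-NI: vaeseq_u8(x, 0) performs AddRoundKey with an all-zero key (a no-op) followed by SubBytes and ShiftRows, vaesmcq_u8 applies MixColumns, and the trailing XOR adds the real round key, which together reproduce _mm_aesenc_si128's SubBytes/ShiftRows/MixColumns/AddRoundKey order. A standalone sketch of that identity, assuming an ARMv8-A CPU with the Crypto extension; the file name and helper name are illustrative, not from the patch:

// aesenc_neon.cpp - one AES round via NEON, matching AESENC semantics.
// Build (assumed toolchain): g++ -O2 -march=armv8-a+crypto aesenc_neon.cpp
#include <arm_neon.h>
#include <cstdio>

// Equivalent of _mm_aesenc_si128(x, key) on ARMv8 Crypto.
static uint8x16_t aesenc_neon(uint8x16_t x, uint8x16_t key) {
    // vaeseq_u8: AddRoundKey(zero) + SubBytes + ShiftRows
    // vaesmcq_u8: MixColumns; veorq_u8: the real AddRoundKey
    return veorq_u8(vaesmcq_u8(vaeseq_u8(x, vdupq_n_u8(0))), key);
}

int main() {
    uint8_t in[16], k[16], out[16];
    for (int i = 0; i < 16; ++i) { in[i] = (uint8_t)i; k[i] = (uint8_t)(255 - i); }

    vst1q_u8(out, aesenc_neon(vld1q_u8(in), vld1q_u8(k)));

    for (int i = 0; i < 16; ++i) printf("%02x", out[i]);
    printf("\n");
    return 0;
}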
diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h
index f1a0fbe7..04686f0c 100644
--- a/src/crypto/CryptoNight_x86.h
+++ b/src/crypto/CryptoNight_x86.h
@@ -204,15 +204,16 @@ aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6,
           __m128i* x7)
 {
     if (SOFT_AES) {
-        *x0 = soft_aesenc(*x0, key);
-        *x1 = soft_aesenc(*x1, key);
-        *x2 = soft_aesenc(*x2, key);
-        *x3 = soft_aesenc(*x3, key);
-        *x4 = soft_aesenc(*x4, key);
-        *x5 = soft_aesenc(*x5, key);
-        *x6 = soft_aesenc(*x6, key);
-        *x7 = soft_aesenc(*x7, key);
-    } else {
+        *x0 = soft_aesenc((uint32_t*)x0, key);
+        *x1 = soft_aesenc((uint32_t*)x1, key);
+        *x2 = soft_aesenc((uint32_t*)x2, key);
+        *x3 = soft_aesenc((uint32_t*)x3, key);
+        *x4 = soft_aesenc((uint32_t*)x4, key);
+        *x5 = soft_aesenc((uint32_t*)x5, key);
+        *x6 = soft_aesenc((uint32_t*)x6, key);
+        *x7 = soft_aesenc((uint32_t*)x7, key);
+    }
+    else {
         *x0 = _mm_aesenc_si128(*x0, key);
         *x1 = _mm_aesenc_si128(*x1, key);
         *x2 = _mm_aesenc_si128(*x2, key);
@@ -352,8 +353,7 @@ public:
     for (size_t i = 0; i < ITERATIONS; i++) {
         for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
-            __m128i cx;
-            cx = _mm_load_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK]);
+            __m128i cx = _mm_load_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK]);
 
             if (SOFT_AES) {
-                cx = soft_aesenc(cx, _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
+                cx = soft_aesenc((uint32_t*)&l[hashBlock][idx[hashBlock] & MASK], _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
@@ -421,11 +421,12 @@ public:
     idx = h[0] ^ h[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx = _mm_load_si128((__m128i*) &l[idx & MASK]);
+        __m128i cx;
 
         if (SOFT_AES) {
-            cx = soft_aesenc(cx, _mm_set_epi64x(ah, al));
+            cx = soft_aesenc((uint32_t*)&l[idx & MASK], _mm_set_epi64x(ah, al));
         } else {
+            cx = _mm_load_si128((__m128i*) &l[idx & MASK]);
             cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah, al));
         }
@@ -487,13 +488,16 @@ public:
     uint64_t idx1 = h1[0] ^ h1[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
-        __m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+        __m128i cx0;
+        __m128i cx1;
 
         if (SOFT_AES) {
-            cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
-            cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
+            cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
+            cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
         } else {
+            cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+            cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+
             cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
             cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
         }
@@ -588,15 +592,19 @@ public:
     uint64_t idx2 = h2[0] ^ h2[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
-        __m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
-        __m128i cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
+        __m128i cx0;
+        __m128i cx1;
+        __m128i cx2;
 
         if (SOFT_AES) {
-            cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
-            cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
-            cx2 = soft_aesenc(cx2, _mm_set_epi64x(ah2, al2));
+            cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
+            cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
+            cx2 = soft_aesenc((uint32_t*)&l2[idx2 & MASK], _mm_set_epi64x(ah2, al2));
         } else {
+            cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+            cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+            cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
+
             cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
             cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
             cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2));
@@ -723,17 +731,22 @@ public:
     uint64_t idx3 = h3[0] ^ h3[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
-        __m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
-        __m128i cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
-        __m128i cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
+        __m128i cx0;
+        __m128i cx1;
+        __m128i cx2;
+        __m128i cx3;
 
         if (SOFT_AES) {
-            cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
-            cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
-            cx2 = soft_aesenc(cx2, _mm_set_epi64x(ah2, al2));
-            cx3 = soft_aesenc(cx3, _mm_set_epi64x(ah3, al3));
+            cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
+            cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
+            cx2 = soft_aesenc((uint32_t*)&l2[idx2 & MASK], _mm_set_epi64x(ah2, al2));
+            cx3 = soft_aesenc((uint32_t*)&l3[idx3 & MASK], _mm_set_epi64x(ah3, al3));
         } else {
+            cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+            cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+            cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
+            cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
+
             cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
             cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
             cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2));
@@ -890,19 +903,25 @@ public:
     uint64_t idx4 = h4[0] ^ h4[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
-        __m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
-        __m128i cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
-        __m128i cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
-        __m128i cx4 = _mm_load_si128((__m128i*) &l4[idx4 & MASK]);
+        __m128i cx0;
+        __m128i cx1;
+        __m128i cx2;
+        __m128i cx3;
+        __m128i cx4;
 
         if (SOFT_AES) {
-            cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
-            cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
-            cx2 = soft_aesenc(cx2, _mm_set_epi64x(ah2, al2));
-            cx3 = soft_aesenc(cx3, _mm_set_epi64x(ah3, al3));
-            cx4 = soft_aesenc(cx4, _mm_set_epi64x(ah4, al4));
+            cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
+            cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
+            cx2 = soft_aesenc((uint32_t*)&l2[idx2 & MASK], _mm_set_epi64x(ah2, al2));
+            cx3 = soft_aesenc((uint32_t*)&l3[idx3 & MASK], _mm_set_epi64x(ah3, al3));
+            cx4 = soft_aesenc((uint32_t*)&l4[idx4 & MASK], _mm_set_epi64x(ah4, al4));
         } else {
+            cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+            cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+            cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
+            cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
+            cx4 = _mm_load_si128((__m128i*) &l4[idx4 & MASK]);
+
             cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
             cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
             cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2));
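Across both files the pattern is the same: the scratchpad load is hoisted into the hardware branch, and the software branch reads the four 32-bit lanes of the scratchpad line directly instead of doing an aligned vector load and then peeling the lanes back out of an SSE register. A minimal sketch contrasting the two lane-extraction styles (data and names are illustrative):

// lane_extract.cpp - old vs new lane access in the SOFT_AES path.
// Build: g++ -O2 lane_extract.cpp
#include <emmintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
    alignas(16) uint32_t line[4] = { 1, 2, 3, 4 };   // one scratchpad line

    // Old style: vector load, then per-lane extraction via shuffle/convert.
    __m128i v = _mm_load_si128((const __m128i*)line);
    uint32_t a0 = (uint32_t)_mm_cvtsi128_si32(v);
    uint32_t a1 = (uint32_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(v, 0x55));

    // New style: read the lanes straight from memory, no register round trip.
    uint32_t b0 = line[0];
    uint32_t b1 = line[1];

    printf("%u %u / %u %u\n", a0, a1, b0, b1);   // prints "1 2 / 1 2"
    return 0;
}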
diff --git a/src/crypto/soft_aes.h b/src/crypto/soft_aes.h
index 99321c4e..0703f98d 100644
--- a/src/crypto/soft_aes.h
+++ b/src/crypto/soft_aes.h
@@ -89,34 +89,34 @@ alignas(16) const uint32_t saes_table[4][256] = { saes_data(saes_u0), saes_data(saes_u1), saes_data(saes_u2), saes_data(saes_u3) };
 alignas(16) const uint8_t saes_sbox[256] = saes_data(saes_h0);
 
-static inline __m128i soft_aesenc(__m128i in, __m128i key)
+static inline __m128i soft_aesenc(const uint32_t* in, __m128i key)
 {
-    const uint32_t x0 = _mm_cvtsi128_si32(in);
-    const uint32_t x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55));
-    const uint32_t x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA));
-    const uint32_t x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF));
+    const uint32_t x0 = in[0];
+    const uint32_t x1 = in[1];
+    const uint32_t x2 = in[2];
+    const uint32_t x3 = in[3];
 
-    __m128i out = _mm_set_epi32(
-        (saes_table[0][x3 & 0xff] ^ saes_table[1][(x0 >> 8) & 0xff] ^ saes_table[2][(x1 >> 16) & 0xff] ^ saes_table[3][x2 >> 24]),
-        (saes_table[0][x2 & 0xff] ^ saes_table[1][(x3 >> 8) & 0xff] ^ saes_table[2][(x0 >> 16) & 0xff] ^ saes_table[3][x1 >> 24]),
-        (saes_table[0][x1 & 0xff] ^ saes_table[1][(x2 >> 8) & 0xff] ^ saes_table[2][(x3 >> 16) & 0xff] ^ saes_table[3][x0 >> 24]),
-        (saes_table[0][x0 & 0xff] ^ saes_table[1][(x1 >> 8) & 0xff] ^ saes_table[2][(x2 >> 16) & 0xff] ^ saes_table[3][x3 >> 24]));
+    __m128i out = _mm_set_epi32(
+        (saes_table[0][x3 & 0xff] ^ saes_table[1][(x0 >> 8) & 0xff] ^ saes_table[2][(x1 >> 16) & 0xff] ^ saes_table[3][x2 >> 24]),
+        (saes_table[0][x2 & 0xff] ^ saes_table[1][(x3 >> 8) & 0xff] ^ saes_table[2][(x0 >> 16) & 0xff] ^ saes_table[3][x1 >> 24]),
+        (saes_table[0][x1 & 0xff] ^ saes_table[1][(x2 >> 8) & 0xff] ^ saes_table[2][(x3 >> 16) & 0xff] ^ saes_table[3][x0 >> 24]),
+        (saes_table[0][x0 & 0xff] ^ saes_table[1][(x1 >> 8) & 0xff] ^ saes_table[2][(x2 >> 16) & 0xff] ^ saes_table[3][x3 >> 24]));
 
-    return _mm_xor_si128(out, key);
+    return _mm_xor_si128(out, key);
 }
 
 static inline uint32_t sub_word(uint32_t key)
 {
-    return (saes_sbox[key >> 24] << 24)          |
-           (saes_sbox[(key >> 16) & 0xff] << 16) |
-           (saes_sbox[(key >> 8)  & 0xff] << 8)  |
-            saes_sbox[key & 0xff];
+    return (saes_sbox[key >> 24] << 24)          |
+           (saes_sbox[(key >> 16) & 0xff] << 16) |
+           (saes_sbox[(key >> 8)  & 0xff] << 8)  |
+            saes_sbox[key & 0xff];
 }
 
 #if defined(__clang__) || defined(XMRIG_ARM)
 static inline uint32_t _rotr(uint32_t value, uint32_t amount)
 {
-    return (value >> amount) | (value << ((32 - amount) & 31));
+    return (value >> amount) | (value << ((32 - amount) & 31));
 }
 #endif
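Since soft_aesenc's signature changed from taking an __m128i to taking a pointer into the scratchpad, a quick equivalence check against the hardware instruction is a cheap way to catch regressions at every call site. A sketch of such a check, assuming an AES-NI capable host and that src/ is on the include path; the file name and build line are hypothetical:

// check_soft_aes.cpp - verify the table-driven AES round against AES-NI.
// Build (assumed layout): g++ -O2 -maes -Isrc check_soft_aes.cpp
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <wmmintrin.h>
#include "crypto/soft_aes.h"

int main() {
    // One 16-byte "scratchpad line" as four little-endian dwords.
    alignas(16) uint32_t state[4] = { 0x03020100, 0x07060504,
                                      0x0b0a0908, 0x0f0e0d0c };
    const __m128i key = _mm_set_epi32(0x1f1e1d1c, 0x1b1a1918,
                                      0x17161514, 0x13121110);

    // Software round via the new pointer-based entry point.
    const __m128i soft = soft_aesenc(state, key);
    // Hardware round over the same data.
    const __m128i hard = _mm_aesenc_si128(_mm_load_si128((const __m128i*)state), key);

    if (memcmp(&soft, &hard, sizeof(soft)) != 0) {
        printf("soft_aesenc mismatch\n");
        return 1;
    }
    printf("soft_aesenc matches _mm_aesenc_si128\n");
    return 0;
}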