Merge xmrig v6.16.1 into master

2021-11-29 15:34:55 +00:00 · 2021-11-29 15:34:55 +00:00 · ecdb1929e2
commit ecdb1929e2
parent dabb2f7118 00990f2649
121 changed files with 1944 additions and 1118 deletions
--- a/src/crypto/astrobwt/salsa20_ref/ecrypt-config.h
+++ b/src/crypto/astrobwt/salsa20_ref/ecrypt-config.h
@ -9,7 +9,7 @@

 /* Guess the endianness of the target architecture. */

-/* 
+/*
 * The LITTLE endian machines:
 */
 #if defined(__ultrix)           /* Older MIPS */
@ -27,15 +27,15 @@
 #elif defined(__INTEL_COMPILER) /* x86 (surely Intel compiler icl.exe) */
 #define ECRYPT_LITTLE_ENDIAN

-/* 
- * The BIG endian machines: 
+/*
+ * The BIG endian machines:
 */
 #elif defined(sun)              /* Newer Sparc's */
 #define ECRYPT_BIG_ENDIAN
 #elif defined(__ppc__)          /* PowerPC */
 #define ECRYPT_BIG_ENDIAN

-/* 
+/*
 * Finally machines with UNKNOWN endianness:
 */
 #elif defined (_AIX)            /* RS6000 */
--- a/src/crypto/astrobwt/salsa20_ref/ecrypt-portable.h
+++ b/src/crypto/astrobwt/salsa20_ref/ecrypt-portable.h
@ -5,8 +5,8 @@
 * and should be used carefully. They should NOT be used with
 * parameters which perform some action. E.g., the following two lines
 * are not equivalent:
- * 
- *  1) ++x; y = ROTL32(x, n); 
+ *
+ *  1) ++x; y = ROTL32(x, n);
 *  2) y = ROTL32(++x, n);
 */

--- a/src/crypto/astrobwt/salsa20_ref/ecrypt-sync.h
+++ b/src/crypto/astrobwt/salsa20_ref/ecrypt-sync.h
@ -1,9 +1,9 @@
 /* ecrypt-sync.h */

-/* 
+/*
 * Header file for synchronous stream ciphers without authentication
 * mechanism.
- * 
+ *
 * *** Please only edit parts marked with "[edit]". ***
 */

@ -16,10 +16,10 @@

 /* Cipher parameters */

-/* 
+/*
 * The name of your cipher.
 */
-#define ECRYPT_NAME "Salsa20"    /* [edit] */ 
+#define ECRYPT_NAME "Salsa20"    /* [edit] */
 #define ECRYPT_PROFILE "S!_H."

 /*
@ -47,15 +47,15 @@

 /* Data structures */

-/* 
+/*
 * ECRYPT_ctx is the structure containing the representation of the
- * internal state of your cipher. 
+ * internal state of your cipher.
 */

 typedef struct
 {
  u32 input[16]; /* could be compressed */
-  /* 
+  /*
   * [edit]
   *
   * Put here all state variable needed during the encryption process.
@ -79,10 +79,10 @@ void ECRYPT_init();
 * above.
 */
 void ECRYPT_keysetup(
-  ECRYPT_ctx* ctx, 
-  const u8* key, 
-  u32 keysize,                /* Key size in bits. */ 
-  u32 ivsize);                /* IV size in bits. */ 
+  ECRYPT_ctx* ctx,
+  const u8* key,
+  u32 keysize,                /* Key size in bits. */
+  u32 ivsize);                /* IV size in bits. */

 /*
 * IV setup. After having called ECRYPT_keysetup(), the user is
@ -91,7 +91,7 @@ void ECRYPT_keysetup(
 * IV's.
 */
 void ECRYPT_ivsetup(
-  ECRYPT_ctx* ctx, 
+  ECRYPT_ctx* ctx,
  const u8* iv);

 /*
@ -102,7 +102,7 @@ void ECRYPT_ivsetup(
 * (declared here) encrypts byte strings of arbitrary length, while
 * the ECRYPT_encrypt_blocks() function (defined later) only accepts
 * lengths which are multiples of ECRYPT_BLOCKLENGTH.
- * 
+ *
 * The user is allowed to make multiple calls to
 * ECRYPT_encrypt_blocks() to incrementally encrypt a long message,
 * but he is NOT allowed to make additional encryption calls once he
@ -122,7 +122,7 @@ void ECRYPT_ivsetup(
 *
 * ECRYPT_ivsetup();
 * ECRYPT_encrypt_bytes();
- * 
+ *
 * The following sequence is not:
 *
 * ECRYPT_keysetup();
@ -133,22 +133,22 @@ void ECRYPT_ivsetup(
 */

 void ECRYPT_encrypt_bytes(
-  ECRYPT_ctx* ctx, 
-  const u8* plaintext, 
-  u8* ciphertext, 
-  u32 msglen);                /* Message length in bytes. */ 
+  ECRYPT_ctx* ctx,
+  const u8* plaintext,
+  u8* ciphertext,
+  u32 msglen);                /* Message length in bytes. */

 void ECRYPT_decrypt_bytes(
-  ECRYPT_ctx* ctx, 
-  const u8* ciphertext, 
-  u8* plaintext, 
-  u32 msglen);                /* Message length in bytes. */ 
+  ECRYPT_ctx* ctx,
+  const u8* ciphertext,
+  u8* plaintext,
+  u32 msglen);                /* Message length in bytes. */

 /* ------------------------------------------------------------------------- */

 /* Optional features */

-/* 
+/*
 * For testing purposes it can sometimes be useful to have a function
 * which immediately generates keystream without having to provide it
 * with a zero plaintext. If your cipher cannot provide this function
@ -170,7 +170,7 @@ void ECRYPT_keystream_bytes(

 /* Optional optimizations */

-/* 
+/*
 * By default, the functions in this section are implemented using
 * calls to functions declared above. However, you might want to
 * implement them differently for performance reasons.
@ -186,22 +186,22 @@ void ECRYPT_keystream_bytes(
 #define ECRYPT_USES_DEFAULT_ALL_IN_ONE        /* [edit] */

 void ECRYPT_encrypt_packet(
-  ECRYPT_ctx* ctx, 
+  ECRYPT_ctx* ctx,
  const u8* iv,
-  const u8* plaintext, 
-  u8* ciphertext, 
+  const u8* plaintext,
+  u8* ciphertext,
  u32 msglen);

 void ECRYPT_decrypt_packet(
-  ECRYPT_ctx* ctx, 
+  ECRYPT_ctx* ctx,
  const u8* iv,
-  const u8* ciphertext, 
-  u8* plaintext, 
+  const u8* ciphertext,
+  u8* plaintext,
  u32 msglen);

 /*
 * Encryption/decryption of blocks.
- * 
+ *
 * By default, these functions are defined as macros. If you want to
 * provide a different implementation, please undef the
 * ECRYPT_USES_DEFAULT_BLOCK_MACROS flag and implement the functions
@ -232,23 +232,23 @@ void ECRYPT_decrypt_packet(
 #else

 void ECRYPT_encrypt_blocks(
-  ECRYPT_ctx* ctx, 
-  const u8* plaintext, 
-  u8* ciphertext, 
-  u32 blocks);                /* Message length in blocks. */ 
+  ECRYPT_ctx* ctx,
+  const u8* plaintext,
+  u8* ciphertext,
+  u32 blocks);                /* Message length in blocks. */

 void ECRYPT_decrypt_blocks(
-  ECRYPT_ctx* ctx, 
-  const u8* ciphertext, 
-  u8* plaintext, 
-  u32 blocks);                /* Message length in blocks. */ 
+  ECRYPT_ctx* ctx,
+  const u8* ciphertext,
+  u8* plaintext,
+  u32 blocks);                /* Message length in blocks. */

 #ifdef ECRYPT_GENERATES_KEYSTREAM

 void ECRYPT_keystream_blocks(
  ECRYPT_ctx* ctx,
  const u8* keystream,
-  u32 blocks);                /* Keystream length in blocks. */ 
+  u32 blocks);                /* Keystream length in blocks. */

 #endif

@ -262,7 +262,7 @@ void ECRYPT_keystream_blocks(
 * significant difference and keep the number of variants
 * (ECRYPT_MAXVARIANT) as small as possible (definitely not more than
 * 10). Note also that all variants should have exactly the same
- * external interface (i.e., the same ECRYPT_BLOCKLENGTH, etc.). 
+ * external interface (i.e., the same ECRYPT_BLOCKLENGTH, etc.).
 */
 #define ECRYPT_MAXVARIANT 1                   /* [edit] */

--- a/src/crypto/cn/CnHash.cpp
+++ b/src/crypto/cn/CnHash.cpp
@ -354,7 +354,7 @@ xmrig::cn_hash_fun xmrig::CnHash::fn(const Algorithm &algorithm, AlgoVariant av,

 #   ifdef XMRIG_ALGO_CN_HEAVY
    // cn-heavy optimization for Zen3 CPUs
-    if ((av == AV_SINGLE) && (assembly != Assembly::NONE) && (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3)) {
+    if ((av == AV_SINGLE) && (assembly != Assembly::NONE) && (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) && (Cpu::info()->model() == 0x21)) {
        switch (algorithm.id()) {
        case Algorithm::CN_HEAVY_0:
            return cryptonight_single_hash<Algorithm::CN_HEAVY_0, false, 3>;
--- a/src/crypto/cn/CryptoNight_test.h
+++ b/src/crypto/cn/CryptoNight_test.h
@ -466,6 +466,29 @@ const static uint8_t astrobwt_dero_test_out[256] = {
 #endif


+#ifdef XMRIG_ALGO_GHOSTRIDER
+// "GhostRider"
+const static uint8_t test_output_gr[256] = {
+    0x42, 0x17, 0x0C, 0xC1, 0x85, 0xE6, 0x76, 0x3C, 0xC7, 0xCB, 0x27, 0xC4, 0x17, 0x39, 0x2D, 0xE2,
+    0x29, 0x6B, 0x40, 0x66, 0x85, 0xA4, 0xE3, 0xD3, 0x8C, 0xE9, 0xA5, 0x8F, 0x10, 0xFC, 0x81, 0xE4,
+    0x90, 0x56, 0xF2, 0x9E, 0x00, 0xD0, 0xF8, 0xA1, 0x88, 0x82, 0x86, 0xC0, 0x86, 0x04, 0x6B, 0x0E,
+    0x9A, 0xDB, 0xDB, 0xFD, 0x23, 0x16, 0x77, 0x94, 0xFE, 0x58, 0x93, 0x05, 0x10, 0x3F, 0x27, 0x75,
+    0x51, 0x44, 0xF3, 0x5F, 0xE2, 0xF9, 0x61, 0xBE, 0xC0, 0x30, 0xB5, 0x8E, 0xB1, 0x1B, 0xA1, 0xF7,
+    0x06, 0x4E, 0xF1, 0x6A, 0xFD, 0xA5, 0x44, 0x8E, 0x64, 0x47, 0x8C, 0x67, 0x51, 0xE2, 0x5C, 0x55,
+    0x3E, 0x39, 0xA6, 0xA5, 0xF7, 0xB8, 0xD0, 0x5E, 0xE2, 0xBF, 0x92, 0x44, 0xD9, 0xAA, 0x76, 0x22,
+    0xE3, 0x3E, 0x15, 0x96, 0xD8, 0x6A, 0x78, 0x2D, 0xA9, 0x77, 0x24, 0x1A, 0x4B, 0xE7, 0x5A, 0x2E,
+    0x89, 0x77, 0xAE, 0x92, 0xE4, 0xA4, 0x2D, 0xAF, 0x0B, 0x27, 0x09, 0xB2, 0x5F, 0x95, 0x61, 0xA9,
+    0xA8, 0xBE, 0x5D, 0x39, 0xBE, 0x41, 0x5F, 0x9C, 0x67, 0x28, 0x48, 0x4F, 0xAE, 0x2A, 0x50, 0x2B,
+    0xB8, 0xC7, 0x42, 0x73, 0x51, 0x60, 0x59, 0xD8, 0x9C, 0xBA, 0x22, 0x2F, 0x8E, 0x34, 0xDE, 0xC8,
+    0x1B, 0xAE, 0x9E, 0xBD, 0xF7, 0xE8, 0xFD, 0x8A, 0x97, 0xBE, 0xF0, 0x47, 0xAC, 0x27, 0xDD, 0x28,
+    0xC9, 0x28, 0xA8, 0x7B, 0x2A, 0xB8, 0x90, 0x3E, 0xCA, 0xB4, 0x78, 0x44, 0xCE, 0xCD, 0x91, 0xEC,
+    0xC2, 0x5A, 0x17, 0x59, 0x7C, 0x14, 0xF8, 0x95, 0x28, 0x14, 0xC3, 0xAD, 0xC4, 0xE1, 0x13, 0x5A,
+    0xC4, 0xA7, 0xC7, 0x77, 0xAD, 0xF8, 0x09, 0x61, 0x16, 0xBB, 0xAA, 0x7E, 0xAB, 0xC3, 0x00, 0x25,
+    0xBA, 0xA8, 0x97, 0xC7, 0x7D, 0x38, 0x46, 0x0E, 0x59, 0xAC, 0xCB, 0xAE, 0xFE, 0x3C, 0x6F, 0x01
+};
+#endif
+
+
 } // namespace xmrig


--- a/src/crypto/cn/CryptoNight_x86.h
+++ b/src/crypto/cn/CryptoNight_x86.h
@ -43,6 +43,11 @@
 #include "crypto/cn/soft_aes.h"


+#ifdef XMRIG_VAES
+#   include "crypto/cn/CryptoNight_x86_vaes.h"
+#endif
+
+
 extern "C"
 {
 #include "crypto/cn/c_groestl.h"
@ -289,6 +294,13 @@ static NOINLINE void cn_explode_scratchpad(cryptonight_ctx *ctx)
 {
    constexpr CnAlgo<ALGO> props;

+#   ifdef XMRIG_VAES
+    if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
+        cn_explode_scratchpad_vaes<ALGO>(ctx);
+        return;
+    }
+#   endif
+
    constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);

    __m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
@ -341,7 +353,7 @@ static NOINLINE void cn_explode_scratchpad(cryptonight_ctx *ctx)
    constexpr int output_increment = (64 << interleave) / sizeof(__m128i);
    constexpr int prefetch_dist = 2048 / sizeof(__m128i);

-    __m128i* e = output + N - prefetch_dist;
+    __m128i* e = output + (N << interleave) - prefetch_dist;
    __m128i* prefetch_ptr = output + prefetch_dist;

    for (int i = 0; i < 2; ++i) {
@ -396,6 +408,13 @@ static NOINLINE void cn_implode_scratchpad(cryptonight_ctx *ctx)
 {
    constexpr CnAlgo<ALGO> props;

+#   ifdef XMRIG_VAES
+    if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
+        cn_implode_scratchpad_vaes<ALGO>(ctx);
+        return;
+    }
+#   endif
+
 #   ifdef XMRIG_ALGO_CN_GPU
    constexpr bool IS_HEAVY = props.isHeavy() || ALGO == Algorithm::CN_GPU;
 #   else
@ -1070,8 +1089,17 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
        ctx[0]->first_half = true;
        ctx[1]->first_half = true;
    }
-    cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
-    cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
+
+#   ifdef XMRIG_VAES
+    if (!props.isHeavy() && Cpu::info()->hasVAES()) {
+        cn_explode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
+    }
+    else
+#   endif
+    {
+        cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
+        cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
+    }

    if (ALGO == Algorithm::CN_2) {
        cnv2_double_mainloop_sandybridge_asm(ctx);
@ -1110,8 +1138,16 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
        ctx[0]->generated_code(ctx);
    }

-    cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
-    cn_implode_scratchpad<ALGO, false, 0>(ctx[1]);
+#   ifdef XMRIG_VAES
+    if (!props.isHeavy() && Cpu::info()->hasVAES()) {
+        cn_implode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
+    }
+    else
+#   endif
+    {
+        cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
+        cn_implode_scratchpad<ALGO, false, 0>(ctx[1]);
+    }

    keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
    keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
@ -1166,8 +1202,17 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
        ctx[0]->first_half = true;
        ctx[1]->first_half = true;
    }
-    cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
-    cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
+
+#   ifdef XMRIG_VAES
+    if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
+        cn_explode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
+    }
+    else
+#   endif
+    {
+        cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
+        cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
+    }

    uint64_t al0 = h0[0] ^ h0[4];
    uint64_t al1 = h1[0] ^ h1[4];
@ -1362,8 +1407,16 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
        bx10 = cx1;
    }

-    cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
-    cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
+#   ifdef XMRIG_VAES
+    if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
+        cn_implode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
+    }
+    else
+#   endif
+    {
+        cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
+        cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
+    }

    keccakf(h0, 24);
    keccakf(h1, 24);
@ -1424,10 +1477,19 @@ void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, u
        ctx[3]->first_half = true;
    }

-    cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
-    cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
-    cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
-    cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
+#   ifdef XMRIG_VAES
+    if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
+        cn_explode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
+        cn_explode_scratchpad_vaes_double<ALGO>(ctx[2], ctx[3]);
+    }
+    else
+#   endif
+    {
+        cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
+        cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
+        cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
+        cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
+    }

    uint64_t al0 = h0[0] ^ h0[4];
    uint64_t al1 = h1[0] ^ h1[4];
@ -1548,10 +1610,19 @@ void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, u
        if (!SOFT_AES) cx3 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l3[idx3 & MASK]));
    }

-    cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
-    cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
-    cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
-    cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
+#   ifdef XMRIG_VAES
+    if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
+        cn_implode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
+        cn_implode_scratchpad_vaes_double<ALGO>(ctx[2], ctx[3]);
+    }
+    else
+#   endif
+    {
+        cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
+        cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
+        cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
+        cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
+    }

    keccakf(h0, 24);
    keccakf(h1, 24);
@ -1788,7 +1859,20 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
        if (props.half_mem()) {
            ctx[i]->first_half = true;
        }
-        cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
+    }
+
+#   ifdef XMRIG_VAES
+    if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
+        cn_explode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
+        cn_explode_scratchpad_vaes_double<ALGO>(ctx[2], ctx[3]);
+    }
+    else
+#   endif
+    {
+        cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
+        cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
+        cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
+        cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
    }

    uint8_t* l0  = ctx[0]->memory;
@ -1840,8 +1924,21 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
        CN_STEP4(3, ax3, bx30, bx31, cx3, l3, mc3, ptr3, idx3);
    }

+#   ifdef XMRIG_VAES
+    if (!SOFT_AES && !props.isHeavy() && Cpu::info()->hasVAES()) {
+        cn_implode_scratchpad_vaes_double<ALGO>(ctx[0], ctx[1]);
+        cn_implode_scratchpad_vaes_double<ALGO>(ctx[2], ctx[3]);
+    }
+    else
+#   endif
+    {
+        cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
+        cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
+        cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
+        cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
+    }
+
    for (size_t i = 0; i < 4; i++) {
-        cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
        keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
        extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
    }
--- a/src/crypto/cn/CryptoNight_x86_vaes.cpp
+++ b/src/crypto/cn/CryptoNight_x86_vaes.cpp
@ -0,0 +1,530 @@
+/* XMRig
+ * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
+ * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
+ * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
+ * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
+ * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
+ * Copyright 2017-2019 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
+ * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
+ * Copyright 2018-2020 SChernykh   <https://github.com/SChernykh>
+ * Copyright 2016-2020 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "CryptoNight_x86_vaes.h"
+#include "CryptoNight_monero.h"
+#include "CryptoNight.h"
+
+
+#ifdef __GNUC__
+#   include <x86intrin.h>
+#if !defined(__clang__) && !defined(__ICC) && __GNUC__ < 10
+static inline __m256i
+__attribute__((__always_inline__))
+  _mm256_loadu2_m128i(const __m128i* const hiaddr, const __m128i* const loaddr)
+{
+    return _mm256_inserti128_si256(
+            _mm256_castsi128_si256(_mm_loadu_si128(loaddr)), _mm_loadu_si128(hiaddr), 1);
+}
+
+static inline void
+__attribute__((__always_inline__))
+  _mm256_storeu2_m128i(__m128i* const hiaddr, __m128i* const loaddr, const __m256i a)
+{
+    _mm_storeu_si128(loaddr, _mm256_castsi256_si128(a));
+      _mm_storeu_si128(hiaddr, _mm256_extracti128_si256(a, 1));
+}
+#endif
+#else
+#   include <intrin.h>
+#endif
+
+
+// This will shift and xor tmp1 into itself as 4 32-bit vals such as
+// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
+static FORCEINLINE __m128i sl_xor(__m128i tmp1)
+{
+    __m128i tmp4;
+    tmp4 = _mm_slli_si128(tmp1, 0x04);
+    tmp1 = _mm_xor_si128(tmp1, tmp4);
+    tmp4 = _mm_slli_si128(tmp4, 0x04);
+    tmp1 = _mm_xor_si128(tmp1, tmp4);
+    tmp4 = _mm_slli_si128(tmp4, 0x04);
+    tmp1 = _mm_xor_si128(tmp1, tmp4);
+    return tmp1;
+}
+
+
+template<uint8_t rcon>
+static FORCEINLINE void aes_genkey_sub(__m128i* xout0, __m128i* xout2)
+{
+    __m128i xout1 = _mm_aeskeygenassist_si128(*xout2, rcon);
+    xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
+    *xout0 = sl_xor(*xout0);
+    *xout0 = _mm_xor_si128(*xout0, xout1);
+    xout1 = _mm_aeskeygenassist_si128(*xout0, 0x00);
+    xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
+    *xout2 = sl_xor(*xout2);
+    *xout2 = _mm_xor_si128(*xout2, xout1);
+}
+
+
+static NOINLINE void vaes_genkey(const __m128i* memory, __m256i* k0, __m256i* k1, __m256i* k2, __m256i* k3, __m256i* k4, __m256i* k5, __m256i* k6, __m256i* k7, __m256i* k8, __m256i* k9)
+{
+    __m128i xout0 = _mm_load_si128(memory);
+    __m128i xout2 = _mm_load_si128(memory + 1);
+    *k0 = _mm256_set_m128i(xout0, xout0);
+    *k1 = _mm256_set_m128i(xout2, xout2);
+
+    aes_genkey_sub<0x01>(&xout0, &xout2);
+    *k2 = _mm256_set_m128i(xout0, xout0);
+    *k3 = _mm256_set_m128i(xout2, xout2);
+
+    aes_genkey_sub<0x02>(&xout0, &xout2);
+    *k4 = _mm256_set_m128i(xout0, xout0);
+    *k5 = _mm256_set_m128i(xout2, xout2);
+
+    aes_genkey_sub<0x04>(&xout0, &xout2);
+    *k6 = _mm256_set_m128i(xout0, xout0);
+    *k7 = _mm256_set_m128i(xout2, xout2);
+
+    aes_genkey_sub<0x08>(&xout0, &xout2);
+    *k8 = _mm256_set_m128i(xout0, xout0);
+    *k9 = _mm256_set_m128i(xout2, xout2);
+}
+
+
+static NOINLINE void vaes_genkey_double(const __m128i* memory1, const __m128i* memory2, __m256i* k0, __m256i* k1, __m256i* k2, __m256i* k3, __m256i* k4, __m256i* k5, __m256i* k6, __m256i* k7, __m256i* k8, __m256i* k9)
+{
+    __m128i xout0 = _mm_load_si128(memory1);
+    __m128i xout1 = _mm_load_si128(memory1 + 1);
+    __m128i xout2 = _mm_load_si128(memory2);
+    __m128i xout3 = _mm_load_si128(memory2 + 1);
+    *k0 = _mm256_set_m128i(xout2, xout0);
+    *k1 = _mm256_set_m128i(xout3, xout1);
+
+    aes_genkey_sub<0x01>(&xout0, &xout1);
+    aes_genkey_sub<0x01>(&xout2, &xout3);
+    *k2 = _mm256_set_m128i(xout2, xout0);
+    *k3 = _mm256_set_m128i(xout3, xout1);
+
+    aes_genkey_sub<0x02>(&xout0, &xout1);
+    aes_genkey_sub<0x02>(&xout2, &xout3);
+    *k4 = _mm256_set_m128i(xout2, xout0);
+    *k5 = _mm256_set_m128i(xout3, xout1);
+
+    aes_genkey_sub<0x04>(&xout0, &xout1);
+    aes_genkey_sub<0x04>(&xout2, &xout3);
+    *k6 = _mm256_set_m128i(xout2, xout0);
+    *k7 = _mm256_set_m128i(xout3, xout1);
+
+    aes_genkey_sub<0x08>(&xout0, &xout1);
+    aes_genkey_sub<0x08>(&xout2, &xout3);
+    *k8 = _mm256_set_m128i(xout2, xout0);
+    *k9 = _mm256_set_m128i(xout3, xout1);
+}
+
+
+static FORCEINLINE void vaes_round(__m256i key, __m256i& x01, __m256i& x23, __m256i& x45, __m256i& x67)
+{
+    x01 = _mm256_aesenc_epi128(x01, key);
+    x23 = _mm256_aesenc_epi128(x23, key);
+    x45 = _mm256_aesenc_epi128(x45, key);
+    x67 = _mm256_aesenc_epi128(x67, key);
+}
+
+
+static FORCEINLINE void vaes_round(__m256i key, __m256i& x0, __m256i& x1, __m256i& x2, __m256i& x3, __m256i& x4, __m256i& x5, __m256i& x6, __m256i& x7)
+{
+    x0 = _mm256_aesenc_epi128(x0, key);
+    x1 = _mm256_aesenc_epi128(x1, key);
+    x2 = _mm256_aesenc_epi128(x2, key);
+    x3 = _mm256_aesenc_epi128(x3, key);
+    x4 = _mm256_aesenc_epi128(x4, key);
+    x5 = _mm256_aesenc_epi128(x5, key);
+    x6 = _mm256_aesenc_epi128(x6, key);
+    x7 = _mm256_aesenc_epi128(x7, key);
+}
+
+
+namespace xmrig {
+
+
+template<Algorithm::Id ALGO>
+NOINLINE void cn_explode_scratchpad_vaes(cryptonight_ctx* ctx)
+{
+    constexpr CnAlgo<ALGO> props;
+
+    constexpr size_t N = (props.memory() / sizeof(__m256i)) / (props.half_mem() ? 2 : 1);
+
+    __m256i xin01, xin23, xin45, xin67;
+    __m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
+
+    const __m128i* input = reinterpret_cast<const __m128i*>(ctx->state);
+    __m256i* output = reinterpret_cast<__m256i*>(ctx->memory);
+
+    vaes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
+
+    if (props.half_mem() && !ctx->first_half) {
+        const __m256i* p = reinterpret_cast<const __m256i*>(ctx->save_state);
+        xin01 = _mm256_load_si256(p + 0);
+        xin23 = _mm256_load_si256(p + 1);
+        xin45 = _mm256_load_si256(p + 2);
+        xin67 = _mm256_load_si256(p + 3);
+    }
+    else {
+        xin01 = _mm256_load_si256(reinterpret_cast<const __m256i*>(input + 4));
+        xin23 = _mm256_load_si256(reinterpret_cast<const __m256i*>(input + 6));
+        xin45 = _mm256_load_si256(reinterpret_cast<const __m256i*>(input + 8));
+        xin67 = _mm256_load_si256(reinterpret_cast<const __m256i*>(input + 10));
+    }
+
+    constexpr int output_increment = 64 / sizeof(__m256i);
+    constexpr int prefetch_dist = 2048 / sizeof(__m256i);
+
+    __m256i* e = output + N - prefetch_dist;
+    __m256i* prefetch_ptr = output + prefetch_dist;
+
+    for (int i = 0; i < 2; ++i) {
+        do {
+            _mm_prefetch((const char*)(prefetch_ptr), _MM_HINT_T0);
+            _mm_prefetch((const char*)(prefetch_ptr + output_increment), _MM_HINT_T0);
+
+            vaes_round(k0, xin01, xin23, xin45, xin67);
+            vaes_round(k1, xin01, xin23, xin45, xin67);
+            vaes_round(k2, xin01, xin23, xin45, xin67);
+            vaes_round(k3, xin01, xin23, xin45, xin67);
+            vaes_round(k4, xin01, xin23, xin45, xin67);
+            vaes_round(k5, xin01, xin23, xin45, xin67);
+            vaes_round(k6, xin01, xin23, xin45, xin67);
+            vaes_round(k7, xin01, xin23, xin45, xin67);
+            vaes_round(k8, xin01, xin23, xin45, xin67);
+            vaes_round(k9, xin01, xin23, xin45, xin67);
+
+            _mm256_store_si256(output + 0, xin01);
+            _mm256_store_si256(output + 1, xin23);
+
+            _mm256_store_si256(output + output_increment + 0, xin45);
+            _mm256_store_si256(output + output_increment + 1, xin67);
+
+            output += output_increment * 2;
+            prefetch_ptr += output_increment * 2;
+        } while (output < e);
+        e += prefetch_dist;
+        prefetch_ptr = output;
+    }
+
+    if (props.half_mem() && ctx->first_half) {
+        __m256i* p = reinterpret_cast<__m256i*>(ctx->save_state);
+        _mm256_store_si256(p + 0, xin01);
+        _mm256_store_si256(p + 1, xin23);
+        _mm256_store_si256(p + 2, xin45);
+        _mm256_store_si256(p + 3, xin67);
+    }
+
+    _mm256_zeroupper();
+}
+
+
+template<Algorithm::Id ALGO>
+NOINLINE void cn_explode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2)
+{
+    constexpr CnAlgo<ALGO> props;
+
+    constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
+
+    __m256i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
+    __m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
+
+    const __m128i* input1 = reinterpret_cast<const __m128i*>(ctx1->state);
+    const __m128i* input2 = reinterpret_cast<const __m128i*>(ctx2->state);
+
+    __m128i* output1 = reinterpret_cast<__m128i*>(ctx1->memory);
+    __m128i* output2 = reinterpret_cast<__m128i*>(ctx2->memory);
+
+    vaes_genkey_double(input1, input2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
+
+    {
+        const bool b = props.half_mem() && !ctx1->first_half && !ctx2->first_half;
+        const __m128i* p1 = b ? reinterpret_cast<const __m128i*>(ctx1->save_state) : (input1 + 4);
+        const __m128i* p2 = b ? reinterpret_cast<const __m128i*>(ctx2->save_state) : (input2 + 4);
+        xin0 = _mm256_loadu2_m128i(p2 + 0, p1 + 0);
+        xin1 = _mm256_loadu2_m128i(p2 + 1, p1 + 1);
+        xin2 = _mm256_loadu2_m128i(p2 + 2, p1 + 2);
+        xin3 = _mm256_loadu2_m128i(p2 + 3, p1 + 3);
+        xin4 = _mm256_loadu2_m128i(p2 + 4, p1 + 4);
+        xin5 = _mm256_loadu2_m128i(p2 + 5, p1 + 5);
+        xin6 = _mm256_loadu2_m128i(p2 + 6, p1 + 6);
+        xin7 = _mm256_loadu2_m128i(p2 + 7, p1 + 7);
+    }
+
+    constexpr int output_increment = 64 / sizeof(__m128i);
+    constexpr int prefetch_dist = 2048 / sizeof(__m128i);
+
+    __m128i* e = output1 + N - prefetch_dist;
+    __m128i* prefetch_ptr1 = output1 + prefetch_dist;
+    __m128i* prefetch_ptr2 = output2 + prefetch_dist;
+
+    for (int i = 0; i < 2; ++i) {
+        do {
+            _mm_prefetch((const char*)(prefetch_ptr1), _MM_HINT_T0);
+            _mm_prefetch((const char*)(prefetch_ptr1 + output_increment), _MM_HINT_T0);
+            _mm_prefetch((const char*)(prefetch_ptr2), _MM_HINT_T0);
+            _mm_prefetch((const char*)(prefetch_ptr2 + output_increment), _MM_HINT_T0);
+
+            vaes_round(k0, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
+            vaes_round(k1, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
+            vaes_round(k2, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
+            vaes_round(k3, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
+            vaes_round(k4, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
+            vaes_round(k5, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
+            vaes_round(k6, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
+            vaes_round(k7, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
+            vaes_round(k8, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
+            vaes_round(k9, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
+
+            _mm256_storeu2_m128i(output2 + 0, output1 + 0, xin0);
+            _mm256_storeu2_m128i(output2 + 1, output1 + 1, xin1);
+            _mm256_storeu2_m128i(output2 + 2, output1 + 2, xin2);
+            _mm256_storeu2_m128i(output2 + 3, output1 + 3, xin3);
+
+            _mm256_storeu2_m128i(output2 + output_increment + 0, output1 + output_increment + 0, xin4);
+            _mm256_storeu2_m128i(output2 + output_increment + 1, output1 + output_increment + 1, xin5);
+            _mm256_storeu2_m128i(output2 + output_increment + 2, output1 + output_increment + 2, xin6);
+            _mm256_storeu2_m128i(output2 + output_increment + 3, output1 + output_increment + 3, xin7);
+
+            output1 += output_increment * 2;
+            prefetch_ptr1 += output_increment * 2;
+            output2 += output_increment * 2;
+            prefetch_ptr2 += output_increment * 2;
+        } while (output1 < e);
+        e += prefetch_dist;
+        prefetch_ptr1 = output1;
+        prefetch_ptr2 = output2;
+    }
+
+    if (props.half_mem() && ctx1->first_half && ctx2->first_half) {
+        __m128i* p1 = reinterpret_cast<__m128i*>(ctx1->save_state);
+        __m128i* p2 = reinterpret_cast<__m128i*>(ctx2->save_state);
+        _mm256_storeu2_m128i(p2 + 0, p1 + 0, xin0);
+        _mm256_storeu2_m128i(p2 + 1, p1 + 1, xin1);
+        _mm256_storeu2_m128i(p2 + 2, p1 + 2, xin2);
+        _mm256_storeu2_m128i(p2 + 3, p1 + 3, xin3);
+        _mm256_storeu2_m128i(p2 + 4, p1 + 4, xin4);
+        _mm256_storeu2_m128i(p2 + 5, p1 + 5, xin5);
+        _mm256_storeu2_m128i(p2 + 6, p1 + 6, xin6);
+        _mm256_storeu2_m128i(p2 + 7, p1 + 7, xin7);
+    }
+
+    _mm256_zeroupper();
+}
+
+
+template<Algorithm::Id ALGO>
+NOINLINE void cn_implode_scratchpad_vaes(cryptonight_ctx* ctx)
+{
+    constexpr CnAlgo<ALGO> props;
+
+    constexpr size_t N = (props.memory() / sizeof(__m256i)) / (props.half_mem() ? 2 : 1);
+
+    __m256i xout01, xout23, xout45, xout67;
+    __m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
+
+    const __m256i* input = reinterpret_cast<const __m256i*>(ctx->memory);
+    __m256i* output = reinterpret_cast<__m256i*>(ctx->state);
+
+    vaes_genkey(reinterpret_cast<__m128i*>(output) + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
+
+    xout01 = _mm256_load_si256(output + 2);
+    xout23 = _mm256_load_si256(output + 3);
+    xout45 = _mm256_load_si256(output + 4);
+    xout67 = _mm256_load_si256(output + 5);
+
+    const __m256i* input_begin = input;
+    for (size_t part = 0; part < (props.half_mem() ? 2 : 1); ++part) {
+        if (props.half_mem() && (part == 1)) {
+            input = input_begin;
+            ctx->first_half = false;
+            cn_explode_scratchpad_vaes<ALGO>(ctx);
+        }
+
+        for (size_t i = 0; i < N;) {
+            xout01 = _mm256_xor_si256(xout01, input[0]);
+            xout23 = _mm256_xor_si256(xout23, input[1]);
+
+            constexpr int input_increment = 64 / sizeof(__m256i);
+
+            xout45 = _mm256_xor_si256(xout45, input[input_increment]);
+            xout67 = _mm256_xor_si256(xout67, input[input_increment + 1]);
+
+            input += input_increment * 2;
+            i += 4;
+
+            if (i < N) {
+                _mm_prefetch((const char*)(input), _MM_HINT_T0);
+                _mm_prefetch((const char*)(input + input_increment), _MM_HINT_T0);
+            }
+
+            vaes_round(k0, xout01, xout23, xout45, xout67);
+            vaes_round(k1, xout01, xout23, xout45, xout67);
+            vaes_round(k2, xout01, xout23, xout45, xout67);
+            vaes_round(k3, xout01, xout23, xout45, xout67);
+            vaes_round(k4, xout01, xout23, xout45, xout67);
+            vaes_round(k5, xout01, xout23, xout45, xout67);
+            vaes_round(k6, xout01, xout23, xout45, xout67);
+            vaes_round(k7, xout01, xout23, xout45, xout67);
+            vaes_round(k8, xout01, xout23, xout45, xout67);
+            vaes_round(k9, xout01, xout23, xout45, xout67);
+        }
+    }
+
+    _mm256_store_si256(output + 2, xout01);
+    _mm256_store_si256(output + 3, xout23);
+    _mm256_store_si256(output + 4, xout45);
+    _mm256_store_si256(output + 5, xout67);
+
+    _mm256_zeroupper();
+}
+
+
+template<Algorithm::Id ALGO>
+NOINLINE void cn_implode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2)
+{
+    constexpr CnAlgo<ALGO> props;
+
+    constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
+
+    __m256i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
+    __m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
+
+    const __m128i* input1 = reinterpret_cast<const __m128i*>(ctx1->memory);
+    const __m128i* input2 = reinterpret_cast<const __m128i*>(ctx2->memory);
+
+    __m128i* output1 = reinterpret_cast<__m128i*>(ctx1->state);
+    __m128i* output2 = reinterpret_cast<__m128i*>(ctx2->state);
+
+    vaes_genkey_double(output1 + 2, output2 + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
+
+    xout0 = _mm256_loadu2_m128i(output2 + 4, output1 + 4);
+    xout1 = _mm256_loadu2_m128i(output2 + 5, output1 + 5);
+    xout2 = _mm256_loadu2_m128i(output2 + 6, output1 + 6);
+    xout3 = _mm256_loadu2_m128i(output2 + 7, output1 + 7);
+    xout4 = _mm256_loadu2_m128i(output2 + 8, output1 + 8);
+    xout5 = _mm256_loadu2_m128i(output2 + 9, output1 + 9);
+    xout6 = _mm256_loadu2_m128i(output2 + 10, output1 + 10);
+    xout7 = _mm256_loadu2_m128i(output2 + 11, output1 + 11);
+
+    const __m128i* input_begin1 = input1;
+    const __m128i* input_begin2 = input2;
+    for (size_t part = 0; part < (props.half_mem() ? 2 : 1); ++part) {
+        if (props.half_mem() && (part == 1)) {
+            input1 = input_begin1;
+            input2 = input_begin2;
+            ctx1->first_half = false;
+            ctx2->first_half = false;
+            cn_explode_scratchpad_vaes_double<ALGO>(ctx1, ctx2);
+        }
+
+        for (size_t i = 0; i < N;) {
+            xout0 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + 0, input1 + 0), xout0);
+            xout1 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + 1, input1 + 1), xout1);
+            xout2 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + 2, input1 + 2), xout2);
+            xout3 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + 3, input1 + 3), xout3);
+
+            constexpr int input_increment = 64 / sizeof(__m128i);
+
+            xout4 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + input_increment + 0, input1 + input_increment + 0), xout4);
+            xout5 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + input_increment + 1, input1 + input_increment + 1), xout5);
+            xout6 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + input_increment + 2, input1 + input_increment + 2), xout6);
+            xout7 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + input_increment + 3, input1 + input_increment + 3), xout7);
+
+            input1 += input_increment * 2;
+            input2 += input_increment * 2;
+            i += 8;
+
+            if (i < N) {
+                _mm_prefetch((const char*)(input1), _MM_HINT_T0);
+                _mm_prefetch((const char*)(input1 + input_increment), _MM_HINT_T0);
+                _mm_prefetch((const char*)(input2), _MM_HINT_T0);
+                _mm_prefetch((const char*)(input2 + input_increment), _MM_HINT_T0);
+            }
+
+            vaes_round(k0, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
+            vaes_round(k1, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
+            vaes_round(k2, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
+            vaes_round(k3, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
+            vaes_round(k4, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
+            vaes_round(k5, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
+            vaes_round(k6, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
+            vaes_round(k7, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
+            vaes_round(k8, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
+            vaes_round(k9, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
+        }
+    }
+
+    _mm256_storeu2_m128i(output2 + 4, output1 + 4, xout0);
+    _mm256_storeu2_m128i(output2 + 5, output1 + 5, xout1);
+    _mm256_storeu2_m128i(output2 + 6, output1 + 6, xout2);
+    _mm256_storeu2_m128i(output2 + 7, output1 + 7, xout3);
+    _mm256_storeu2_m128i(output2 + 8, output1 + 8, xout4);
+    _mm256_storeu2_m128i(output2 + 9, output1 + 9, xout5);
+    _mm256_storeu2_m128i(output2 + 10, output1 + 10, xout6);
+    _mm256_storeu2_m128i(output2 + 11, output1 + 11, xout7);
+
+    _mm256_zeroupper();
+}
+
+
+template<Algorithm::Id ALGO>
+void VAES_Instance()
+{
+    cn_explode_scratchpad_vaes<ALGO>(nullptr);
+    cn_explode_scratchpad_vaes_double<ALGO>(nullptr, nullptr);
+    cn_implode_scratchpad_vaes<ALGO>(nullptr);
+    cn_implode_scratchpad_vaes_double<ALGO>(nullptr, nullptr);
+}
+
+
+void (*vaes_instances[])() = {
+    VAES_Instance<Algorithm::CN_0>,
+    VAES_Instance<Algorithm::CN_1>,
+    VAES_Instance<Algorithm::CN_2>,
+    VAES_Instance<Algorithm::CN_R>,
+    VAES_Instance<Algorithm::CN_FAST>,
+    VAES_Instance<Algorithm::CN_HALF>,
+    VAES_Instance<Algorithm::CN_XAO>,
+    VAES_Instance<Algorithm::CN_RTO>,
+    VAES_Instance<Algorithm::CN_RWZ>,
+    VAES_Instance<Algorithm::CN_ZLS>,
+    VAES_Instance<Algorithm::CN_DOUBLE>,
+    VAES_Instance<Algorithm::CN_CCX>,
+    VAES_Instance<Algorithm::CN_LITE_0>,
+    VAES_Instance<Algorithm::CN_LITE_1>,
+    VAES_Instance<Algorithm::CN_HEAVY_0>,
+    VAES_Instance<Algorithm::CN_HEAVY_TUBE>,
+    VAES_Instance<Algorithm::CN_HEAVY_XHV>,
+    VAES_Instance<Algorithm::CN_PICO_0>,
+    VAES_Instance<Algorithm::CN_PICO_TLO>,
+    VAES_Instance<Algorithm::CN_UPX2>,
+    VAES_Instance<Algorithm::CN_GR_0>,
+    VAES_Instance<Algorithm::CN_GR_1>,
+    VAES_Instance<Algorithm::CN_GR_2>,
+    VAES_Instance<Algorithm::CN_GR_3>,
+    VAES_Instance<Algorithm::CN_GR_4>,
+    VAES_Instance<Algorithm::CN_GR_5>,
+};
+
+
+} // xmrig
--- a/src/crypto/cn/CryptoNight_x86_vaes.h
+++ b/src/crypto/cn/CryptoNight_x86_vaes.h
@ -0,0 +1,48 @@
+/* XMRig
+ * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
+ * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
+ * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
+ * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
+ * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
+ * Copyright 2017-2019 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
+ * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
+ * Copyright 2018-2020 SChernykh   <https://github.com/SChernykh>
+ * Copyright 2016-2020 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef XMRIG_CRYPTONIGHT_X86_VAES_H
+#define XMRIG_CRYPTONIGHT_X86_VAES_H
+
+
+#include "crypto/cn/CnAlgo.h"
+
+
+struct cryptonight_ctx;
+
+
+namespace xmrig {
+
+
+template<Algorithm::Id ALGO> void cn_explode_scratchpad_vaes(cryptonight_ctx* ctx);
+template<Algorithm::Id ALGO> void cn_explode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2);
+template<Algorithm::Id ALGO> void cn_implode_scratchpad_vaes(cryptonight_ctx* ctx);
+template<Algorithm::Id ALGO> void cn_implode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2);
+
+
+} // xmrig
+
+
+#endif /* XMRIG_CRYPTONIGHT_X86_VAES_H */
--- a/src/crypto/cn/c_groestl.c
+++ b/src/crypto/cn/c_groestl.c
@ -4,7 +4,7 @@
 *
 *  This work is based on the implementation of
 *          Soeren S. Thomsen and Krystian Matusiewicz
- *          
+ *
 *
 */

@ -22,7 +22,7 @@ const uint8_t indices_cyclic[15] = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6};
 #define ROTATE_COLUMN_DOWN(v1, v2, amount_bytes, temp_var) {temp_var = (v1<<(8*amount_bytes))|(v2>>(8*(4-amount_bytes))); \
 															v2 = (v2<<(8*amount_bytes))|(v1>>(8*(4-amount_bytes))); \
 															v1 = temp_var;}
-  
+

 #define COLUMN(x,y,i,c0,c1,c2,c3,c4,c5,c6,c7,tv1,tv2,tu,tl,t)				\
   tu = T[2*(uint32_t)x[4*c0+0]];			    \
@ -161,11 +161,11 @@ static void F512(uint32_t *h, const uint32_t *m) {

 /* digest up to msglen bytes of input (full blocks only) */
 static void Transform(groestlHashState *ctx,
-	       const uint8_t *input, 
+	       const uint8_t *input,
 	       int msglen) {

  /* digest message, one block at a time */
-  for (; msglen >= SIZE512; 
+  for (; msglen >= SIZE512;
       msglen -= SIZE512, input += SIZE512) {
    F512(ctx->chaining,(uint32_t*)input);

@ -199,7 +199,7 @@ static void OutputTransformation(groestlHashState *ctx) {
 	RND512P((uint8_t*)y, temp, 0x00000009);
 	for (j = 0; j < 2*COLS512; j++) {
 	  ctx->chaining[j] ^= temp[j];
-	}									  
+	}									
 }

 /* initialise context */
@ -313,7 +313,7 @@ static void Final(groestlHashState* ctx,
    ctx->block_counter2 >>= 8;
  }
  /* digest final padding block */
-  Transform(ctx, ctx->buffer, SIZE512); 
+  Transform(ctx, ctx->buffer, SIZE512);
  /* perform output transformation */
  OutputTransformation(ctx);

@ -332,7 +332,7 @@ static void Final(groestlHashState* ctx,
 }

 /* hash bit sequence */
-void groestl(const BitSequence* data, 
+void groestl(const BitSequence* data,
 		DataLength databitlen,
 		BitSequence* hashval) {

--- a/src/crypto/cn/c_groestl.h
+++ b/src/crypto/cn/c_groestl.h
@ -4,10 +4,10 @@
 #include "crypto_uint8.h"
 #include "crypto_uint32.h"
 #include "crypto_uint64.h"
-#include "crypto_hash.h" 
+#include "crypto_hash.h"

-typedef crypto_uint8 uint8_t; 
-typedef crypto_uint32 uint32_t; 
+typedef crypto_uint8 uint8_t;
+typedef crypto_uint32 uint32_t;
 typedef crypto_uint64 uint64_t;
 */
 #include <stdint.h>
--- a/src/crypto/cn/c_skein.c
+++ b/src/crypto/cn/c_skein.c
@ -5,7 +5,7 @@
 ** Source code author: Doug Whiting, 2008.
 **
 ** This algorithm and source code is released to the public domain.
-** 
+**
 ************************************************************************/

 #define  SKEIN_PORT_CODE /* instantiate any code in skein_port.h */
@ -57,7 +57,7 @@ static int  Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);

 /*****************************************************************
 ** "Internal" Skein definitions
-**    -- not needed for sequential hashing API, but will be 
+**    -- not needed for sequential hashing API, but will be
 **           helpful for other uses of Skein (e.g., tree hash mode).
 **    -- included here so that they can be shared between
 **           reference and optimized code.
@ -179,11 +179,11 @@ static int  Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
 #define Skein_Assert(x,retCode)/* default: ignore all Asserts, for performance */
 #define Skein_assert(x)
 #elif   defined(SKEIN_ASSERT)
-#include <assert.h>     
-#define Skein_Assert(x,retCode) assert(x) 
-#define Skein_assert(x)         assert(x) 
+#include <assert.h>
+#define Skein_Assert(x,retCode) assert(x)
+#define Skein_assert(x)         assert(x)
 #else
-#include <assert.h>     
+#include <assert.h>
 #define Skein_Assert(x,retCode) { if (!(x)) return retCode; } /*  caller  error */
 #define Skein_assert(x)         assert(x)                     /* internal error */
 #endif
@ -191,8 +191,8 @@ static int  Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
 /*****************************************************************
 ** Skein block function constants (shared across Ref and Opt code)
 ******************************************************************/
-enum    
-{   
+enum
+{
  /* Skein_512 round rotation constants */
  R_512_0_0=46, R_512_0_1=36, R_512_0_2=19, R_512_0_3=37,
  R_512_1_0=33, R_512_1_1=27, R_512_1_2=14, R_512_1_3=42,
@ -251,7 +251,7 @@ const u64b_t SKEIN_512_IV_256[] =
 #define BLK_BITS        (WCNT*64)               /* some useful definitions for code here */
 #define KW_TWK_BASE     (0)
 #define KW_KEY_BASE     (3)
-#define ks              (kw + KW_KEY_BASE)                
+#define ks              (kw + KW_KEY_BASE)
 #define ts              (kw + KW_TWK_BASE)

 #ifdef SKEIN_DEBUG
@ -310,7 +310,7 @@ static void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,s
        ks[5] = ctx->X[5];
        ks[6] = ctx->X[6];
        ks[7] = ctx->X[7];
-        ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ 
+        ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
                ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;

        ts[2] = ts[0] ^ ts[1];
@ -338,7 +338,7 @@ static void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,s
    X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \
    X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \

-#if SKEIN_UNROLL_512 == 0                       
+#if SKEIN_UNROLL_512 == 0
 #define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)      /* unrolled */  \
    Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                      \
    Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr);
@ -469,7 +469,7 @@ static int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen)
        u08b_t  b[SKEIN_512_STATE_BYTES];
        u64b_t  w[SKEIN_512_STATE_WORDS];
        } cfg;                              /* config block */
-        
+
    Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
    ctx->h.hashBitLen = hashBitLen;         /* output hash bit count */

@ -548,7 +548,7 @@ static int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msg

    return SKEIN_SUCCESS;
    }
-   
+
 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
 /* finalize the hash computation and output the result */
 static int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
@ -562,7 +562,7 @@ static int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
        memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);

    Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt);  /* process the final block */
-    
+
    /* now output the result */
    byteCnt = (ctx->h.hashBitLen + 7) >> 3;             /* total number of output bytes */

--- a/src/crypto/cn/c_skein.h
+++ b/src/crypto/cn/c_skein.h
@ -9,7 +9,7 @@
 ** This algorithm and source code is released to the public domain.
 **
 ***************************************************************************
-** 
+**
 ** The following compile-time switches may be defined to control some
 ** tradeoffs between speed, code size, error checking, and security.
 **
@ -20,8 +20,8 @@
 **                            [default: no callouts (no overhead)]
 **
 **  SKEIN_ERR_CHECK        -- how error checking is handled inside Skein
-**                            code. If not defined, most error checking 
-**                            is disabled (for performance). Otherwise, 
+**                            code. If not defined, most error checking
+**                            is disabled (for performance). Otherwise,
 **                            the switch value is interpreted as:
 **                                0: use assert()      to flag errors
 **                                1: return SKEIN_FAIL to flag errors
--- a/src/crypto/cn/soft_aes.h
+++ b/src/crypto/cn/soft_aes.h
@ -124,9 +124,9 @@ static inline __m128i soft_aesenc(__m128i in, __m128i key)

 static inline uint32_t sub_word(uint32_t key)
 {
-    return (saes_sbox[key >> 24 ] << 24)   | 
-        (saes_sbox[(key >> 16) & 0xff] << 16 ) | 
-        (saes_sbox[(key >> 8)  & 0xff] << 8  ) | 
+    return (saes_sbox[key >> 24 ] << 24)   |
+        (saes_sbox[(key >> 16) & 0xff] << 16 ) |
+        (saes_sbox[(key >> 8)  & 0xff] << 8  ) |
         saes_sbox[key & 0xff];
 }

--- a/src/crypto/ghostrider/README.md
+++ b/src/crypto/ghostrider/README.md
@ -18,19 +18,21 @@ xmrig -a gr -o us.flockpool.com:5555 --tls -u WALLET_ADDRESS

 You can use **rtm_ghostrider_example.cmd** as a template and put pool URL and your wallet address there. The general XMRig documentation is available [here](https://xmrig.com/docs/miner).

+**Using `--threads` or `-t` option is NOT recommended because it turns off advanced built-in config.** If you want to tweak the nubmer of threads used for GhostRider, it's recommended to start using config.json instead of command line. The best suitable command line option for this is `--cpu-max-threads-hint=N` where N can be between 0 and 100.
+
 ## Performance

 While individual algorithm implementations are a bit unoptimized, XMRig achieves higher hashrates by employing better auto-config and more fine-grained thread scheduling: it can calculate a single batch of hashes using 2 threads for parts that don't require much cache. For example, on a typical Intel CPU (2 MB cache per core) it will use 1 thread per core for cn/fast, and 2 threads per core for other Cryptonight variants while calculating the same batch of hashes, always achieving more than 50% CPU load.

-For the same reason, XMRig can sometimes use less than 100% CPU on Ryzen 3000/5000 CPUs if it finds that running 1 thread per core is faster for some Cryptonight variants on your system. Also, this is why it reports using only half the threads at startup - it's actually 2 threads per each reported thread.
+For the same reason, XMRig can sometimes use less than 100% CPU on Ryzen 3000/5000 CPUs if it finds that running 1 thread per core is faster for some Cryptonight variants on your system.

-**Windows** (detailed results [here](https://imgur.com/a/GCjEWpl))
+**Windows** (detailed results [here](https://imgur.com/a/uRU1yO2))
 CPU|cpuminer-gr-avx2 (tuned), h/s|XMRig (MSVC build), h/s|Speedup
 -|-|-|-
 AMD Ryzen 7 4700U|632.6|731|+15.5%
 Intel Core i7-2600|496.4|533.6|+7.5%
 AMD Ryzen 7 3700X @ 4.1 GHz|2453.0|2469.1|+0.65%
-AMD Ryzen 5 5600X @ 4.65 GHz|2112.6|2221.2|+5.1%
+AMD Ryzen 5 5600X @ 4.65 GHz|2112.6|2313.2|+9.5%

 **Linux** (tested by **Delgon**, detailed results [here](https://cdn.discordapp.com/attachments/604375870236524574/913167614749048872/unknown.png))
 CPU|cpuminer-gr-avx2 (tuned), h/s|XMRig (GCC build), h/s|Speedup
--- a/src/crypto/ghostrider/aes_helper.c
+++ b/src/crypto/ghostrider/aes_helper.c
@ -18,7 +18,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -26,10 +26,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--- a/src/crypto/ghostrider/ghostrider.cpp
+++ b/src/crypto/ghostrider/ghostrider.cpp
@ -538,7 +538,7 @@ void destroy_helper_thread(HelperThread* t)
 }


-void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper)
+void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper, bool verbose)
 {
    enum { N = 8 };

@ -554,11 +554,13 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct
    uint32_t cn_indices[6];
    select_indices(cn_indices, data + 4);

-    static uint32_t prev_indices[3];
-    if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
-        memcpy(prev_indices, cn_indices, sizeof(prev_indices));
-        for (int i = 0; i < 3; ++i) {
-            LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
+    if (verbose) {
+        static uint32_t prev_indices[3];
+        if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
+            memcpy(prev_indices, cn_indices, sizeof(prev_indices));
+            for (int i = 0; i < 3; ++i) {
+                LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
+            }
        }
    }

@ -765,7 +767,7 @@ HelperThread* create_helper_thread(int64_t, const std::vector<int64_t>&) { retur
 void destroy_helper_thread(HelperThread*) {}


-void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread*)
+void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread*, bool verbose)
 {
    constexpr uint32_t N = 8;

@ -784,11 +786,13 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct
    uint32_t step[6] = { 4, 4, 1, 2, 4, 4 };
 #endif

-    static uint32_t prev_indices[3];
-    if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
-        memcpy(prev_indices, cn_indices, sizeof(prev_indices));
-        for (int i = 0; i < 3; ++i) {
-            LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
+    if (verbose) {
+        static uint32_t prev_indices[3];
+        if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
+            memcpy(prev_indices, cn_indices, sizeof(prev_indices));
+            for (int i = 0; i < 3; ++i) {
+                LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
+            }
        }
    }

--- a/src/crypto/ghostrider/ghostrider.h
+++ b/src/crypto/ghostrider/ghostrider.h
@ -41,7 +41,7 @@ struct HelperThread;
 void benchmark();
 HelperThread* create_helper_thread(int64_t cpu_index, const std::vector<int64_t>& affinities);
 void destroy_helper_thread(HelperThread* t);
-void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper);
+void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper, bool verbose = true);


 } // namespace ghostrider
--- a/src/crypto/ghostrider/md_helper.c
+++ b/src/crypto/ghostrider/md_helper.c
@ -51,7 +51,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -59,10 +59,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--- a/src/crypto/ghostrider/sph_blake.h
+++ b/src/crypto/ghostrider/sph_blake.h
@ -8,7 +8,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -16,10 +16,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--- a/src/crypto/ghostrider/sph_bmw.h
+++ b/src/crypto/ghostrider/sph_bmw.h
@ -7,7 +7,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -15,10 +15,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--- a/src/crypto/ghostrider/sph_cubehash.c
+++ b/src/crypto/ghostrider/sph_cubehash.c
@ -5,7 +5,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -13,10 +13,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--- a/src/crypto/ghostrider/sph_cubehash.h
+++ b/src/crypto/ghostrider/sph_cubehash.h
@ -8,7 +8,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -16,10 +16,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--- a/src/crypto/ghostrider/sph_echo.c
+++ b/src/crypto/ghostrider/sph_echo.c
@ -5,7 +5,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -13,10 +13,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@ -1029,4 +1029,4 @@ sph_echo512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
 }
 #ifdef __cplusplus
 }
-#endif 
+#endif
--- a/src/crypto/ghostrider/sph_echo.h
+++ b/src/crypto/ghostrider/sph_echo.h
@ -7,7 +7,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -15,10 +15,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@ -312,7 +312,7 @@ void sph_echo512_close(void *cc, void *dst);
 */
 void sph_echo512_addbits_and_close(
 	void *cc, unsigned ub, unsigned n, void *dst);
-	
+
 #ifdef __cplusplus
 }
 #endif
--- a/src/crypto/ghostrider/sph_groestl.c
+++ b/src/crypto/ghostrider/sph_groestl.c
@ -5,7 +5,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -13,10 +13,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--- a/src/crypto/ghostrider/sph_jh.h
+++ b/src/crypto/ghostrider/sph_jh.h
@ -7,7 +7,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -15,10 +15,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--- a/src/crypto/ghostrider/sph_luffa.c
+++ b/src/crypto/ghostrider/sph_luffa.c
@ -5,7 +5,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -13,10 +13,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--- a/src/crypto/ghostrider/sph_luffa.h
+++ b/src/crypto/ghostrider/sph_luffa.h
@ -7,7 +7,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -15,10 +15,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@ -288,9 +288,9 @@ void sph_luffa512_close(void *cc, void *dst);
 */
 void sph_luffa512_addbits_and_close(
 	void *cc, unsigned ub, unsigned n, void *dst);
-	
+
 #ifdef __cplusplus
 }
 #endif
-	
+
 #endif
--- a/src/crypto/ghostrider/sph_sha2.c
+++ b/src/crypto/ghostrider/sph_sha2.c
@ -692,9 +692,9 @@ memcpy( state_out, state_in, 32 );
 #undef SHA2_IN
 }

-void sph_sha256_transform_be( uint32_t *state_out, const uint32_t *data, 
+void sph_sha256_transform_be( uint32_t *state_out, const uint32_t *data,
                              const uint32_t *state_in )
-{  
+{
 memcpy( state_out, state_in, 32 );
 #define SHA2_IN(x)   sph_dec32be_aligned( data+(x) )
   SHA2_ROUND_BODY( SHA2_IN, state_out );
@ -775,7 +775,7 @@ void sph_sha256_full( void *dst, const void *data, size_t len )
   sph_sha256_init( &cc );
   sph_sha256( &cc, data, len );
   sph_sha256_close( &cc, dst );
-}   
+}

 void sha256d(void* hash, const void* data, int len)
 {
--- a/src/crypto/ghostrider/sph_sha2.h
+++ b/src/crypto/ghostrider/sph_sha2.h
@ -11,7 +11,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -19,10 +19,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--- a/src/crypto/ghostrider/sph_shavite.c
+++ b/src/crypto/ghostrider/sph_shavite.c
@ -5,7 +5,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -13,10 +13,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@ -99,7 +99,7 @@ static const sph_u32 IV512[] = {
 /*
 * This is the code needed to match the "reference implementation" as
 * published on Nov 23rd, 2009, instead of the published specification.
- * 
+ *

 #define AES_BIG_ENDIAN   1
 #include "aes_helper.c"
--- a/src/crypto/ghostrider/sph_shavite.h
+++ b/src/crypto/ghostrider/sph_shavite.h
@ -9,7 +9,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -17,10 +17,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@ -306,9 +306,9 @@ void sph_shavite512_close(void *cc, void *dst);
 */
 void sph_shavite512_addbits_and_close(
 	void *cc, unsigned ub, unsigned n, void *dst);
-	
+
 #ifdef __cplusplus
 }
-#endif	
-	
+#endif
+
 #endif
--- a/src/crypto/ghostrider/sph_simd.c
+++ b/src/crypto/ghostrider/sph_simd.c
@ -5,7 +5,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -13,10 +13,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--- a/src/crypto/ghostrider/sph_simd.h
+++ b/src/crypto/ghostrider/sph_simd.h
@ -7,7 +7,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -15,10 +15,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--- a/src/crypto/ghostrider/sph_skein.c
+++ b/src/crypto/ghostrider/sph_skein.c
@ -5,7 +5,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -13,10 +13,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--- a/src/crypto/ghostrider/sph_skein.h
+++ b/src/crypto/ghostrider/sph_skein.h
@ -12,7 +12,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -20,10 +20,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--- a/src/crypto/ghostrider/sph_types.h
+++ b/src/crypto/ghostrider/sph_types.h
@ -18,7 +18,7 @@
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
- * 
+ *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
@ -26,10 +26,10 @@
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- * 
+ *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
- * 
+ *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--- a/src/crypto/kawpow/KPHash.cpp
+++ b/src/crypto/kawpow/KPHash.cpp
@ -166,9 +166,9 @@ static inline uint32_t popcount_soft(uint64_t x)
    constexpr uint64_t h01 = 0x0101010101010101ull;

    x -= (x >> 1) & m1;             //put count of each 2 bits into those 2 bits
-    x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits 
-    x = (x + (x >> 4)) & m4;        //put count of each 8 bits into those 8 bits 
-    return (x * h01) >> 56;         //returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ... 
+    x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
+    x = (x + (x >> 4)) & m4;        //put count of each 8 bits into those 8 bits
+    return (x * h01) >> 56;         //returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
 }


--- a/src/crypto/randomx/jit_compiler_x86_static.S
+++ b/src/crypto/randomx/jit_compiler_x86_static.S
@ -180,7 +180,7 @@ init_block_loop:
 	prefetchw byte ptr [rsi]
 	mov rbx, rbp
 	.byte 232 ;# 0xE8 = call
-	;# .set CALL_LOC, 
+	;# .set CALL_LOC,
 	.int 32768 - (call_offset - DECL(randomx_dataset_init))
 call_offset:
 	mov qword ptr [rsi+0], r8
--- a/src/crypto/randomx/superscalar.cpp
+++ b/src/crypto/randomx/superscalar.cpp
@ -231,7 +231,7 @@ namespace randomx {
 	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMULH_R = SuperscalarInstructionInfo("IMULH_R", SuperscalarInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1);
 	const SuperscalarInstructionInfo SuperscalarInstructionInfo::ISMULH_R = SuperscalarInstructionInfo("ISMULH_R", SuperscalarInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1);
 	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_RCP = SuperscalarInstructionInfo("IMUL_RCP", SuperscalarInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1);
-	
+
 	const SuperscalarInstructionInfo SuperscalarInstructionInfo::NOP = SuperscalarInstructionInfo("NOP");

 	//these are some of the options how to split a 16-byte window into 3 or 4 x86 instructions.
@ -494,7 +494,7 @@ namespace randomx {
 			// * value must be ready at the required cycle
 			// * cannot be the same as the source register unless the instruction allows it
 			//   - this avoids optimizable instructions such as "xor r, r" or "sub r, r"
-			// * register cannot be multiplied twice in a row unless allowChainedMul is true 
+			// * register cannot be multiplied twice in a row unless allowChainedMul is true
 			//   - this avoids accumulation of trailing zeroes in registers due to excessive multiplication
 			//   - allowChainedMul is set to true if an attempt to find source/destination registers failed (this is quite rare, but prevents a catastrophic failure of the generator)
 			// * either the last instruction applied to the register or its source must be different than this instruction
@ -619,7 +619,7 @@ namespace randomx {
 			if (commit)
 				if (trace) std::cout << "; (eliminated)" << std::endl;
 			return cycle;
-		} 
+		}
 		else if (mop.isSimple()) {
 			//this macro-op has only one uOP
 			return scheduleUop<commit>(mop.getUop1(), portBusy, cycle);
@ -676,7 +676,7 @@ namespace randomx {
 			if (trace) std::cout << "; ------------- fetch cycle " << cycle << " (" << decodeBuffer->getName() << ")" << std::endl;

 			int bufferIndex = 0;
-			
+
 			//fill all instruction slots in the current decode buffer
 			while (bufferIndex < decodeBuffer->getSize()) {
 				int topCycle = cycle;
@ -831,7 +831,7 @@ namespace randomx {
 		prog.decodeCycles = decodeCycle;
 		prog.ipc = ipc;
 		prog.mulCount = mulCount;
-		
+

 		/*if(INFO) std::cout << "; ALU port utilization:" << std::endl;
 		if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl;