diff --git a/CHANGELOG.md b/CHANGELOG.md
index 18acfca9..7f0966ab 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,11 @@
+# 1.9.0
+- Integrated Monero CN-R variant, also known as CNv4, CNv5 or CryptoNight-R #233 (algo: "cryptonight", variant: "r")
+- Integrated Wownero CN-R variant #233 (algo: "cryptonight", variant: "wow")
+- Integrated Graft variant (algo: "cryptonight", variant: "rwz" OR variant: "graft")
+- Integrated X-Cash variant #234 (algo: "cryptonight", variant: "double" OR variant: "heavyx" OR variant: "xcash")
+- Integrated Zelerius variant (algo: "cryptonight", variant: "zls" OR variant: "zelerius")
+- Added miner version column to the Dashboard (version turns red when it's outdated)
+- Fixed crash when remote logging is disabled
# 1.8.13
- Integrated HOSP variant (algo: "cryptonight", variant: "hosp")
- Added ASM code/optimization for HOSP and RTO on Intel CPUs
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e96f979b..757b26b8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -54,6 +54,7 @@ set(SOURCES_CRYPTO
src/crypto/c_jh.c
src/crypto/c_skein.c
src/crypto/CryptoNight.cpp
+ src/crypto/CryptoNightR_gen.cpp
)
set(SOURCES_COMMON
@@ -131,7 +132,7 @@ if (WIN32)
add_definitions(-DBOOST_ALL_NO_LIB)
endif(WIN32)
-find_package(Boost 1.63.0 COMPONENTS system REQUIRED)
+find_package(Boost 1.62.0 COMPONENTS system REQUIRED)
include(cmake/flags.cmake)
diff --git a/cmake/asm.cmake b/cmake/asm.cmake
index abd4030c..b5067939 100644
--- a/cmake/asm.cmake
+++ b/cmake/asm.cmake
@@ -56,6 +56,40 @@ configure_file("src/crypto/asm/win/cnv2_main_loop_ryzen.inc.in" "src/crypto/asm/
configure_file("src/crypto/asm/win/cnv2_double_main_loop_sandybridge.inc.in" "src/crypto/asm/win/cnv2_double_main_loop_fastv2_sandybridge.inc")
configure_file("src/crypto/asm/win/cnv2_main_loop_soft_aes_sandybridge.inc.in" "src/crypto/asm/win/cnv2_main_loop_fastv2_soft_aes_sandybridge.inc")
+# CN XCASH
+set(ALGO "xcash")
+set(ITERATIONS "1048576") #0x100000
+set(MASK "2097136") #0x1FFFF0
+
+configure_file("src/crypto/asm/cnv2_main_loop_ivybridge.inc.in" "src/crypto/asm/cnv2_main_loop_xcash_ivybridge.inc")
+configure_file("src/crypto/asm/cnv2_main_loop_bulldozer.inc.in" "src/crypto/asm/cnv2_main_loop_xcash_bulldozer.inc")
+configure_file("src/crypto/asm/cnv2_main_loop_ryzen.inc.in" "src/crypto/asm/cnv2_main_loop_xcash_ryzen.inc")
+configure_file("src/crypto/asm/cnv2_double_main_loop_sandybridge.inc.in" "src/crypto/asm/cnv2_double_main_loop_xcash_sandybridge.inc")
+configure_file("src/crypto/asm/cnv2_main_loop_soft_aes_sandybridge.inc.in" "src/crypto/asm/cnv2_main_loop_xcash_soft_aes_sandybridge.inc")
+
+configure_file("src/crypto/asm/win/cnv2_main_loop_ivybridge.inc.in" "src/crypto/asm/win/cnv2_main_loop_xcash_ivybridge.inc")
+configure_file("src/crypto/asm/win/cnv2_main_loop_bulldozer.inc.in" "src/crypto/asm/win/cnv2_main_loop_xcash_bulldozer.inc")
+configure_file("src/crypto/asm/win/cnv2_main_loop_ryzen.inc.in" "src/crypto/asm/win/cnv2_main_loop_xcash_ryzen.inc")
+configure_file("src/crypto/asm/win/cnv2_double_main_loop_sandybridge.inc.in" "src/crypto/asm/win/cnv2_double_main_loop_xcash_sandybridge.inc")
+configure_file("src/crypto/asm/win/cnv2_main_loop_soft_aes_sandybridge.inc.in" "src/crypto/asm/win/cnv2_main_loop_xcash_soft_aes_sandybridge.inc")
+
+# CN ZELERIUS
+set(ALGO "zelerius")
+set(ITERATIONS "393216") #0x60000
+set(MASK "2097136") #0x1FFFF0
+
+configure_file("src/crypto/asm/cnv2_main_loop_ivybridge.inc.in" "src/crypto/asm/cnv2_main_loop_zelerius_ivybridge.inc")
+configure_file("src/crypto/asm/cnv2_main_loop_bulldozer.inc.in" "src/crypto/asm/cnv2_main_loop_zelerius_bulldozer.inc")
+configure_file("src/crypto/asm/cnv2_main_loop_ryzen.inc.in" "src/crypto/asm/cnv2_main_loop_zelerius_ryzen.inc")
+configure_file("src/crypto/asm/cnv2_double_main_loop_sandybridge.inc.in" "src/crypto/asm/cnv2_double_main_loop_zelerius_sandybridge.inc")
+configure_file("src/crypto/asm/cnv2_main_loop_soft_aes_sandybridge.inc.in" "src/crypto/asm/cnv2_main_loop_zelerius_soft_aes_sandybridge.inc")
+
+configure_file("src/crypto/asm/win/cnv2_main_loop_ivybridge.inc.in" "src/crypto/asm/win/cnv2_main_loop_zelerius_ivybridge.inc")
+configure_file("src/crypto/asm/win/cnv2_main_loop_bulldozer.inc.in" "src/crypto/asm/win/cnv2_main_loop_zelerius_bulldozer.inc")
+configure_file("src/crypto/asm/win/cnv2_main_loop_ryzen.inc.in" "src/crypto/asm/win/cnv2_main_loop_zelerius_ryzen.inc")
+configure_file("src/crypto/asm/win/cnv2_double_main_loop_sandybridge.inc.in" "src/crypto/asm/win/cnv2_double_main_loop_zelerius_sandybridge.inc")
+configure_file("src/crypto/asm/win/cnv2_main_loop_soft_aes_sandybridge.inc.in" "src/crypto/asm/win/cnv2_main_loop_zelerius_soft_aes_sandybridge.inc")
+
# CN LITE
set(ALGO "lite")
@@ -99,16 +133,19 @@ configure_file("src/crypto/asm/win/cnv2_main_loop_soft_aes_sandybridge.inc.in" "
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
enable_language(ASM_MASM)
- set(XMRIG_ASM_FILE "src/crypto/asm/win/cn_main_loop.asm")
+ set(XMRIG_ASM_FILE "src/crypto/asm/win/cn_main_loop.asm"
+ "src/crypto/asm/win/CryptonightR_template.asm")
set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY ASM_MASM)
include_directories(${CMAKE_BINARY_DIR}/src/crypto/asm/win)
else()
enable_language(ASM)
if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
- set(XMRIG_ASM_FILE "src/crypto/asm/win/cn_main_loop_win_gcc.S")
+ set(XMRIG_ASM_FILE "src/crypto/asm/win/cn_main_loop_win_gcc.S"
+ "src/crypto/asm/win/CryptonightR_template.S")
else()
- set(XMRIG_ASM_FILE "src/crypto/asm/cn_main_loop.S")
+ set(XMRIG_ASM_FILE "src/crypto/asm/cn_main_loop.S"
+ "src/crypto/asm/CryptonightR_template.S")
endif()
set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY C)
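Note: the ITERATIONS/MASK values configured above are substituted into the `*.inc.in` assembly templates, and the same constants reappear as the first and fourth template arguments of `CryptoNightMultiHash` in `src/crypto/CryptoNight.cpp` later in this diff. A minimal sketch of that mapping, purely for reference (the `VariantParams` struct and `kParams` table below are illustrative, not part of the patch):

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative only: pairs each new variant with the iteration count and
// scratchpad mask used both in asm.cmake and in CryptoNight.cpp.
struct VariantParams {
    const char* name;
    uint32_t    iterations;
    uint32_t    mask;
};

constexpr VariantParams kParams[] = {
    {"xcash (double)", 0x100000, 0x1FFFF0},  // ITERATIONS "1048576"
    {"zelerius (zls)", 0x60000,  0x1FFFF0},  // ITERATIONS "393216"
    {"rwz (graft)",    0x60000,  0x1FFFF0},  // same reduced count, CNv2-style loop
};

int main() {
    for (const auto& p : kParams) {
        std::printf("%-16s iterations=0x%X mask=0x%X\n",
                    p.name, (unsigned)p.iterations, (unsigned)p.mask);
    }
    return 0;
}
```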
diff --git a/index.html b/index.html
index 9a8d953c..ec011264 100644
--- a/index.html
+++ b/index.html
@@ -65,6 +65,9 @@
var currentServerTime = 0;
var clockDrift = 0;
+ var latestVersion = 0;
+ var currentVersion = 0;
+
$.fn.dataTable.ext.search.push(
function( settings, data, dataIndex ) {
@@ -108,6 +111,7 @@
orderable: false
},
{data: "client_status.client_id", render: clientInfo},
+ {data: "client_status.version", render: version},
{data: "client_status.current_pool"},
{data: "client_status.current_status", render: clientStatus},
{data: "client_status.current_algo_name", render: algoAndPowVariantName},
@@ -674,6 +678,16 @@
}
}
+ function version( data, type, row ) {
+ var clientVersion = parseInt(row.client_status.version.split('.').join(""));
+
+ if (latestVersion > clientVersion) {
+            return '<span style="color: red">' + data + '</span>';
+ } else {
+ return data;
+ }
+ }
+
function clientStatus( data, type, row ) {
var lastStatus = row.client_status.last_status_update * 1000;
@@ -822,6 +836,7 @@
                             <th></th>
                             <th>Miner Id</th>
+                            <th>Version</th>
                             <th>Pool</th>
                             <th>Status</th>
                             <th>Algo / PoW</th>
@@ -861,6 +876,7 @@
                             <th></th>
                             <th></th>
                             <th></th>
+                            <th></th>
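For reference, the new dashboard column marks a miner's version red when it is older than the latest known version; the comparison above strips the dots from the version string and compares the resulting integers. A minimal C++ analogue of that flattening, purely illustrative (the dashboard itself does this in JavaScript as shown above):

```cpp
#include <cstdint>
#include <iostream>
#include <string>

// Analogue of parseInt(version.split('.').join("")) used in version() above.
uint64_t flattenVersion(const std::string& v) {
    std::string digits;
    for (char c : v) {
        if (c != '.') digits += c;  // drop the dots, keep the digits
    }
    return std::stoull(digits);
}

int main() {
    std::cout << flattenVersion("1.9.0") << "\n";   // 190
    std::cout << flattenVersion("1.8.13") << "\n";  // 1813
    return 0;
}
```

Note that components with differing digit counts flatten to integers that do not preserve release order (as the two sample values show), so the red highlight is reliable only while all compared versions have the same component widths.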
diff --git a/src/App.cpp b/src/App.cpp
index b240431e..45c30757 100644
--- a/src/App.cpp
+++ b/src/App.cpp
@@ -155,10 +155,14 @@ int App::start()
return EINVAL;
} else {
if (Options::i()->colors()) {
- LOG_INFO(WHITE_BOLD("%s hash self-test... ") GREEN_BOLD("successful") ".", m_options->algoName());
+ LOG_INFO(WHITE_BOLD("%s hash self-test... %s."),
+ m_options->algoName(),
+ Options::i()->skipSelfCheck() ? YELLOW_BOLD("skipped") : GREEN_BOLD("successful"));
}
else {
- LOG_INFO("%s hash self-test... successful.", m_options->algoName());
+ LOG_INFO("%s hash self-test... %s.",
+ m_options->algoName(),
+ Options::i()->skipSelfCheck() ? "skipped" : "successful");
}
}
diff --git a/src/Cpu_arm.cpp b/src/Cpu_arm.cpp
index db6ffa30..7be95170 100644
--- a/src/Cpu_arm.cpp
+++ b/src/Cpu_arm.cpp
@@ -30,7 +30,11 @@
void CpuImpl::initCommon()
{
- memcpy(m_brand, "Unknown", 7);
+# ifdef XMRIG_ARMv8
+ memcpy(m_brand, "ARMv8", 5);
+# else
+ memcpy(m_brand, "ARMv7", 5);
+# endif
# if defined(XMRIG_ARMv8)
m_flags |= Cpu::X86_64;
diff --git a/src/Mem.cpp b/src/Mem.cpp
index a9a233b4..cd82339c 100644
--- a/src/Mem.cpp
+++ b/src/Mem.cpp
@@ -67,9 +67,17 @@ ScratchPadMem Mem::create(ScratchPad** scratchPads, int threadId)
allocate(scratchPadMem, m_useHugePages);
for (size_t i = 0; i < getThreadHashFactor(threadId); ++i) {
-        ScratchPad* scratchPad = static_cast<ScratchPad*>(_mm_malloc(sizeof(ScratchPad), 4096));
+        auto* scratchPad = static_cast<ScratchPad*>(_mm_malloc(sizeof(ScratchPad), 4096));
scratchPad->memory = scratchPadMem.memory + (i * scratchPadSize);
+        auto* p = reinterpret_cast<uint8_t*>(allocateExecutableMemory(0x4000));
+ scratchPad->generated_code = reinterpret_cast(p);
+ scratchPad->generated_code_double = reinterpret_cast(p + 0x2000);
+
+ scratchPad->generated_code_data.variant = PowVariant::LAST_ITEM;
+ scratchPad->generated_code_data.height = (uint64_t)(-1);
+ scratchPad->generated_code_double_data = scratchPad->generated_code_data;
+
scratchPads[i] = scratchPad;
}
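The two code pointers split one 0x4000-byte executable buffer (single-hash code at offset 0, double-hash code at offset 0x2000), and `generated_code_data` starts out with the `LAST_ITEM` / `-1` sentinel so the first hash always triggers code generation. A minimal sketch of the staleness check this enables; the actual check lives in the newly added `CryptoNightR_gen.cpp`, which this diff does not show, so the function below is illustrative only:

```cpp
#include <cstdint>

enum PowVariant { POW_V4, POW_WOW, LAST_ITEM };  // trimmed to what the sketch needs

struct GeneratedCodeData {
    PowVariant variant;
    uint64_t   height;
};

// Illustrative: regenerate the JIT-compiled main loop only when the
// (variant, height) pair differs from what the buffer was built for.
bool needsRegeneration(const GeneratedCodeData& cached, PowVariant variant, uint64_t height) {
    return cached.variant != variant || cached.height != height;
}

int main() {
    GeneratedCodeData cached{LAST_ITEM, uint64_t(-1)};        // sentinel set in Mem::create()
    bool first = needsRegeneration(cached, POW_V4, 1800000);  // true: sentinel never matches
    cached = {POW_V4, 1800000};
    bool again = needsRegeneration(cached, POW_V4, 1800000);  // false: same variant and height
    return (first && !again) ? 0 : 1;
}
```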
diff --git a/src/Mem.h b/src/Mem.h
index 790bdd7e..94f74b22 100644
--- a/src/Mem.h
+++ b/src/Mem.h
@@ -75,6 +75,9 @@ public:
static ScratchPadMem create(ScratchPad** scratchPads, int threadId);
static void release(ScratchPad** scratchPads, ScratchPadMem& scratchPadMem, int threadId);
+ static void *allocateExecutableMemory(size_t size);
+ static void flushInstructionCache(void *p, size_t size);
+
static inline size_t hashFactor() { return m_hashFactor; }
static inline size_t getThreadHashFactor(int threadId)
{
diff --git a/src/Mem_unix.cpp b/src/Mem_unix.cpp
index 8acac2fa..53309406 100644
--- a/src/Mem_unix.cpp
+++ b/src/Mem_unix.cpp
@@ -86,3 +86,19 @@ void Mem::release(ScratchPadMem &scratchPadMem)
_mm_free(scratchPadMem.memory);
}
}
+
+void *Mem::allocateExecutableMemory(size_t size)
+{
+# if defined(__APPLE__)
+ return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
+# else
+ return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+# endif
+}
+
+void Mem::flushInstructionCache(void *p, size_t size)
+{
+# ifndef __FreeBSD__
+    __builtin___clear_cache(reinterpret_cast<char*>(p), reinterpret_cast<char*>(p) + size);
+# endif
+}
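For context, the typical lifecycle of a buffer returned by these helpers is: reserve RWX memory once, write freshly generated machine code into it, flush the instruction cache, then call it through a function pointer. A minimal sketch under those assumptions for the Linux/mmap path (the emitted byte below is a bare x86-64 `ret` and is illustrative only):

```cpp
#include <cstdint>
#include <cstring>
#include <sys/mman.h>

// Same mmap-based allocation as Mem::allocateExecutableMemory above (non-Apple path).
void* allocateExecutableMemory(size_t size) {
    return mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC,
                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
}

void flushInstructionCache(void* p, size_t size) {
    __builtin___clear_cache(reinterpret_cast<char*>(p), reinterpret_cast<char*>(p) + size);
}

int main() {
    void* buf = allocateExecutableMemory(0x4000);
    if (buf == MAP_FAILED) return 1;

    // Illustrative payload: a single x86-64 RET instruction (0xC3).
    const uint8_t code[] = {0xC3};
    std::memcpy(buf, code, sizeof(code));

    // Required on architectures with incoherent I/D caches before running new code.
    flushInstructionCache(buf, sizeof(code));

    auto fn = reinterpret_cast<void (*)()>(buf);
    fn();

    munmap(buf, 0x4000);
    return 0;
}
```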
diff --git a/src/Mem_win.cpp b/src/Mem_win.cpp
index 1a8e582d..94ad8e06 100644
--- a/src/Mem_win.cpp
+++ b/src/Mem_win.cpp
@@ -182,4 +182,14 @@ void Mem::release(ScratchPadMem &scratchPadMem)
else {
_mm_free(scratchPadMem.memory);
}
+}
+
+void *Mem::allocateExecutableMemory(size_t size)
+{
+ return VirtualAlloc(0, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
+}
+
+void Mem::flushInstructionCache(void *p, size_t size)
+{
+ ::FlushInstructionCache(GetCurrentProcess(), p, size);
}
\ No newline at end of file
diff --git a/src/Options.cpp b/src/Options.cpp
index 87405c4a..3619b293 100644
--- a/src/Options.cpp
+++ b/src/Options.cpp
@@ -73,7 +73,7 @@ Options:\n"
-k, --keepalive send keepalived for prevent timeout (need pool support)\n\
-r, --retries=N number of times to retry before switch to backup server (default: 5)\n\
-R, --retry-pause=N time to pause between retries (default: 5)\n\
- --pow-variant=V specificy the PoW variat to use: -> 'auto' (default), '0' (v0), '1' (v1, aka cnv7), '2' (v2, aka cnv8), 'ipbc' (tube), 'xao', 'xtl' (including autodetect for > v5), 'rto', 'xfh', 'upx', 'turtle', 'hosp'\n\
+  --pow-variant=V       specify the PoW variant to use: \n'auto' (default), '0', '1', '2', 'ipbc', 'xao', 'xtl', 'rto', 'xfh', 'upx', 'turtle', 'hosp', 'r', 'wow', 'double' (xcash), 'zls' (zelerius), 'rwz' (graft)\n\
for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations\n\
--asm-optimization=V specificy the ASM optimization to use: -> 'auto' (default), 'intel', 'ryzen', 'bulldozer', 'off' \n\
--multihash-factor=N number of hash blocks to process at a time (don't set or 0 enables automatic selection of optimal number of hash blocks)\n\
@@ -92,7 +92,8 @@ Options:\n"
--api-access-token=T access token for API\n\
--api-worker-id=ID custom worker-id for API\n\
--reboot-cmd command/bat to execute to Reboot miner\n\
- --force-pow-variant disable pow/variant parsing from pool\n"
+ --force-pow-variant skip pow/variant parsing from pool\n\
+ --skip-self-check skip self check on startup\n"
# ifndef XMRIG_NO_CC
"\
--cc-url=URL url of the CC Server\n\
@@ -179,6 +180,7 @@ static struct option const options[] = {
{ "force-pow-variant", 0, nullptr, 1016 },
{ "pow-variant", 1, nullptr, 1017 },
{ "variant", 1, nullptr, 1017 },
+ { "skip-self-check", 0, nullptr, 1018 },
{ "api-port", 1, nullptr, 4000 },
{ "api-access-token", 1, nullptr, 4001 },
{ "api-worker-id", 1, nullptr, 4002 },
@@ -237,6 +239,7 @@ static struct option const config_options[] = {
{ "force-pow-variant", 0, nullptr, 1016 },
{ "pow-variant", 1, nullptr, 1017 },
{ "variant", 1, nullptr, 1017 },
+ { "skip-self-check", 0, nullptr, 1018 },
{ "doublehash-thread-mask", 1, nullptr, 4013 },
{ "multihash-thread-mask", 1, nullptr, 4013 },
{ "asm-optimization", 1, nullptr, 4020 },
@@ -331,7 +334,10 @@ constexpr static const char *pow_variant_names[] = {
"fast2",
"upx",
"turtle",
- "hosp"
+ "hosp",
+ "wow",
+ "r",
+ "xcash"
};
constexpr static const char *asm_optimization_names[] = {
@@ -380,6 +386,7 @@ Options::Options(int argc, char **argv) :
m_ccPushPeriodicStatus(false),
m_ccPushZeroHashrateMiners(false),
m_forcePowVariant(false),
+ m_skipSelfCheck(false),
m_fileName(Platform::defaultConfigName()),
m_apiToken(nullptr),
m_apiWorkerId(nullptr),
@@ -643,11 +650,14 @@ bool Options::parseArg(int key, const char *arg)
return parseBoolean(key, true);
case 1016: /* --force-pow-variant */
- return parseBoolean(key, false);
+ return parseBoolean(key, true);
case 1017: /* --pow-variant/--variant */
return parsePowVariant(arg);
+ case 1018: /* --skip-self-check */
+ return parseBoolean(key, true);
+
case 4016: /* --cc-use-tls */
return parseBoolean(key, true);
@@ -912,6 +922,10 @@ bool Options::parseBoolean(int key, bool enable)
m_forcePowVariant = enable;
break;
+ case 1018: /* --skip-self-check */
+ m_skipSelfCheck = enable;
+ break;
+
case 2000: /* --colors */
m_colors = enable;
break;
@@ -1206,6 +1220,31 @@ bool Options::parsePowVariant(const char *powVariant)
break;
}
+ if (i == ARRAY_SIZE(pow_variant_names) - 1 && !strcmp(powVariant, "wow")) {
+ m_powVariant = POW_WOW;
+ break;
+ }
+
+ if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "4") || !strcmp(powVariant, "r") || !strcmp(powVariant, "cnv4") || !strcmp(powVariant, "cnv5"))) {
+ m_powVariant = POW_V4;
+ break;
+ }
+
+ if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "xcash") || !strcmp(powVariant, "heavyx") || !strcmp(powVariant, "double"))) {
+ m_powVariant = POW_DOUBLE;
+ break;
+ }
+
+ if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "zelerius") || !strcmp(powVariant, "zls") || !strcmp(powVariant, "zlx"))) {
+ m_powVariant = POW_ZELERIUS;
+ break;
+ }
+
+ if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "rwz") || !strcmp(powVariant, "graft"))) {
+ m_powVariant = POW_RWZ;
+ break;
+ }
+
if (i == ARRAY_SIZE(pow_variant_names) - 1) {
showUsage(1);
return false;
diff --git a/src/Options.h b/src/Options.h
index a02044f6..902eed3d 100644
--- a/src/Options.h
+++ b/src/Options.h
@@ -84,7 +84,8 @@ public:
inline bool ccPushZeroHashrateMiners() const { return m_ccPushZeroHashrateMiners; }
inline bool ccUsePushover() const { return ccPushoverUser() && ccPushoverToken(); }
inline bool ccUseTelegram() const { return ccTelegramBotToken() && ccTelegramChatId(); }
- inline bool forcePowVariant() const { return m_forcePowVariant; };
+ inline bool forcePowVariant() const { return m_forcePowVariant; }
+ inline bool skipSelfCheck() const { return m_skipSelfCheck; }
inline const char *fileName() const { return m_fileName; }
inline const char *apiToken() const { return m_apiToken; }
inline const char *apiWorkerId() const { return m_apiWorkerId; }
@@ -171,6 +172,7 @@ private:
bool m_ccPushPeriodicStatus;
bool m_ccPushZeroHashrateMiners;
bool m_forcePowVariant;
+ bool m_skipSelfCheck;
const char* m_fileName;
char *m_apiToken;
char *m_apiWorkerId;
diff --git a/src/PowVariant.h b/src/PowVariant.h
index a03fbd22..17ddec11 100644
--- a/src/PowVariant.h
+++ b/src/PowVariant.h
@@ -39,6 +39,11 @@ enum PowVariant
POW_UPX,
POW_TURTLE,
POW_HOSP,
+ POW_WOW,
+ POW_V4,
+ POW_DOUBLE,
+ POW_ZELERIUS,
+ POW_RWZ,
LAST_ITEM
};
@@ -74,6 +79,16 @@ inline std::string getPowVariantName(PowVariant powVariant)
return "turtle";
case POW_HOSP:
return "hosp";
+ case POW_WOW:
+ return "wow";
+ case POW_V4:
+ return "r";
+ case POW_DOUBLE:
+ return "double";
+ case POW_ZELERIUS:
+ return "zls";
+ case POW_RWZ:
+ return "rwz";
case POW_AUTODETECT:
default:
return "-1";
@@ -149,6 +164,16 @@ inline PowVariant parseVariant(const std::string variant)
powVariant = PowVariant::POW_TURTLE;
} else if (variant == "hosp" || variant == "hospital") {
powVariant = PowVariant::POW_HOSP;
+ } else if (variant == "wow" || variant == "wownero") {
+ powVariant = PowVariant::POW_WOW;
+ } else if (variant == "r" || variant == "4" || variant == "cnv4" || variant == "cnv5") {
+ powVariant = PowVariant::POW_V4;
+ } else if (variant == "xcash" || variant == "heavyx" || variant == "double") {
+ powVariant = PowVariant::POW_DOUBLE;
+ } else if (variant == "zelerius" || variant == "zls" || variant == "zlx") {
+ powVariant = PowVariant::POW_ZELERIUS;
+ } else if (variant == "rwz" || variant == "graft") {
+ powVariant = PowVariant::POW_RWZ;
}
return powVariant;
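Taken together with the `Options::parsePowVariant` additions above, the new aliases resolve as follows. A minimal standalone restatement of the mapping added in this file (not the header itself, just the alias handling):

```cpp
#include <cassert>
#include <string>

enum PowVariant { POW_WOW, POW_V4, POW_DOUBLE, POW_ZELERIUS, POW_RWZ, LAST_ITEM };

// Standalone restatement of the alias handling added to parseVariant() above.
PowVariant parseNewVariant(const std::string& v) {
    if (v == "wow" || v == "wownero")                        return POW_WOW;
    if (v == "r" || v == "4" || v == "cnv4" || v == "cnv5")  return POW_V4;
    if (v == "xcash" || v == "heavyx" || v == "double")      return POW_DOUBLE;
    if (v == "zelerius" || v == "zls" || v == "zlx")         return POW_ZELERIUS;
    if (v == "rwz" || v == "graft")                          return POW_RWZ;
    return LAST_ITEM;
}

int main() {
    assert(parseNewVariant("graft")  == POW_RWZ);     // reported back as "rwz"
    assert(parseNewVariant("cnv5")   == POW_V4);      // reported back as "r"
    assert(parseNewVariant("heavyx") == POW_DOUBLE);  // reported back as "double"
    return 0;
}
```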
diff --git a/src/config.json b/src/config.json
index 50f1f9ca..c7a89a1f 100644
--- a/src/config.json
+++ b/src/config.json
@@ -4,7 +4,7 @@
"threads": 0, // number of miner threads (not set or 0 enables automatic selection of optimal thread count)
"multihash-factor": 0, // number of hash blocks to process at a time (not set or 0 enables automatic selection of optimal number of hash blocks)
"multihash-thread-mask" : null, // for multihash-factors>0 only, limits multihash to given threads (mask), mask "0x3" means run multihash on thread 0 and 1 only (default: all threads)
- "pow-variant" : "auto", // specificy the PoW variat to use: -> auto (default), 0 (v0), 1 (v1, aka monerov7, aeonv7), 2 (v2, aka monerov8), tube (ipbc), alloy (xao), xtl (including autodetect for > v5), msr, xhv, rto, xfh, upx, turtle, hosp
+ "pow-variant" : "auto", // specificy the PoW variat to use: -> auto (default), '0', '1', '2', 'ipbc', 'xao', 'xtl', 'rto', 'xfh', 'upx', 'turtle', 'hosp', 'r', 'wow', 'double (xcash)', 'zls' (zelerius), 'rwz' (graft)
// for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations
"asm-optimization" : "auto", // specificy the ASM optimization to use: -> auto (default), intel, ryzen, bulldozer, off
"background": false, // true to run the miner in the background (Windows only, for *nix plase use screen/tmux or systemd service instead)
@@ -21,6 +21,7 @@
"syslog": false, // use system log for output messages
"reboot-cmd" : "", // command to execute to reboot the OS
"force-pow-variant" : false, // force pow variant, dont parse pow/variant from pool job
+ "skip-self-check" : false, // skip the self check on startup
"pools": [
{
"url": "donate2.graef.in:80", // URL of mining server
diff --git a/src/crypto/CryptoNight.cpp b/src/crypto/CryptoNight.cpp
index 0040a164..cd4b6699 100644
--- a/src/crypto/CryptoNight.cpp
+++ b/src/crypto/CryptoNight.cpp
@@ -23,6 +23,7 @@
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include
#include "crypto/CryptoNight.h"
#if defined(XMRIG_ARM)
@@ -34,282 +35,398 @@
#include "crypto/CryptoNight_test.h"
template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+static void cryptonight_aesni(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
# if !defined(XMRIG_ARMv7)
- if (powVersion == PowVariant::POW_V1) {
+ if (variant == PowVariant::POW_V1) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
#else
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V1, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
}
#endif
- } else if (powVersion == PowVariant::POW_V2) {
+ } else if (variant == PowVariant::POW_V2) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
#else
if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) ||
(asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1) ||
(asmOptimization == AsmOptimization::ASM_BULLDOZER && NUM_HASH_BLOCKS == 1)) {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V2, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
}
#endif
-} else if (powVersion == PowVariant::POW_ALLOY) {
- CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
-} else if (powVersion == PowVariant::POW_XTL) {
+ } else if (variant == PowVariant::POW_V4) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V4, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
+#else
+ if (asmOptimization != AsmOptimization::ASM_OFF && NUM_HASH_BLOCKS <= 2) {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V4, NUM_HASH_BLOCKS>::hashPowV4_asm(input, size, output, scratchPad, height, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V4, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
+ }
+#endif
+ } else if (variant == PowVariant::POW_WOW) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_WOW, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
+#else
+ if (asmOptimization != AsmOptimization::ASM_OFF && NUM_HASH_BLOCKS <= 2) {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_WOW, NUM_HASH_BLOCKS>::hashPowV4_asm(input, size, output, scratchPad, height, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_WOW, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
+ }
+#endif
+ } else if (variant == PowVariant::POW_ALLOY) {
+ CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_ALLOY, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
+ } else if (variant == PowVariant::POW_XTL) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_XTL, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
#else
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_XTL, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_XTL, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
}
#endif
-} else if (powVersion == PowVariant::POW_FAST_2) {
+ } else if (variant == PowVariant::POW_FAST_2) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_FAST_2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
#else
if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) ||
(asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1) ||
(asmOptimization == AsmOptimization::ASM_BULLDOZER && NUM_HASH_BLOCKS == 1)) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_FAST_2, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_FAST_2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
}
#endif
-} else if (powVersion == PowVariant::POW_MSR) {
+ } else if (variant == PowVariant::POW_DOUBLE) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_DOUBLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+#else
+ if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) ||
+ (asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1) ||
+ (asmOptimization == AsmOptimization::ASM_BULLDOZER && NUM_HASH_BLOCKS == 1)) {
+ CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_DOUBLE, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_DOUBLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_ZELERIUS) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_ZELERIUS, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+#else
+ if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) ||
+ (asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1) ||
+ (asmOptimization == AsmOptimization::ASM_BULLDOZER && NUM_HASH_BLOCKS == 1)) {
+ CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_ZELERIUS, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_ZELERIUS, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_RWZ) {
+#if defined(XMRIG_ARM)
+        CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_RWZ, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+#else
+ if ((asmOptimization != AsmOptimization::ASM_OFF && NUM_HASH_BLOCKS <= 2)) {
+ CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_RWZ, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_RWZ, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_MSR) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_MSR, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
#else
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_MSR, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_MSR, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
}
#endif
-} else if (powVersion == PowVariant::POW_RTO || powVersion == PowVariant::POW_HOSP) {
+ } else if (variant == PowVariant::POW_RTO) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_RTO, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
#else
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_RTO, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_RTO, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
}
#endif
-} else if (powVersion == PowVariant::POW_XFH) {
- CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad);
-} else {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
-}
-# endif
-}
-
-template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
- if (powVersion == PowVariant::POW_V1) {
+ } else if (variant == PowVariant::POW_HOSP) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_HOSP, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
#else
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_HOSP, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_HOSP, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
}
#endif
- } else if (powVersion == PowVariant::POW_V2) {
-#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
-#else
- if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion);
- } else {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
- }
-#endif
- } else if (powVersion == PowVariant::POW_FAST_2) {
-#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
-#else
- if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion);
- } else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
- }
-#endif
- } else if (powVersion == PowVariant::POW_ALLOY) {
- CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
- } else if (powVersion == PowVariant::POW_XTL) {
-#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
-#else
- if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
- } else {
- CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
- }
-#endif
- } else if (powVersion == PowVariant::POW_MSR) {
-#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
-#else
- if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
- } else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
- }
-#endif
- } else if (powVersion == PowVariant::POW_RTO || powVersion == PowVariant::POW_HOSP) {
-#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
-#else
- if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
- } else {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
- }
-#endif
- } else if (powVersion == PowVariant::POW_XFH) {
- CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad);
+ } else if (variant == PowVariant::POW_XFH) {
+ CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_XFH, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
} else {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
- }
-}
-
-template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_lite_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
-# if !defined(XMRIG_ARMv7)
- if (powVersion == PowVariant::POW_V1) {
-#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
-#else
- if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
- } else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
- }
-#endif
- } else if (powVersion == PowVariant::POW_TUBE) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
- } else if (powVersion == PowVariant::POW_UPX) {
-#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
-#else
- if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
- } else {
- CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
- }
-#endif
- } else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V0, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
}
# endif
}
template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_lite_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
- if (powVersion == PowVariant::POW_V1) {
+static void cryptonight_softaes(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+ if (variant == PowVariant::POW_V1) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
#else
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V1, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
}
#endif
- } else if (powVersion == PowVariant::POW_TUBE) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
- } else if (powVersion == PowVariant::POW_UPX) {
+ } else if (variant == PowVariant::POW_V2) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
#else
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V2, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
}
#endif
+ } else if (variant == PowVariant::POW_V4) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V4, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
+#else
+ if (asmOptimization != AsmOptimization::ASM_OFF && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V4, NUM_HASH_BLOCKS>::hashPowV4_asm(input, size, output, scratchPad, height, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V4, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
+ }
+#endif
+ } else if (variant == PowVariant::POW_WOW) {
+#if defined(XMRIG_ARM)
+        CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_WOW, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
+#else
+ if (asmOptimization != AsmOptimization::ASM_OFF && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_WOW, NUM_HASH_BLOCKS>::hashPowV4_asm(input, size, output, scratchPad, height, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_WOW, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
+ }
+#endif
+ } else if (variant == PowVariant::POW_FAST_2) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_FAST_2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_FAST_2, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_FAST_2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_DOUBLE) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_DOUBLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_DOUBLE, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_DOUBLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_ZELERIUS) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_ZELERIUS, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_ZELERIUS, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_ZELERIUS, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_RWZ) {
+ CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_RWZ, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ } else if (variant == PowVariant::POW_ALLOY) {
+ CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_ALLOY, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
+ } else if (variant == PowVariant::POW_XTL) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_XTL, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_XTL, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_XTL, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_MSR) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_MSR, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_MSR, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_MSR, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_RTO) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_RTO, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_RTO, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_RTO, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_HOSP) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_HOSP, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_HOSP, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_HOSP, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_XFH) {
+ CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_XFH, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
} else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V0, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
}
}
template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_super_lite_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+static void cryptonight_lite_aesni(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+# if !defined(XMRIG_ARMv7)
+ if (variant == PowVariant::POW_V1) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_V1, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_TUBE) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_TUBE, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
+ } else if (variant == PowVariant::POW_UPX) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_UPX, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_UPX, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_UPX, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ }
+#endif
+ } else {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_V0, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
+ }
+# endif
+}
+
+template <size_t NUM_HASH_BLOCKS>
+static void cryptonight_lite_softaes(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+ if (variant == PowVariant::POW_V1) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_V1, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_TUBE) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_TUBE, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
+ } else if (variant == PowVariant::POW_UPX) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_UPX, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_UPX, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_UPX, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ }
+#endif
+ } else {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_V0, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
+ }
+}
+
+template <size_t NUM_HASH_BLOCKS>
+static void cryptonight_super_lite_aesni(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
}
template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_super_lite_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+static void cryptonight_super_lite_softaes(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
}
template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_ultra_lite_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+static void cryptonight_ultra_lite_aesni(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
# if !defined(XMRIG_ARMv7)
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, POW_TURTLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
#else
if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) ||
(asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1) ||
(asmOptimization == AsmOptimization::ASM_BULLDOZER && NUM_HASH_BLOCKS == 1)) {
- CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, POW_TURTLE, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, POW_TURTLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
}
#endif
# endif
}
template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_ultra_lite_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+static void cryptonight_ultra_lite_softaes(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, POW_TURTLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
#else
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, POW_TURTLE, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, POW_TURTLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
}
#endif
}
template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_heavy_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+static void cryptonight_heavy_aesni(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
# if !defined(XMRIG_ARMv7)
- if (powVersion == PowVariant::POW_XHV) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad);
+ if (variant == PowVariant::POW_XHV) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, POW_XHV, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
}
- else if (powVersion == PowVariant::POW_TUBE) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, NUM_HASH_BLOCKS>::hashHeavyTube(input, size, output, scratchPad);
+ else if (variant == PowVariant::POW_TUBE) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, POW_TUBE, NUM_HASH_BLOCKS>::hashHeavyTube(input, size, output, scratchPad);
}
else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, POW_V0, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
}
# endif
}
template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_heavy_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
- if (powVersion == PowVariant::POW_XHV) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad);
+static void cryptonight_heavy_softaes(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+ if (variant == PowVariant::POW_XHV) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, POW_XHV, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
}
- else if (powVersion == PowVariant::POW_TUBE) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, NUM_HASH_BLOCKS>::hashHeavyTube(input, size, output, scratchPad);
+ else if (variant == PowVariant::POW_TUBE) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, POW_TUBE, NUM_HASH_BLOCKS>::hashHeavyTube(input, size, output, scratchPad);
}
else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, POW_V0, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
}
}
-void (*cryptonight_hash_ctx[MAX_NUM_HASH_BLOCKS])(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad);
+void (*cryptonight_hash_ctx[MAX_NUM_HASH_BLOCKS])(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad);
template <size_t NUM_HASH_BLOCKS>
void setCryptoNightHashMethods(Options::Algo algo, bool aesni)
@@ -377,15 +494,16 @@ bool CryptoNight::init(int algo, bool aesni)
}
setCryptoNightHashMethods<MAX_NUM_HASH_BLOCKS>(static_cast<Options::Algo>(algo), aesni);
- return selfTest(algo);
+
+ return Options::i()->skipSelfCheck() ? true : selfCheck(algo);
}
-void CryptoNight::hash(size_t factor, AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad)
+void CryptoNight::hash(size_t factor, AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad)
{
- cryptonight_hash_ctx[factor-1](asmOptimization, powVersion, input, size, output, scratchPad);
+ cryptonight_hash_ctx[factor-1](asmOptimization, height, variant, input, size, output, scratchPad);
}
-bool CryptoNight::selfTest(int algo)
+bool CryptoNight::selfCheck(int algo)
{
if (cryptonight_hash_ctx[0] == nullptr
#if MAX_NUM_HASH_BLOCKS > 1
@@ -413,6 +531,14 @@ bool CryptoNight::selfTest(int algo)
ScratchPad* scratchPad = static_cast<ScratchPad*>(_mm_malloc(sizeof(ScratchPad), 4096));
scratchPad->memory = (uint8_t *) _mm_malloc(MEMORY * 6, 16);
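+ // Reserve an executable region per scratchpad for the CN-R JIT code: the first 0x2000 bytes
+ // hold the single-hash main loop, the second 0x2000 bytes the double-hash main loop.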
+ auto* p = reinterpret_cast<uint8_t*>(Mem::allocateExecutableMemory(0x4000));
+ scratchPad->generated_code = reinterpret_cast<cn_mainloop_fun_ms_abi>(p);
+ scratchPad->generated_code_double = reinterpret_cast<cn_mainloop_double_fun_ms_abi>(p + 0x2000);
+
+ scratchPad->generated_code_data.variant = PowVariant::LAST_ITEM;
+ scratchPad->generated_code_data.height = (uint64_t)(-1);
+ scratchPad->generated_code_double_data = scratchPad->generated_code_data;
+
scratchPads[i] = scratchPad;
}
@@ -427,129 +553,128 @@ bool CryptoNight::selfTest(int algo)
if (algo == Options::ALGO_CRYPTONIGHT_HEAVY) {
// cn-heavy
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
resultHeavy = resultHeavy && memcmp(output, test_output_heavy, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
resultHeavy = resultHeavy && memcmp(output, test_output_heavy, 64) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 2
- cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
resultHeavy = resultHeavy && memcmp(output, test_output_heavy, 96) == 0;
#endif
// cn-heavy haven
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 64) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 2
- cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 96) == 0;
#endif
// cn-heavy bittube
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_tube, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_tube, 64) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 2
- cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_tube, 96) == 0;
#endif
} else if (algo == Options::ALGO_CRYPTONIGHT_LITE) {
// cn-lite v0
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_v0_lite, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_v0_lite, 64) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 2
- cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_v0_lite, 96) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 3
- cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[3](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_v0_lite, 128) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 4
- cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[4](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_v0_lite, 160) == 0;
#endif
// cn-lite v7 tests
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
- resultLite = resultLite && memcmp(output, test_output_v1_lite, 32) == 0;
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ resultLite = resultLite && memcmp(output, test_output_v1_lite, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_v1_lite, 64) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 2
- cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_v1_lite, 96) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 3
- cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[3](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_v1_lite, 128) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 4
- cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[4](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_v1_lite, 160) == 0;
#endif
-
// cn-lite ipbc tests
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 64) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 2
- cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 96) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 3
- cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[3](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 128) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 4
- cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[4](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 160) == 0;
#endif
// cn-lite upx
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_UPX, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_UPX, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_upx, 32) == 0;
} else if (algo == Options::ALGO_CRYPTONIGHT_SUPERLITE) {
@@ -559,123 +684,173 @@ bool CryptoNight::selfTest(int algo)
} else if (algo == Options::ALGO_CRYPTONIGHT_ULTRALITE) {
// cn ultralite (cnv8 + turtle)
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_TURTLE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_TURTLE, test_input, 76, output, scratchPads);
resultUltraLite = resultUltraLite && memcmp(output, test_output_turtle, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_TURTLE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_TURTLE, test_input, 76, output, scratchPads);
resultUltraLite = resultUltraLite && memcmp(output, test_output_turtle, 64) == 0;
#endif
} else {
// cn v0 aka original
-
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V0,test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V0,test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v0, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v0, 64) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 2
- cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v0, 96) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 3
- cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[3](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v0, 128) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 4
- cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[4](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v0, 160) == 0;
#endif
// cn v7 aka cnv1
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v1, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v1, 64) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 2
- cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v1, 96) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 3
- cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[3](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v1, 128) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 4
- cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[4](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v1, 160) == 0;
#endif
// cnv7 + xtl
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_XTL,test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_XTL,test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_xtl, 32) == 0;
// cnv7 + msr aka cn-fast
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_MSR,test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_MSR,test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_msr, 32) == 0;
// cnv7 + alloy
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_ALLOY,test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_ALLOY,test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_alloy, 32) == 0;
// cnv7 + hosp/rto
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_HOSP,test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_HOSP,test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_hosp, 32) == 0;
// cnv8 aka cnv2
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V2, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v2, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_V2, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v2, 64) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 2
- cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_V2, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v2, 96) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 3
- cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[3](asmOptimization, 0, PowVariant::POW_V2, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v2, 128) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 4
- cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[4](asmOptimization, 0, PowVariant::POW_V2, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v2, 160) == 0;
#endif
// cn xfh aka cn-heavy-superfast
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_XFH, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_XFH, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_xfh, 32) == 0;
// cnv8 + xtl aka cn-fast2
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_FAST_2, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_FAST_2, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_xtl_v9, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_FAST_2, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_FAST_2, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_xtl_v9, 64) == 0;
#endif
+
+ // cnv8 + xcash
+
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_DOUBLE, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_xcash, 32) == 0;
+
+ // cnv8 + zelerius
+
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_ZELERIUS, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_zelerius, 32) == 0;
+
+ // cnv8 + rwz
+
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_RWZ, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_rwz, 32) == 0;
+
+ #if MAX_NUM_HASH_BLOCKS > 1
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_RWZ, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_rwz, 64) == 0;
+ #endif
+
+ // cnv9 aka cnv4 aka cnv5 aka cnr
+
+ cryptonight_hash_ctx[0](asmOptimization, 10000, PowVariant::POW_V4, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_v4, 32) == 0;
+
+ #if MAX_NUM_HASH_BLOCKS > 1
+ cryptonight_hash_ctx[1](asmOptimization, 10000, PowVariant::POW_V4, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_v4, 64) == 0;
+ #endif
+
+ #if MAX_NUM_HASH_BLOCKS > 2
+ cryptonight_hash_ctx[2](asmOptimization, 10000, PowVariant::POW_V4, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_v4, 96) == 0;
+ #endif
+
+ #if MAX_NUM_HASH_BLOCKS > 3
+ cryptonight_hash_ctx[3](asmOptimization, 10000, PowVariant::POW_V4, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_v4, 128) == 0;
+ #endif
+
+ #if MAX_NUM_HASH_BLOCKS > 4
+ cryptonight_hash_ctx[4](asmOptimization, 10000, PowVariant::POW_V4, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_v4, 160) == 0;
+ #endif
+
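+ // Hashing the same input at neighbouring heights forces the CN-R random-math program
+ // to be regenerated, so this also exercises the JIT cache invalidation path.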
+ cryptonight_hash_ctx[0](asmOptimization, 10001, PowVariant::POW_V4, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_v4_1, 32) == 0;
+
+ cryptonight_hash_ctx[0](asmOptimization, 10002, PowVariant::POW_V4, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_v4_2, 32) == 0;
}
for (size_t i = 0; i < MAX_NUM_HASH_BLOCKS; ++i) {
diff --git a/src/crypto/CryptoNight.h b/src/crypto/CryptoNight.h
index 10415ca9..aaf29145 100644
--- a/src/crypto/CryptoNight.h
+++ b/src/crypto/CryptoNight.h
@@ -42,8 +42,25 @@
#define POW_DEFAULT_INDEX_SHIFT 3
#define POW_XLT_V4_INDEX_SHIFT 4
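+// The JIT-compiled CN-R main loops are emitted for the Microsoft x64 calling convention,
+// so the function-pointer types below carry ms_abi when built with GCC/Clang.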
+#if defined _MSC_VER || defined XMRIG_ARM
+#define ABI_ATTRIBUTE
+#else
+#define ABI_ATTRIBUTE __attribute__((ms_abi))
+#endif
+
+struct ScratchPad;
+typedef void(*cn_mainloop_fun_ms_abi)(ScratchPad*) ABI_ATTRIBUTE;
+typedef void(*cn_mainloop_double_fun_ms_abi)(ScratchPad*, ScratchPad*) ABI_ATTRIBUTE;
+
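+// Records which (variant, height) pair a JIT buffer was generated for, so the random-math
+// program is only recompiled when the block height or variant changes.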
+struct cryptonight_r_data {
+ int variant;
+ uint64_t height;
+
+ bool match(const int v, const uint64_t h) const { return (v == variant) && (h == height); }
+};
+
struct ScratchPad {
- alignas(16) uint8_t state[224]; // 224 instead of 200 to maintain aligned to 16 byte boundaries
+ alignas(16) uint8_t state[224];
alignas(16) uint8_t* memory;
// Additional stuff for asm impl
@@ -51,6 +68,11 @@ struct ScratchPad {
const void* input;
uint8_t* variant_table;
const uint32_t* t_fn;
+
+ cn_mainloop_fun_ms_abi generated_code;
+ cn_mainloop_double_fun_ms_abi generated_code_double;
+ cryptonight_r_data generated_code_data;
+ cryptonight_r_data generated_code_double_data;
};
alignas(64) static uint8_t variant1_table[256];
@@ -63,12 +85,12 @@ class CryptoNight
{
public:
static bool init(int algo, bool aesni);
- static void hash(size_t factor, AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPads);
+ static void hash(size_t factor, AsmOptimization asmOptimization, uint64_t height, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPads);
public:
private:
- static bool selfTest(int algo);
+ static bool selfCheck(int algo);
};
diff --git a/src/crypto/CryptoNightR_gen.cpp b/src/crypto/CryptoNightR_gen.cpp
new file mode 100644
index 00000000..d856cade
--- /dev/null
+++ b/src/crypto/CryptoNightR_gen.cpp
@@ -0,0 +1,190 @@
+/* XMRig
+ * Copyright 2010 Jeff Garzik
+ * Copyright 2012-2014 pooler
+ * Copyright 2014 Lucas Jones
+ * Copyright 2014-2016 Wolf9466
+ * Copyright 2016 Jay D Dee
+ * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
+ * Copyright 2018 Lee Clagett
+ * Copyright 2018-2019 SChernykh
+ * Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <string.h>
+
+typedef void(*void_func)();
+
+#include "crypto/asm/CryptonightR_template.h"
+#include "Mem.h"
+
+#if !defined XMRIG_ARM && !defined XMRIG_NO_ASM
+
+#include "crypto/CryptoNight_x86.h"
+
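+// Copies the machine code of one template snippet, delimited by the addresses of two
+// adjacent template functions, into the output buffer.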
+static inline void add_code(uint8_t* &p, void (*p1)(), void (*p2)())
+{
+ const ptrdiff_t size = reinterpret_cast<const uint8_t*>(p2) - reinterpret_cast<const uint8_t*>(p1);
+ if (size > 0) {
+ memcpy(p, reinterpret_cast<const void*>(p1), size);
+ p += size;
+ }
+}
+
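+// Emits machine code for the generated random-math sequence; prev_rot_src tracking skips
+// redundant MOVs of the rotation source register between consecutive ROR/ROL instructions.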
+static inline void add_random_math(uint8_t* &p, const V4_Instruction* code, int code_size, const void_func* instructions, const void_func* instructions_mov, bool is_64_bit, AsmOptimization ASM)
+{
+ uint32_t prev_rot_src = (uint32_t)(-1);
+
+ for (int i = 0;; ++i) {
+ const V4_Instruction inst = code[i];
+ if (inst.opcode == RET) {
+ break;
+ }
+
+ uint8_t opcode = (inst.opcode == MUL) ? inst.opcode : (inst.opcode + 2);
+ uint8_t dst_index = inst.dst_index;
+ uint8_t src_index = inst.src_index;
+
+ const uint32_t a = inst.dst_index;
+ const uint32_t b = inst.src_index;
+ const uint8_t c = opcode | (dst_index << V4_OPCODE_BITS) | (((src_index == 8) ? dst_index : src_index) << (V4_OPCODE_BITS + V4_DST_INDEX_BITS));
+
+ switch (inst.opcode) {
+ case ROR:
+ case ROL:
+ if (b != prev_rot_src) {
+ prev_rot_src = b;
+ add_code(p, instructions_mov[c], instructions_mov[c + 1]);
+ }
+ break;
+ }
+
+ if (a == prev_rot_src) {
+ prev_rot_src = (uint32_t)(-1);
+ }
+
+ void_func begin = instructions[c];
+
+ if ((ASM == AsmOptimization::ASM_BULLDOZER) && (inst.opcode == MUL) && !is_64_bit) {
+ // AMD Bulldozer has latency 4 for 32-bit IMUL and 6 for 64-bit IMUL
+ // Always use 32-bit IMUL for AMD Bulldozer in 32-bit mode - skip prefix 0x48 and change 0x49 to 0x41
+ uint8_t* prefix = reinterpret_cast<uint8_t*>(begin);
+
+ if (*prefix == 0x49) {
+ *(p++) = 0x41;
+ }
+
+ begin = reinterpret_cast<void_func>(prefix + 1);
+ }
+
+ add_code(p, begin, instructions[c + 1]);
+
+ if (inst.opcode == ADD) {
+ *(uint32_t*)(p - sizeof(uint32_t) - (is_64_bit ? 3 : 0)) = inst.C;
+ if (is_64_bit) {
+ prev_rot_src = (uint32_t)(-1);
+ }
+ }
+ }
+}
+
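+// Assembles the Wownero (CN-R/WOW) single-hash main loop: template prologue, the generated
+// random math, then the patched 32-bit offset that jumps back into the template's main loop.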
+void wow_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM)
+{
+ uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
+ uint8_t* p = p0;
+
+ add_code(p, CryptonightWOW_template_part1, CryptonightWOW_template_part2);
+ add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
+ add_code(p, CryptonightWOW_template_part2, CryptonightWOW_template_part3);
+ *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightWOW_template_mainloop) - ((const uint8_t*)CryptonightWOW_template_part1)) - (p - p0));
+ add_code(p, CryptonightWOW_template_part3, CryptonightWOW_template_end);
+
+ Mem::flushInstructionCache(machine_code, p - p0);
+}
+
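+// Same layout as above, but using the Monero CN-R (CNv4) templates.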
+void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM)
+{
+ uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
+ uint8_t* p = p0;
+
+ add_code(p, CryptonightR_template_part1, CryptonightR_template_part2);
+ add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
+ add_code(p, CryptonightR_template_part2, CryptonightR_template_part3);
+ *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_template_mainloop) - ((const uint8_t*)CryptonightR_template_part1)) - (p - p0));
+ add_code(p, CryptonightR_template_part3, CryptonightR_template_end);
+
+ Mem::flushInstructionCache(machine_code, p - p0);
+}
+
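+// The double-hash variants interleave two scratchpads, so the random-math block is emitted
+// twice, once per hashing lane.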
+void wow_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM)
+{
+ uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
+ uint8_t* p = p0;
+
+ add_code(p, CryptonightWOW_template_double_part1, CryptonightWOW_template_double_part2);
+ add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
+ add_code(p, CryptonightWOW_template_double_part2, CryptonightWOW_template_double_part3);
+ add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
+ add_code(p, CryptonightWOW_template_double_part3, CryptonightWOW_template_double_part4);
+ *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightWOW_template_double_mainloop) - ((const uint8_t*)CryptonightWOW_template_double_part1)) - (p - p0));
+ add_code(p, CryptonightWOW_template_double_part4, CryptonightWOW_template_double_end);
+
+ Mem::flushInstructionCache(machine_code, p - p0);
+}
+
+void v4_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM)
+{
+ uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
+ uint8_t* p = p0;
+
+ add_code(p, CryptonightR_template_double_part1, CryptonightR_template_double_part2);
+ add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
+ add_code(p, CryptonightR_template_double_part2, CryptonightR_template_double_part3);
+ add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
+ add_code(p, CryptonightR_template_double_part3, CryptonightR_template_double_part4);
+ *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_template_double_mainloop) - ((const uint8_t*)CryptonightR_template_double_part1)) - (p - p0));
+ add_code(p, CryptonightR_template_double_part4, CryptonightR_template_double_end);
+
+ Mem::flushInstructionCache(machine_code, p - p0);
+}
+
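+// Soft-AES builds use separate templates whose AES rounds are done with table lookups
+// instead of AES-NI.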
+void wow_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM)
+{
+ uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
+ uint8_t* p = p0;
+
+ add_code(p, CryptonightWOW_soft_aes_template_part1, CryptonightWOW_soft_aes_template_part2);
+ add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
+ add_code(p, CryptonightWOW_soft_aes_template_part2, CryptonightWOW_soft_aes_template_part3);
+ *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightWOW_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightWOW_soft_aes_template_part1)) - (p - p0));
+ add_code(p, CryptonightWOW_soft_aes_template_part3, CryptonightWOW_soft_aes_template_end);
+
+ Mem::flushInstructionCache(machine_code, p - p0);
+}
+
+void v4_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM)
+{
+ uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
+ uint8_t* p = p0;
+
+ add_code(p, CryptonightR_soft_aes_template_part1, CryptonightR_soft_aes_template_part2);
+ add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
+ add_code(p, CryptonightR_soft_aes_template_part2, CryptonightR_soft_aes_template_part3);
+ *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightR_soft_aes_template_part1)) - (p - p0));
+ add_code(p, CryptonightR_soft_aes_template_part3, CryptonightR_soft_aes_template_end);
+
+ Mem::flushInstructionCache(machine_code, p - p0);
+}
+#endif
\ No newline at end of file
diff --git a/src/crypto/CryptoNight_arm.h b/src/crypto/CryptoNight_arm.h
index b0e31ae6..6c3b2fd7 100644
--- a/src/crypto/CryptoNight_arm.h
+++ b/src/crypto/CryptoNight_arm.h
@@ -36,11 +36,26 @@
#endif
+#define SWAP32LE(x) x
+#define SWAP64LE(x) x
+#define hash_extra_blake(data, length, hash) blake256_hash((uint8_t*)(hash), (uint8_t*)(data), (length))
+
+#ifndef NOINLINE
+#ifdef __GNUC__
+#define NOINLINE __attribute__ ((noinline))
+#elif _MSC_VER
+#define NOINLINE __declspec(noinline)
+#else
+#define NOINLINE
+#endif
+#endif
+
#include
#include
#include "crypto/CryptoNight.h"
#include "crypto/soft_aes.h"
+#include "variant4_random_math.h"
extern "C"
@@ -111,11 +126,11 @@ static inline __attribute__((always_inline)) uint64_t _mm_cvtsi128_si64(__m128i
#define EXTRACT64(X) _mm_cvtsi128_si64(X)
-# define SHUFFLE_PHASE_1(l, idx, bx0, bx1, ax) \
+# define SHUFFLE_PHASE_1(l, idx, bx0, bx1, ax, reverse) \
{ \
- const uint64x2_t chunk1 = vld1q_u64((uint64_t*)((l) + ((idx) ^ 0x10))); \
+ const uint64x2_t chunk1 = vld1q_u64((uint64_t*)((l) + ((idx) ^ (reverse ? 0x30 : 0x10)))); \
const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((l) + ((idx) ^ 0x20))); \
- const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((l) + ((idx) ^ 0x30))); \
+ const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((l) + ((idx) ^ (reverse ? 0x10 : 0x30)))); \
vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(bx1))); \
vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(bx0))); \
vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(ax))); \
@@ -136,18 +151,52 @@ static inline __attribute__((always_inline)) uint64_t _mm_cvtsi128_si64(__m128i
sqrt_result##idx += ((r2 + b > sqrt_input) ? -1 : 0) + ((r2 + (1ULL << 32) < sqrt_input - s) ? 1 : 0); \
}
-# define SHUFFLE_PHASE_2(l, idx, bx0, bx1, ax, lo, hi) \
+# define SHUFFLE_PHASE_2(l, idx, bx0, bx1, ax, lo, hi, reverse) \
{ \
const uint64x2_t chunk1 = veorq_u64(vld1q_u64((uint64_t*)((l) + ((idx) ^ 0x10))), vcombine_u64(vcreate_u64(hi), vcreate_u64(lo))); \
const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((l) + ((idx) ^ 0x20))); \
const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((l) + ((idx) ^ 0x30))); \
hi ^= ((uint64_t*)((l) + ((idx) ^ 0x20)))[0]; \
lo ^= ((uint64_t*)((l) + ((idx) ^ 0x20)))[1]; \
+ if (reverse) { \
+ vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x10)), vaddq_u64(chunk1, vreinterpretq_u64_u8(bx1))); \
+ vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x20)), vaddq_u64(chunk3, vreinterpretq_u64_u8(bx0))); \
+ } else { \
+ vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(bx1))); \
+ vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(bx0))); \
+ } \
+ vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(ax))); \
+}
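+// The "reverse" argument to both shuffle macros swaps the chunk1/chunk3 ordering,
+// which is what the RWZ/Graft variant changes in the shuffle step.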
+
+# define SHUFFLE_V4(l, idx, bx0, bx1, ax, cx) \
+{ \
+ const uint64x2_t chunk1 = vld1q_u64((uint64_t*)((l) + ((idx) ^ 0x10))); \
+ const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((l) + ((idx) ^ 0x20))); \
+ const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((l) + ((idx) ^ 0x30))); \
vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(bx1))); \
vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(bx0))); \
vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(ax))); \
+ cx = veorq_u64(veorq_u64(cx, chunk3), veorq_u64(chunk1, chunk2)); \
}
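+// CN-R random math: r[0..3] are seeded from Keccak state words 12 and 13, and the random
+// program is generated from the variant and the block height.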
+# define VARIANT4_RANDOM_MATH_INIT(idx, h) \
+ uint32_t r##idx[9]; \
+ struct V4_Instruction code##idx[256]; \
+ r##idx[0] = (uint32_t)(h[12]); \
+ r##idx[1] = (uint32_t)(h[12] >> 32); \
+ r##idx[2] = (uint32_t)(h[13]); \
+ r##idx[3] = (uint32_t)(h[13] >> 32); \
+ v4_random_math_init(code##idx, VARIANT, height);
+
+# define VARIANT4_RANDOM_MATH(idx, al, ah, cl, bx0, bx1) \
+ cl ^= (r##idx[0] + r##idx[1]) | ((uint64_t)(r##idx[2] + r##idx[3]) << 32); \
+ r##idx[4] = static_cast<uint32_t>(al); \
+ r##idx[5] = static_cast<uint32_t>(ah); \
+ r##idx[6] = static_cast<uint32_t>(_mm_cvtsi128_si32(bx0)); \
+ r##idx[7] = static_cast<uint32_t>(_mm_cvtsi128_si32(bx1)); \
+ r##idx[8] = static_cast<uint32_t>(_mm_cvtsi128_si32(_mm_srli_si128(bx1, 8))); \
+ v4_random_math(code##idx, r##idx); \
+
#if defined (__arm64__) || defined (__aarch64__)
static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t* hi)
@@ -640,8 +689,7 @@ static inline void cn_implode_scratchpad_heavy(const __m128i* input, __m128i* ou
_mm_store_si128(output + 11, xout7);
}
-// n-Loop version. Seems to be little bit slower then the hardcoded one.
-template <size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, size_t NUM_HASH_BLOCKS>
+template <size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, PowVariant VARIANT, size_t NUM_HASH_BLOCKS>
class CryptoNightMultiHash
{
public:
@@ -650,79 +698,7 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
- __m128i ax[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast(input) + hashBlock * size, (int) size,
- scratchPad[hashBlock]->state, 200);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- ax[hashBlock] = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- if (SOFT_AES) {
- cx[hashBlock] = soft_aesenc((uint32_t *) &l[hashBlock][idx[hashBlock] & MASK], ax[hashBlock]);
- } else {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- cx[hashBlock] = _mm_aesenc_si128(cx[hashBlock], ax[hashBlock]);
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- uint64_t hi, lo, cl, ch;
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- bx[hashBlock] = cx[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ //dummy
}
inline static void hashPowV2(const uint8_t* __restrict__ input,
@@ -730,200 +706,24 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- uint64_t tweak1_2[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
- __m128i ax[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state,
- 200);
- tweak1_2[hashBlock] = (*reinterpret_cast(input + 35 + hashBlock * size) ^
- *(reinterpret_cast(scratchPad[hashBlock]->state) + 24));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- ax[hashBlock] = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- if (SOFT_AES) {
- cx[hashBlock] = soft_aesenc((uint32_t *) &l[hashBlock][idx[hashBlock] & MASK], ax[hashBlock]);
- } else {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- cx[hashBlock] = _mm_aesenc_si128(cx[hashBlock], ax[hashBlock]);
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- const uint8_t tmp = reinterpret_cast(&l[hashBlock][idx[hashBlock] & MASK])[11];
- static const uint32_t table = 0x75310;
- const uint8_t index = (((tmp >> INDEX_SHIFT) & 6) | (tmp & 1)) << 1;
- ((uint8_t *) (&l[hashBlock][idx[hashBlock] & MASK]))[11] = tmp ^ ((table >> index) & 0x30);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- uint64_t hi, lo, cl, ch;
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cl = ((uint64_t *) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t *) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t *) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t *) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- bx[hashBlock] = cx[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ //dummy
}
- // multi
inline static void hashPowV3(const uint8_t* __restrict__ input,
size_t size,
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- uint64_t sqrt_result[NUM_HASH_BLOCKS];
- uint64_t division_result_xmm[NUM_HASH_BLOCKS];
- __m128i bx0[NUM_HASH_BLOCKS];
- __m128i bx1[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
- __m128i ax[NUM_HASH_BLOCKS];
+ //dummy
+ }
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast(input) + hashBlock * size, (int) size,
- scratchPad[hashBlock]->state, 200);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx0[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- bx1[hashBlock] = _mm_set_epi64x(h[hashBlock][9] ^ h[hashBlock][11], h[hashBlock][8] ^ h[hashBlock][10]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
-
- division_result_xmm[hashBlock] = h[hashBlock][12];
- sqrt_result[hashBlock] = h[hashBlock][13];
- }
-
- uint64_t sqrt_result0;
- uint64_t division_result_xmm0;
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- ax[hashBlock] = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- if (SOFT_AES) {
- cx[hashBlock] = soft_aesenc((uint32_t *) &l[hashBlock][idx[hashBlock] & MASK], ax[hashBlock]);
- } else {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- cx[hashBlock] = _mm_aesenc_si128(cx[hashBlock], ax[hashBlock]);
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- SHUFFLE_PHASE_1(l[hashBlock], idx[hashBlock] & MASK, bx0[hashBlock], bx1[hashBlock], ax[hashBlock])
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx0[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- uint64_t hi, lo, cl, ch;
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
-
- sqrt_result0 = sqrt_result[hashBlock];
- division_result_xmm0 = division_result_xmm[hashBlock];
-
- INTEGER_MATH_V2(0, cl, cx[hashBlock])
-
- sqrt_result[hashBlock] = sqrt_result0;
- division_result_xmm[hashBlock] = division_result_xmm0;
-
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- SHUFFLE_PHASE_2(l[hashBlock], idx[hashBlock] & MASK, bx0[hashBlock], bx1[hashBlock], ax[hashBlock], lo, hi)
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- bx1[hashBlock] = bx0[hashBlock];
- bx0[hashBlock] = cx[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
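+ // CN-R entry point: takes the block height so the per-height random-math program can be generated.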
+ inline static void hashPowV4(const uint8_t* __restrict__ input,
+ size_t size,
+ uint8_t* __restrict__ output,
+ ScratchPad** __restrict__ scratchPad,
+ uint64_t height)
+ {
+ // dummy
}
inline static void hashLiteTube(const uint8_t* __restrict__ input,
@@ -931,87 +731,7 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- uint64_t tweak1_2[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state,
- 200);
- tweak1_2[hashBlock] = (*reinterpret_cast(input + 35 + hashBlock * size) ^
- *(reinterpret_cast(scratchPad[hashBlock]->state) + 24));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] =
- _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- __m128i cx;
-
- if (SOFT_AES) {
- cx = soft_aesenc((uint32_t*) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
- } else {
- cx = _mm_load_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK]);
- cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
- }
-
- _mm_store_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx));
-
- const uint8_t tmp = reinterpret_cast(&l[hashBlock][idx[hashBlock] & MASK])[11];
- static const uint32_t table = 0x75310;
- const uint8_t index = (((tmp >> INDEX_SHIFT) & 6) | (tmp & 1)) << 1;
- ((uint8_t*) (&l[hashBlock][idx[hashBlock] & MASK]))[11] = tmp ^ ((table >> index) & 0x30);
-
- idx[hashBlock] = EXTRACT64(cx);
- bx[hashBlock] = cx;
-
- uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] ^= ((uint64_t*) &l[hashBlock][idx[hashBlock] &
- MASK])[0];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ //dummy
}
inline static void hashHeavy(const uint8_t* __restrict__ input,
@@ -1019,161 +739,7 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast(input) + hashBlock * size, (int) size,
- scratchPad[hashBlock]->state, 200);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad_heavy((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- __m128i cx;
-
- if (SOFT_AES) {
- cx = soft_aesenc((uint32_t*) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
- } else {
- cx = _mm_load_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK]);
- cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
- }
-
- _mm_store_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx));
-
- idx[hashBlock] = EXTRACT64(cx);
- bx[hashBlock] = cx;
-
- uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- const int64x2_t x = vld1q_s64(reinterpret_cast(&l[hashBlock][idx[hashBlock] & MASK]));
- const int64_t n = vgetq_lane_s64(x, 0);
- const int32_t d = vgetq_lane_s32(x, 2);
- const int64_t q = n / (d | 0x5);
-
- ((int64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = n ^ q;
-
- idx[hashBlock] = d ^ q;
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad_heavy((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
- }
-
- inline static void hashHeavyHaven(const uint8_t* __restrict__ input,
- size_t size,
- uint8_t* __restrict__ output,
- ScratchPad** __restrict__ scratchPad)
- {
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast(input) + hashBlock * size, (int) size,
- scratchPad[hashBlock]->state, 200);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad_heavy((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- __m128i cx;
-
- if (SOFT_AES) {
- cx = soft_aesenc((uint32_t*) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
- } else {
- cx = _mm_load_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK]);
- cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
- }
-
- _mm_store_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx));
-
- idx[hashBlock] = EXTRACT64(cx);
- bx[hashBlock] = cx;
-
- uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- const int64x2_t x = vld1q_s64(reinterpret_cast(&l[hashBlock][idx[hashBlock] & MASK]));
- const int64_t n = vgetq_lane_s64(x, 0);
- const int32_t d = vgetq_lane_s32(x, 2);
- const int64_t q = n / (d | 0x5);
-
- ((int64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = n ^ q;
-
- idx[hashBlock] = (~d) ^ q;
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad_heavy((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ //dummy
}
inline static void hashHeavyTube(const uint8_t* __restrict__ input,
@@ -1181,125 +747,12 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- uint64_t tweak1_2[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state,
- 200);
- tweak1_2[hashBlock] = (*reinterpret_cast(reinterpret_cast(input) + 35 +
- hashBlock * size) ^
- *(reinterpret_cast(scratchPad[hashBlock]->state) + 24));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad_heavy((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- union alignas(16)
- {
- uint32_t k[4];
- uint64_t v64[2];
- };
- alignas(16) uint32_t x[4];
-
-#define BYTE(p, i) ((unsigned char*)&p)[i]
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- __m128i cx;
-
- cx = _mm_load_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK]);
-
- const __m128i& key = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- _mm_store_si128((__m128i*) k, key);
- cx = _mm_xor_si128(cx, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
- _mm_store_si128((__m128i*) x, cx);
-
- k[0] ^= saes_table[0][BYTE(x[0], 0)] ^ saes_table[1][BYTE(x[1], 1)] ^ saes_table[2][BYTE(x[2], 2)] ^
- saes_table[3][BYTE(x[3], 3)];
- x[0] ^= k[0];
- k[1] ^= saes_table[0][BYTE(x[1], 0)] ^ saes_table[1][BYTE(x[2], 1)] ^ saes_table[2][BYTE(x[3], 2)] ^
- saes_table[3][BYTE(x[0], 3)];
- x[1] ^= k[1];
- k[2] ^= saes_table[0][BYTE(x[2], 0)] ^ saes_table[1][BYTE(x[3], 1)] ^ saes_table[2][BYTE(x[0], 2)] ^
- saes_table[3][BYTE(x[1], 3)];
- x[2] ^= k[2];
- k[3] ^= saes_table[0][BYTE(x[3], 0)] ^ saes_table[1][BYTE(x[0], 1)] ^ saes_table[2][BYTE(x[1], 2)] ^
- saes_table[3][BYTE(x[2], 3)];
-
- cx = _mm_load_si128((__m128i*) k);
-
- _mm_store_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK], _mm_xor_si128(bx[hashBlock], cx));
-
- const uint8_t tmp = reinterpret_cast<const uint8_t*>(&l[hashBlock][idx[hashBlock] & MASK])[11];
- static const uint32_t table = 0x75310;
- const uint8_t index = (((tmp >> INDEX_SHIFT) & 6) | (tmp & 1)) << 1;
- ((uint8_t*) (&l[hashBlock][idx[hashBlock] & MASK]))[11] = tmp ^ ((table >> index) & 0x30);
-
- idx[hashBlock] = EXTRACT64(cx);
- bx[hashBlock] = cx;
-
- uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] ^= ((uint64_t*) &l[hashBlock][idx[hashBlock] &
- MASK])[0];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- const int64x2_t x = vld1q_s64(reinterpret_cast<const int64_t*>(&l[hashBlock][idx[hashBlock] & MASK]));
- const int64_t n = vgetq_lane_s64(x, 0);
- const int32_t d = vgetq_lane_s32(x, 2);
- const int64_t q = n / (d | 0x5);
-
- ((int64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = n ^ q;
-
- idx[hashBlock] = d ^ q;
- }
- }
-
-#undef BYTE
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad_heavy((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ //dummy
}
};
-template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES>
-class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, 1>
+template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, PowVariant VARIANT>
+class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, VARIANT, 1>
{
public:
inline static void hash(const uint8_t* __restrict__ input,
@@ -1462,7 +915,7 @@ public:
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
}
- SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0)
+ SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ)
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
@@ -1476,7 +929,85 @@ public:
lo = __umul128(idx0, cl, &hi);
- SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi);
+ SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ)
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
+ ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+
+ bx10 = bx00;
+ bx00 = cx0;
+ }
+
+ cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
+
+ keccakf(h0, 24);
+
+ extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
+ }
+
+ // single
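+ // CN-R / CNv4 main loop: CNv2's division + square-root step is replaced by the
+ // height-dependent random math program (VARIANT4_RANDOM_MATH), and the shuffle also
+ // mixes the neighbouring chunks back into cx (SHUFFLE_V4).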
+ inline static void hashPowV4(const uint8_t* __restrict__ input,
+ size_t size,
+ uint8_t* __restrict__ output,
+ ScratchPad** __restrict__ scratchPad,
+ uint64_t height)
+
+ {
+ keccak(input, (int) size, scratchPad[0]->state, 200);
+
+ const uint8_t* l0 = scratchPad[0]->memory;
+ uint64_t* h0 = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
+
+ cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
+
+ uint64_t al0 = h0[0] ^h0[4];
+ uint64_t ah0 = h0[1] ^h0[5];
+
+ __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+ __m128i bx10 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
+
+ uint64_t idx0 = h0[0] ^h0[4];
+
+ VARIANT4_RANDOM_MATH_INIT(0, h0)
+
+ for (size_t i = 0; i < ITERATIONS; i++) {
+ __m128i cx0;
+ const __m128i ax0 = _mm_set_epi64x(ah0, al0);
+
+ if (SOFT_AES) {
+ cx0 = soft_aesenc((uint32_t*) &l0[idx0 & MASK], ax0);
+ } else {
+ cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+ cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
+ }
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0)
+
+ _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
+
+ idx0 = EXTRACT64(cx0);
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & MASK])[0];
+ ch = ((uint64_t*) &l0[idx0 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx10)
+
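+ // Monero's CN-R (POW_V4) additionally XORs the random-math registers back into a;
+ // other CN-R style variants (e.g. Wownero's "wow") skip this step.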
+ if (VARIANT == POW_V4) {
+ al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
+ ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
+ }
+
+ lo = __umul128(idx0, cl, &hi);
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0);
al0 += hi;
ah0 += lo;
@@ -1628,7 +1159,11 @@ public:
((int64_t*) &l[idx & MASK])[0] = n ^ q;
- idx = d ^ q;
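+ // Haven (XHV) inverts the divisor lane before the final XOR; other heavy variants keep d ^ q.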
+ if (VARIANT == POW_XHV) {
+ idx = (~d) ^ q;
+ } else {
+ idx = d ^ q;
+ }
}
cn_implode_scratchpad_heavy((__m128i*) scratchPad[0]->memory, (__m128i*) scratchPad[0]->state);
@@ -1636,75 +1171,6 @@ public:
extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
}
- inline static void hashHeavyHaven(const uint8_t* __restrict__ input,
- size_t size,
- uint8_t* __restrict__ output,
- ScratchPad** __restrict__ scratchPad)
- {
- const uint8_t* l;
- uint64_t* h;
- uint64_t al;
- uint64_t ah;
- __m128i bx;
- uint64_t idx;
-
- keccak(static_cast<const uint8_t*>(input), (int) size, scratchPad[0]->state, 200);
-
- l = scratchPad[0]->memory;
- h = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
-
- cn_explode_scratchpad_heavy((__m128i*) h, (__m128i*) l);
-
- al = h[0] ^ h[4];
- ah = h[1] ^ h[5];
- bx = _mm_set_epi64x(h[3] ^ h[7], h[2] ^ h[6]);
- idx = h[0] ^ h[4];
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- __m128i cx;
-
- if (SOFT_AES) {
- cx = soft_aesenc((uint32_t*) &l[idx & MASK], _mm_set_epi64x(ah, al));
- } else {
- cx = _mm_load_si128((__m128i*) &l[idx & MASK]);
- cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah, al));
- }
-
- _mm_store_si128((__m128i*) &l[idx & MASK], _mm_xor_si128(bx, cx));
- idx = EXTRACT64(cx);
- bx = cx;
-
- uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*) &l[idx & MASK])[0];
- ch = ((uint64_t*) &l[idx & MASK])[1];
- lo = __umul128(idx, cl, &hi);
-
- al += hi;
- ah += lo;
-
- ((uint64_t*) &l[idx & MASK])[0] = al;
- ((uint64_t*) &l[idx & MASK])[1] = ah;
-
- ah ^= ch;
- al ^= cl;
- idx = al;
-
- const int64x2_t x = vld1q_s64(reinterpret_cast<const int64_t*>(&l[idx & MASK]));
- const int64_t n = vgetq_lane_s64(x, 0);
- const int32_t d = vgetq_lane_s32(x, 2);
- const int64_t q = n / (d | 0x5);
-
- ((int64_t*) &l[idx & MASK])[0] = n ^ q;
-
- idx = (~d) ^ q;
- }
-
- cn_implode_scratchpad_heavy((__m128i*) l, (__m128i*) h);
- keccakf(h, 24);
- extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
- }
-
-
inline static void hashHeavyTube(const uint8_t* __restrict__ input,
size_t size,
uint8_t* __restrict__ output,
@@ -1809,8 +1275,8 @@ public:
};
-template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES>
-class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, 2>
+template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, PowVariant VARIANT>
+class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, VARIANT, 2>
{
public:
inline static void hash(const uint8_t* __restrict__ input,
@@ -2067,8 +1533,8 @@ public:
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
}
- SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0)
- SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1)
+ SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ)
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
@@ -2084,7 +1550,7 @@ public:
lo = __umul128(idx0, cl, &hi);
- SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi);
+ SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ)
al0 += hi;
ah0 += lo;
@@ -2107,7 +1573,136 @@ public:
lo = __umul128(idx1, cl, &hi);
- SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi);
+ SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ)
+
+ al1 += hi;
+ ah1 += lo;
+
+ ((uint64_t*) &l1[idx1 & MASK])[0] = al1;
+ ((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
+
+ ah1 ^= ch;
+ al1 ^= cl;
+ idx1 = al1;
+
+ bx11 = bx01;
+ bx01 = cx1;
+ }
+
+ cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
+ cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
+
+ keccakf(h0, 24);
+ keccakf(h1, 24);
+
+ extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
+ extra_hashes[scratchPad[1]->state[0] & 3](scratchPad[1]->state, 200, output + 32);
+ }
+
+ // double
+ inline static void hashPowV4(const uint8_t* __restrict__ input,
+ size_t size,
+ uint8_t* __restrict__ output,
+ ScratchPad** __restrict__ scratchPad,
+ uint64_t height)
+ {
+ keccak(input, (int) size, scratchPad[0]->state, 200);
+ keccak(input + size, (int) size, scratchPad[1]->state, 200);
+
+ const uint8_t* l0 = scratchPad[0]->memory;
+ const uint8_t* l1 = scratchPad[1]->memory;
+ uint64_t* h0 = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
+ uint64_t* h1 = reinterpret_cast<uint64_t*>(scratchPad[1]->state);
+
+ cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
+ cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
+
+ uint64_t al0 = h0[0] ^h0[4];
+ uint64_t al1 = h1[0] ^h1[4];
+ uint64_t ah0 = h0[1] ^h0[5];
+ uint64_t ah1 = h1[1] ^h1[5];
+
+ __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+ __m128i bx01 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+
+ __m128i bx10 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
+ __m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
+
+ uint64_t idx0 = h0[0] ^h0[4];
+ uint64_t idx1 = h1[0] ^h1[4];
+
+ VARIANT4_RANDOM_MATH_INIT(0, h0)
+ VARIANT4_RANDOM_MATH_INIT(1, h1)
+
+ for (size_t i = 0; i < ITERATIONS; i++) {
+ __m128i cx0;
+ __m128i cx1;
+
+ const __m128i ax0 = _mm_set_epi64x(ah0, al0);
+ const __m128i ax1 = _mm_set_epi64x(ah1, al1);
+
+ if (SOFT_AES) {
+ cx0 = soft_aesenc((uint32_t*) &l0[idx0 & MASK], ax0);
+ cx1 = soft_aesenc((uint32_t*) &l1[idx1 & MASK], ax1);
+ } else {
+ cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+ cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+
+ cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
+ cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
+ }
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0)
+ SHUFFLE_V4(l1, (idx1&MASK), bx01, bx11, ax1, cx1)
+
+ _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
+ _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
+
+ idx0 = EXTRACT64(cx0);
+ idx1 = EXTRACT64(cx1);
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & MASK])[0];
+ ch = ((uint64_t*) &l0[idx0 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx10)
+
+ if (VARIANT == POW_V4) {
+ al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
+ ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
+ }
+
+ lo = __umul128(idx0, cl, &hi);
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0);
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
+ ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+
+ bx10 = bx00;
+ bx00 = cx0;
+
+
+ cl = ((uint64_t*) &l1[idx1 & MASK])[0];
+ ch = ((uint64_t*) &l1[idx1 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx01, bx11)
+
+ if (VARIANT == POW_V4) {
+ al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
+ ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
+ }
+
+ lo = __umul128(idx1, cl, &hi);
+
+ SHUFFLE_V4(l1, (idx1&MASK), bx01, bx11, ax1, cx1);
al1 += hi;
ah1 += lo;
@@ -2318,118 +1913,12 @@ public:
((int64_t*) &l0[idx0 & MASK])[0] = n0 ^ q0;
- idx0 = d0 ^ q0;
-
-
- cl = ((uint64_t*) &l1[idx1 & MASK])[0];
- ch = ((uint64_t*) &l1[idx1 & MASK])[1];
- lo = __umul128(idx1, cl, &hi);
-
- al1 += hi;
- ah1 += lo;
-
- ((uint64_t*) &l1[idx1 & MASK])[0] = al1;
- ((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
-
- ah1 ^= ch;
- al1 ^= cl;
- idx1 = al1;
-
- const int64x2_t x1 = vld1q_s64(reinterpret_cast<const int64_t*>(&l1[idx1 & MASK]));
- const int64_t n1 = vgetq_lane_s64(x1, 0);
- const int32_t d1 = vgetq_lane_s32(x1, 2);
- const int64_t q1 = n1 / (d1 | 0x5);
-
- ((int64_t*) &l1[idx1 & MASK])[0] = n1 ^ q1;
-
- idx1 = d1 ^ q1;
- }
-
- cn_implode_scratchpad_heavy((__m128i*) l0, (__m128i*) h0);
- cn_implode_scratchpad_heavy((__m128i*) l1, (__m128i*) h1);
-
- keccakf(h0, 24);
- keccakf(h1, 24);
-
- extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
- extra_hashes[scratchPad[1]->state[0] & 3](scratchPad[1]->state, 200, output + 32);
- }
-
- inline static void hashHeavyHaven(const uint8_t* __restrict__ input,
- size_t size,
- uint8_t* __restrict__ output,
- ScratchPad** __restrict__ scratchPad)
- {
- keccak(input, (int) size, scratchPad[0]->state, 200);
- keccak(input + size, (int) size, scratchPad[1]->state, 200);
-
- const uint8_t* l0 = scratchPad[0]->memory;
- const uint8_t* l1 = scratchPad[1]->memory;
- uint64_t* h0 = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
- uint64_t* h1 = reinterpret_cast<uint64_t*>(scratchPad[1]->state);
-
- cn_explode_scratchpad_heavy((__m128i*) h0, (__m128i*) l0);
- cn_explode_scratchpad_heavy((__m128i*) h1, (__m128i*) l1);
-
- uint64_t al0 = h0[0] ^h0[4];
- uint64_t al1 = h1[0] ^h1[4];
- uint64_t ah0 = h0[1] ^h0[5];
- uint64_t ah1 = h1[1] ^h1[5];
-
- __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
- __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
-
- uint64_t idx0 = h0[0] ^h0[4];
- uint64_t idx1 = h1[0] ^h1[4];
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- __m128i cx0;
- __m128i cx1;
-
- if (SOFT_AES) {
- cx0 = soft_aesenc((uint32_t*) &l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
- cx1 = soft_aesenc((uint32_t*) &l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
+ if (VARIANT == POW_XHV) {
+ idx0 = (~d0) ^ q0;
} else {
- cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
- cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
-
- cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
- cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
+ idx0 = d0 ^ q0;
}
- _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
- _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
-
- idx0 = EXTRACT64(cx0);
- idx1 = EXTRACT64(cx1);
-
- bx0 = cx0;
- bx1 = cx1;
-
- uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*) &l0[idx0 & MASK])[0];
- ch = ((uint64_t*) &l0[idx0 & MASK])[1];
- lo = __umul128(idx0, cl, &hi);
-
- al0 += hi;
- ah0 += lo;
-
- ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
- ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
-
- ah0 ^= ch;
- al0 ^= cl;
- idx0 = al0;
-
- const int64x2_t x0 = vld1q_s64(reinterpret_cast<const int64_t*>(&l0[idx0 & MASK]));
- const int64_t n0 = vgetq_lane_s64(x0, 0);
- const int32_t d0 = vgetq_lane_s32(x0, 2);
- const int64_t q0 = n0 / (d0 | 0x5);
-
- ((int64_t*) &l0[idx0 & MASK])[0] = n0 ^ q0;
-
- idx0 = (~d0) ^ q0;
-
cl = ((uint64_t*) &l1[idx1 & MASK])[0];
ch = ((uint64_t*) &l1[idx1 & MASK])[1];
lo = __umul128(idx1, cl, &hi);
@@ -2451,7 +1940,11 @@ public:
((int64_t*) &l1[idx1 & MASK])[0] = n1 ^ q1;
- idx1 = (~d1) ^ q1;
+ if (VARIANT == POW_XHV) {
+ idx1 = (~d1) ^ q1;
+ } else {
+ idx1 = d1 ^ q1;
+ }
}
cn_implode_scratchpad_heavy((__m128i*) l0, (__m128i*) h0);
@@ -2635,8 +2128,8 @@ public:
}
};
-template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES>
-class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, 3>
+template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, PowVariant VARIANT>
+class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, VARIANT, 3>
{
public:
inline static void hash(const uint8_t* __restrict__ input,
@@ -2986,9 +2479,9 @@ public:
cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2));
}
- SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0)
- SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1)
- SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2)
+ SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ)
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
@@ -3006,7 +2499,7 @@ public:
lo = __umul128(idx0, cl, &hi);
- SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi);
+ SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ)
al0 += hi;
ah0 += lo;
@@ -3029,7 +2522,7 @@ public:
lo = __umul128(idx1, cl, &hi);
- SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi);
+ SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ)
al1 += hi;
ah1 += lo;
@@ -3052,7 +2545,185 @@ public:
lo = __umul128(idx2, cl, &hi);
- SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi)
+ SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ)
+
+ al2 += hi;
+ ah2 += lo;
+
+ ((uint64_t*) &l2[idx2 & MASK])[0] = al2;
+ ((uint64_t*) &l2[idx2 & MASK])[1] = ah2;
+
+ ah2 ^= ch;
+ al2 ^= cl;
+ idx2 = al2;
+
+ bx12 = bx02;
+ bx02 = cx2;
+ }
+
+ cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
+ cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
+ cn_implode_scratchpad((__m128i*) l2, (__m128i*) h2);
+
+ keccakf(h0, 24);
+ keccakf(h1, 24);
+ keccakf(h2, 24);
+
+ extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
+ extra_hashes[scratchPad[1]->state[0] & 3](scratchPad[1]->state, 200, output + 32);
+ extra_hashes[scratchPad[2]->state[0] & 3](scratchPad[2]->state, 200, output + 64);
+ }
+
+ // triple
+ inline static void hashPowV4(const uint8_t* __restrict__ input,
+ size_t size,
+ uint8_t* __restrict__ output,
+ ScratchPad** __restrict__ scratchPad,
+ uint64_t height)
+ {
+ keccak(input, (int) size, scratchPad[0]->state, 200);
+ keccak(input + size, (int) size, scratchPad[1]->state, 200);
+ keccak(input + 2 * size, (int) size, scratchPad[2]->state, 200);
+
+ const uint8_t* l0 = scratchPad[0]->memory;
+ const uint8_t* l1 = scratchPad[1]->memory;
+ const uint8_t* l2 = scratchPad[2]->memory;
+ uint64_t* h0 = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
+ uint64_t* h1 = reinterpret_cast<uint64_t*>(scratchPad[1]->state);
+ uint64_t* h2 = reinterpret_cast<uint64_t*>(scratchPad[2]->state);
+
+ cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
+ cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
+ cn_explode_scratchpad((__m128i*) h2, (__m128i*) l2);
+
+ uint64_t al0 = h0[0] ^h0[4];
+ uint64_t al1 = h1[0] ^h1[4];
+ uint64_t al2 = h2[0] ^h2[4];
+ uint64_t ah0 = h0[1] ^h0[5];
+ uint64_t ah1 = h1[1] ^h1[5];
+ uint64_t ah2 = h2[1] ^h2[5];
+
+ __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+ __m128i bx01 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+ __m128i bx02 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
+
+ __m128i bx10 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
+ __m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
+ __m128i bx12 = _mm_set_epi64x(h2[9] ^ h2[11], h2[8] ^ h2[10]);
+
+ uint64_t idx0 = h0[0] ^h0[4];
+ uint64_t idx1 = h1[0] ^h1[4];
+ uint64_t idx2 = h2[0] ^h2[4];
+
+ VARIANT4_RANDOM_MATH_INIT(0, h0)
+ VARIANT4_RANDOM_MATH_INIT(1, h1)
+ VARIANT4_RANDOM_MATH_INIT(2, h2)
+
+ for (size_t i = 0; i < ITERATIONS; i++) {
+ __m128i cx0;
+ __m128i cx1;
+ __m128i cx2;
+
+ const __m128i ax0 = _mm_set_epi64x(ah0, al0);
+ const __m128i ax1 = _mm_set_epi64x(ah1, al1);
+ const __m128i ax2 = _mm_set_epi64x(ah2, al2);
+
+ if (SOFT_AES) {
+ cx0 = soft_aesenc((uint32_t*) &l0[idx0 & MASK], ax0);
+ cx1 = soft_aesenc((uint32_t*) &l1[idx1 & MASK], ax1);
+ cx2 = soft_aesenc((uint32_t*) &l2[idx2 & MASK], ax2);
+ } else {
+ cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+ cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+ cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
+
+ cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
+ cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
+ cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2));
+ }
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0)
+ SHUFFLE_V4(l1, (idx1&MASK), bx01, bx11, ax1, cx1)
+ SHUFFLE_V4(l2, (idx2&MASK), bx02, bx12, ax2, cx2)
+
+ _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
+ _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
+ _mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx02, cx2));
+
+ idx0 = EXTRACT64(cx0);
+ idx1 = EXTRACT64(cx1);
+ idx2 = EXTRACT64(cx2);
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & MASK])[0];
+ ch = ((uint64_t*) &l0[idx0 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx10)
+
+ if (VARIANT == POW_V4) {
+ al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
+ ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
+ }
+
+ lo = __umul128(idx0, cl, &hi);
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0);
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
+ ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+
+ bx10 = bx00;
+ bx00 = cx0;
+
+
+ cl = ((uint64_t*) &l1[idx1 & MASK])[0];
+ ch = ((uint64_t*) &l1[idx1 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx01, bx11)
+
+ if (VARIANT == POW_V4) {
+ al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
+ ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
+ }
+
+ lo = __umul128(idx1, cl, &hi);
+
+ SHUFFLE_V4(l1, (idx1&MASK), bx01, bx11, ax1, cx1);
+
+ al1 += hi;
+ ah1 += lo;
+
+ ((uint64_t*) &l1[idx1 & MASK])[0] = al1;
+ ((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
+
+ ah1 ^= ch;
+ al1 ^= cl;
+ idx1 = al1;
+
+ bx11 = bx01;
+ bx01 = cx1;
+
+
+ cl = ((uint64_t*) &l2[idx2 & MASK])[0];
+ ch = ((uint64_t*) &l2[idx2 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(2, al2, ah2, cl, bx02, bx12)
+
+ if (VARIANT == POW_V4) {
+ al2 ^= r2[2] | ((uint64_t)(r2[3]) << 32);
+ ah2 ^= r2[0] | ((uint64_t)(r2[1]) << 32);
+ }
+
+ lo = __umul128(idx2, cl, &hi);
+
+ SHUFFLE_V4(l2, (idx2&MASK), bx02, bx12, ax2, cx2)
al2 += hi;
ah2 += lo;
@@ -3326,162 +2997,12 @@ public:
((int64_t*) &l0[idx0 & MASK])[0] = n0 ^ q0;
- idx0 = d0 ^ q0;
-
- cl = ((uint64_t*) &l1[idx1 & MASK])[0];
- ch = ((uint64_t*) &l1[idx1 & MASK])[1];
- lo = __umul128(idx1, cl, &hi);
-
- al1 += hi;
- ah1 += lo;
-
- ((uint64_t*) &l1[idx1 & MASK])[0] = al1;
- ((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
-
- ah1 ^= ch;
- al1 ^= cl;
- idx1 = al1;
-
- const int64x2_t x1 = vld1q_s64(reinterpret_cast<const int64_t*>(&l1[idx1 & MASK]));
- const int64_t n1 = vgetq_lane_s64(x1, 0);
- const int32_t d1 = vgetq_lane_s32(x1, 2);
- const int64_t q1 = n1 / (d1 | 0x5);
-
- ((int64_t*) &l1[idx1 & MASK])[0] = n1 ^ q1;
-
- idx1 = d1 ^ q1;
-
-
- cl = ((uint64_t*) &l2[idx2 & MASK])[0];
- ch = ((uint64_t*) &l2[idx2 & MASK])[1];
- lo = __umul128(idx2, cl, &hi);
-
- al2 += hi;
- ah2 += lo;
-
- ((uint64_t*) &l2[idx2 & MASK])[0] = al2;
- ((uint64_t*) &l2[idx2 & MASK])[1] = ah2;
-
- ah2 ^= ch;
- al2 ^= cl;
- idx2 = al2;
-
-
- const int64x2_t x2 = vld1q_s64(reinterpret_cast<const int64_t*>(&l2[idx2 & MASK]));
- const int64_t n2 = vgetq_lane_s64(x2, 0);
- const int32_t d2 = vgetq_lane_s32(x2, 2);
- const int64_t q2 = n2 / (d2 | 0x5);
-
- ((int64_t*) &l2[idx2 & MASK])[0] = n2 ^ q2;
-
- idx2 = d2 ^ q2;
- }
-
- cn_implode_scratchpad_heavy((__m128i*) l0, (__m128i*) h0);
- cn_implode_scratchpad_heavy((__m128i*) l1, (__m128i*) h1);
- cn_implode_scratchpad_heavy((__m128i*) l2, (__m128i*) h2);
-
- keccakf(h0, 24);
- keccakf(h1, 24);
- keccakf(h2, 24);
-
- extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
- extra_hashes[scratchPad[1]->state[0] & 3](scratchPad[1]->state, 200, output + 32);
- extra_hashes[scratchPad[2]->state[0] & 3](scratchPad[2]->state, 200, output + 64);
- }
-
- inline static void hashHeavyHaven(const uint8_t* __restrict__ input,
- size_t size,
- uint8_t* __restrict__ output,
- ScratchPad** __restrict__ scratchPad)
- {
- keccak(input, (int) size, scratchPad[0]->state, 200);
- keccak(input + size, (int) size, scratchPad[1]->state, 200);
- keccak(input + 2 * size, (int) size, scratchPad[2]->state, 200);
-
- const uint8_t* l0 = scratchPad[0]->memory;
- const uint8_t* l1 = scratchPad[1]->memory;
- const uint8_t* l2 = scratchPad[2]->memory;
- uint64_t* h0 = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
- uint64_t* h1 = reinterpret_cast<uint64_t*>(scratchPad[1]->state);
- uint64_t* h2 = reinterpret_cast<uint64_t*>(scratchPad[2]->state);
-
- cn_explode_scratchpad_heavy((__m128i*) h0, (__m128i*) l0);
- cn_explode_scratchpad_heavy((__m128i*) h1, (__m128i*) l1);
- cn_explode_scratchpad_heavy((__m128i*) h2, (__m128i*) l2);
-
- uint64_t al0 = h0[0] ^h0[4];
- uint64_t al1 = h1[0] ^h1[4];
- uint64_t al2 = h2[0] ^h2[4];
- uint64_t ah0 = h0[1] ^h0[5];
- uint64_t ah1 = h1[1] ^h1[5];
- uint64_t ah2 = h2[1] ^h2[5];
-
- __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
- __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
- __m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
-
- uint64_t idx0 = h0[0] ^h0[4];
- uint64_t idx1 = h1[0] ^h1[4];
- uint64_t idx2 = h2[0] ^h2[4];
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- __m128i cx0;
- __m128i cx1;
- __m128i cx2;
-
- if (SOFT_AES) {
- cx0 = soft_aesenc((uint32_t*) &l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
- cx1 = soft_aesenc((uint32_t*) &l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
- cx2 = soft_aesenc((uint32_t*) &l2[idx2 & MASK], _mm_set_epi64x(ah2, al2));
+ if (VARIANT == POW_XHV) {
+ idx0 = (~d0) ^ q0;
} else {
- cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
- cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
- cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
-
- cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
- cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
- cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2));
+ idx0 = d0 ^ q0;
}
- _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
- _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
- _mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx2, cx2));
-
- idx0 = EXTRACT64(cx0);
- idx1 = EXTRACT64(cx1);
- idx2 = EXTRACT64(cx2);
-
- bx0 = cx0;
- bx1 = cx1;
- bx2 = cx2;
-
-
- uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*) &l0[idx0 & MASK])[0];
- ch = ((uint64_t*) &l0[idx0 & MASK])[1];
- lo = __umul128(idx0, cl, &hi);
-
- al0 += hi;
- ah0 += lo;
-
- ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
- ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
-
- ah0 ^= ch;
- al0 ^= cl;
- idx0 = al0;
-
- const int64x2_t x0 = vld1q_s64(reinterpret_cast<const int64_t*>(&l0[idx0 & MASK]));
- const int64_t n0 = vgetq_lane_s64(x0, 0);
- const int32_t d0 = vgetq_lane_s32(x0, 2);
- const int64_t q0 = n0 / (d0 | 0x5);
-
- ((int64_t*) &l0[idx0 & MASK])[0] = n0 ^ q0;
-
- idx0 = (~d0) ^ q0;
-
-
cl = ((uint64_t*) &l1[idx1 & MASK])[0];
ch = ((uint64_t*) &l1[idx1 & MASK])[1];
lo = __umul128(idx1, cl, &hi);
@@ -3503,7 +3024,11 @@ public:
((int64_t*) &l1[idx1 & MASK])[0] = n1 ^ q1;
- idx1 = (~d1) ^ q1;
+ if (VARIANT == POW_XHV) {
+ idx1 = (~d1) ^ q1;
+ } else {
+ idx1 = d1 ^ q1;
+ }
cl = ((uint64_t*) &l2[idx2 & MASK])[0];
ch = ((uint64_t*) &l2[idx2 & MASK])[1];
@@ -3519,6 +3044,7 @@ public:
al2 ^= cl;
idx2 = al2;
+
const int64x2_t x2 = vld1q_s64(reinterpret_cast<const int64_t*>(&l2[idx2 & MASK]));
const int64_t n2 = vgetq_lane_s64(x2, 0);
const int32_t d2 = vgetq_lane_s32(x2, 2);
@@ -3526,7 +3052,11 @@ public:
((int64_t*) &l2[idx2 & MASK])[0] = n2 ^ q2;
- idx2 = (~d2) ^ q2;
+ if (VARIANT == POW_XHV) {
+ idx2 = (~d2) ^ q2;
+ } else {
+ idx2 = d2 ^ q2;
+ }
}
cn_implode_scratchpad_heavy((__m128i*) l0, (__m128i*) h0);
@@ -3780,8 +3310,8 @@ public:
}
};
-template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES>
-class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, 4>
+template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, PowVariant VARIANT>
+class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, VARIANT, 4>
{
public:
inline static void hash(const uint8_t* __restrict__ input,
@@ -4220,10 +3750,10 @@ public:
cx3 = _mm_aesenc_si128(cx3, ax3);
}
- SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0)
- SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1)
- SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2)
- SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3)
+ SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ)
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
@@ -4243,7 +3773,7 @@ public:
lo = __umul128(idx0, cl, &hi);
- SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi);
+ SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ)
al0 += hi;
ah0 += lo;
@@ -4266,7 +3796,7 @@ public:
lo = __umul128(idx1, cl, &hi);
- SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi);
+ SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ)
al1 += hi;
ah1 += lo;
@@ -4289,7 +3819,7 @@ public:
lo = __umul128(idx2, cl, &hi);
- SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi);
+ SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ)
al2 += hi;
ah2 += lo;
@@ -4312,7 +3842,235 @@ public:
lo = __umul128(idx3, cl, &hi);
- SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi);
+ SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ)
+
+ al3 += hi;
+ ah3 += lo;
+
+ ((uint64_t*) &l3[idx3 & MASK])[0] = al3;
+ ((uint64_t*) &l3[idx3 & MASK])[1] = ah3;
+
+ ah3 ^= ch;
+ al3 ^= cl;
+ idx3 = al3;
+
+ bx13 = bx03;
+ bx03 = cx3;
+ }
+
+ cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
+ cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
+ cn_implode_scratchpad((__m128i*) l2, (__m128i*) h2);
+ cn_implode_scratchpad((__m128i*) l3, (__m128i*) h3);
+
+ keccakf(h0, 24);
+ keccakf(h1, 24);
+ keccakf(h2, 24);
+ keccakf(h3, 24);
+
+ extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
+ extra_hashes[scratchPad[1]->state[0] & 3](scratchPad[1]->state, 200, output + 32);
+ extra_hashes[scratchPad[2]->state[0] & 3](scratchPad[2]->state, 200, output + 64);
+ extra_hashes[scratchPad[3]->state[0] & 3](scratchPad[3]->state, 200, output + 96);
+ }
+
+ // quadruple
+ inline static void hashPowV4(const uint8_t* __restrict__ input,
+ size_t size,
+ uint8_t* __restrict__ output,
+ ScratchPad** __restrict__ scratchPad,
+ uint64_t height)
+ {
+ keccak(input, (int) size, scratchPad[0]->state, 200);
+ keccak(input + size, (int) size, scratchPad[1]->state, 200);
+ keccak(input + 2 * size, (int) size, scratchPad[2]->state, 200);
+ keccak(input + 3 * size, (int) size, scratchPad[3]->state, 200);
+
+ const uint8_t* l0 = scratchPad[0]->memory;
+ const uint8_t* l1 = scratchPad[1]->memory;
+ const uint8_t* l2 = scratchPad[2]->memory;
+ const uint8_t* l3 = scratchPad[3]->memory;
+ uint64_t* h0 = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
+ uint64_t* h1 = reinterpret_cast<uint64_t*>(scratchPad[1]->state);
+ uint64_t* h2 = reinterpret_cast<uint64_t*>(scratchPad[2]->state);
+ uint64_t* h3 = reinterpret_cast<uint64_t*>(scratchPad[3]->state);
+
+ cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
+ cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
+ cn_explode_scratchpad((__m128i*) h2, (__m128i*) l2);
+ cn_explode_scratchpad((__m128i*) h3, (__m128i*) l3);
+
+ uint64_t al0 = h0[0] ^h0[4];
+ uint64_t al1 = h1[0] ^h1[4];
+ uint64_t al2 = h2[0] ^h2[4];
+ uint64_t al3 = h3[0] ^h3[4];
+ uint64_t ah0 = h0[1] ^h0[5];
+ uint64_t ah1 = h1[1] ^h1[5];
+ uint64_t ah2 = h2[1] ^h2[5];
+ uint64_t ah3 = h3[1] ^h3[5];
+
+ __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+ __m128i bx01 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+ __m128i bx02 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
+ __m128i bx03 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
+
+ __m128i bx10 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
+ __m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
+ __m128i bx12 = _mm_set_epi64x(h2[9] ^ h2[11], h2[8] ^ h2[10]);
+ __m128i bx13 = _mm_set_epi64x(h3[9] ^ h3[11], h3[8] ^ h3[10]);
+
+ uint64_t idx0 = h0[0] ^h0[4];
+ uint64_t idx1 = h1[0] ^h1[4];
+ uint64_t idx2 = h2[0] ^h2[4];
+ uint64_t idx3 = h3[0] ^h3[4];
+
+ VARIANT4_RANDOM_MATH_INIT(0, h0)
+ VARIANT4_RANDOM_MATH_INIT(1, h1)
+ VARIANT4_RANDOM_MATH_INIT(2, h2)
+ VARIANT4_RANDOM_MATH_INIT(3, h3)
+
+ for (size_t i = 0; i < ITERATIONS; i++) {
+ __m128i cx0;
+ __m128i cx1;
+ __m128i cx2;
+ __m128i cx3;
+
+ const __m128i ax0 = _mm_set_epi64x(ah0, al0);
+ const __m128i ax1 = _mm_set_epi64x(ah1, al1);
+ const __m128i ax2 = _mm_set_epi64x(ah2, al2);
+ const __m128i ax3 = _mm_set_epi64x(ah3, al3);
+
+ if (SOFT_AES) {
+ cx0 = soft_aesenc((uint32_t*) &l0[idx0 & MASK], ax0);
+ cx1 = soft_aesenc((uint32_t*) &l1[idx1 & MASK], ax1);
+ cx2 = soft_aesenc((uint32_t*) &l2[idx2 & MASK], ax2);
+ cx3 = soft_aesenc((uint32_t*) &l3[idx3 & MASK], ax3);
+ } else {
+ cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+ cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+ cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
+ cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
+
+ cx0 = _mm_aesenc_si128(cx0, ax0);
+ cx1 = _mm_aesenc_si128(cx1, ax1);
+ cx2 = _mm_aesenc_si128(cx2, ax2);
+ cx3 = _mm_aesenc_si128(cx3, ax3);
+ }
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0)
+ SHUFFLE_V4(l1, (idx1&MASK), bx01, bx11, ax1, cx1)
+ SHUFFLE_V4(l2, (idx2&MASK), bx02, bx12, ax2, cx2)
+ SHUFFLE_V4(l3, (idx3&MASK), bx03, bx13, ax3, cx3)
+
+ _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
+ _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
+ _mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx02, cx2));
+ _mm_store_si128((__m128i*) &l3[idx3 & MASK], _mm_xor_si128(bx03, cx3));
+
+ idx0 = EXTRACT64(cx0);
+ idx1 = EXTRACT64(cx1);
+ idx2 = EXTRACT64(cx2);
+ idx3 = EXTRACT64(cx3);
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & MASK])[0];
+ ch = ((uint64_t*) &l0[idx0 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx10)
+
+ if (VARIANT == POW_V4) {
+ al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
+ ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
+ }
+
+ lo = __umul128(idx0, cl, &hi);
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0)
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
+ ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+
+ bx10 = bx00;
+ bx00 = cx0;
+
+
+ cl = ((uint64_t*) &l1[idx1 & MASK])[0];
+ ch = ((uint64_t*) &l1[idx1 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx01, bx11)
+
+ if (VARIANT == POW_V4) {
+ al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
+ ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
+ }
+
+
+ lo = __umul128(idx1, cl, &hi);
+
+ SHUFFLE_V4(l1, (idx1&MASK), bx01, bx11, ax1, cx1)
+
+ al1 += hi;
+ ah1 += lo;
+
+ ((uint64_t*) &l1[idx1 & MASK])[0] = al1;
+ ((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
+
+ ah1 ^= ch;
+ al1 ^= cl;
+ idx1 = al1;
+
+ bx11 = bx01;
+ bx01 = cx1;
+
+
+ cl = ((uint64_t*) &l2[idx2 & MASK])[0];
+ ch = ((uint64_t*) &l2[idx2 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(2, al2, ah2, cl, bx02, bx12)
+
+ if (VARIANT == POW_V4) {
+ al2 ^= r2[2] | ((uint64_t)(r2[3]) << 32);
+ ah2 ^= r2[0] | ((uint64_t)(r2[1]) << 32);
+ }
+
+ lo = __umul128(idx2, cl, &hi);
+
+ SHUFFLE_V4(l2, (idx2&MASK), bx02, bx12, ax2, cx2)
+
+ al2 += hi;
+ ah2 += lo;
+
+ ((uint64_t*) &l2[idx2 & MASK])[0] = al2;
+ ((uint64_t*) &l2[idx2 & MASK])[1] = ah2;
+
+ ah2 ^= ch;
+ al2 ^= cl;
+ idx2 = al2;
+
+ bx12 = bx02;
+ bx02 = cx2;
+
+
+ cl = ((uint64_t*) &l3[idx3 & MASK])[0];
+ ch = ((uint64_t*) &l3[idx3 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(3, al3, ah3, cl, bx03, bx13)
+
+ if (VARIANT == POW_V4) {
+ al3 ^= r3[2] | ((uint64_t)(r3[3]) << 32);
+ ah3 ^= r3[0] | ((uint64_t)(r3[1]) << 32);
+ }
+
+ lo = __umul128(idx3, cl, &hi);
+
+ SHUFFLE_V4(l3, (idx3&MASK), bx03, bx13, ax3, cx3)
al3 += hi;
ah3 += lo;
@@ -4550,14 +4308,6 @@ public:
// not supported
}
- inline static void hashHeavyHaven(const uint8_t* __restrict__ input,
- size_t size,
- uint8_t* __restrict__ output,
- ScratchPad** __restrict__ scratchPad)
- {
- // not supported
- }
-
inline static void hashHeavyTube(const uint8_t* __restrict__ input,
size_t size,
uint8_t* __restrict__ output,
@@ -4567,8 +4317,8 @@ public:
}
};
-template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES>
-class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, 5>
+template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, PowVariant VARIANT>
+class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, VARIANT, 5>
{//
public:
inline static void hash(const uint8_t* __restrict__ input,
@@ -5095,11 +4845,11 @@ public:
cx4 = _mm_aesenc_si128(cx4, ax4);
}
- SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0)
- SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1)
- SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2)
- SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3)
- SHUFFLE_PHASE_1(l4, (idx4&MASK), bx04, bx14, ax4)
+ SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l4, (idx4&MASK), bx04, bx14, ax4, VARIANT == POW_RWZ)
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
@@ -5121,7 +4871,7 @@ public:
lo = __umul128(idx0, cl, &hi);
- SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi);
+ SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ)
al0 += hi;
ah0 += lo;
@@ -5144,7 +4894,7 @@ public:
lo = __umul128(idx1, cl, &hi);
- SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi);
+ SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ)
al1 += hi;
ah1 += lo;
@@ -5167,7 +4917,7 @@ public:
lo = __umul128(idx2, cl, &hi);
- SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi);
+ SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ)
al2 += hi;
ah2 += lo;
@@ -5190,7 +4940,7 @@ public:
lo = __umul128(idx3, cl, &hi);
- SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi);
+ SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ)
al3 += hi;
ah3 += lo;
@@ -5213,7 +4963,283 @@ public:
lo = __umul128(idx4, cl, &hi);
- SHUFFLE_PHASE_2(l4, (idx4&MASK), bx04, bx14, ax4, lo, hi);
+ SHUFFLE_PHASE_2(l4, (idx4&MASK), bx04, bx14, ax4, lo, hi, VARIANT == POW_RWZ)
+
+ al4 += hi;
+ ah4 += lo;
+
+ ((uint64_t*) &l4[idx4 & MASK])[0] = al4;
+ ((uint64_t*) &l4[idx4 & MASK])[1] = ah4;
+
+ ah4 ^= ch;
+ al4 ^= cl;
+ idx4 = al4;
+
+ bx14 = bx04;
+ bx04 = cx4;
+ }
+
+ cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
+ cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
+ cn_implode_scratchpad((__m128i*) l2, (__m128i*) h2);
+ cn_implode_scratchpad((__m128i*) l3, (__m128i*) h3);
+ cn_implode_scratchpad((__m128i*) l4, (__m128i*) h4);
+
+ keccakf(h0, 24);
+ keccakf(h1, 24);
+ keccakf(h2, 24);
+ keccakf(h3, 24);
+ keccakf(h4, 24);
+
+ extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
+ extra_hashes[scratchPad[1]->state[0] & 3](scratchPad[1]->state, 200, output + 32);
+ extra_hashes[scratchPad[2]->state[0] & 3](scratchPad[2]->state, 200, output + 64);
+ extra_hashes[scratchPad[3]->state[0] & 3](scratchPad[3]->state, 200, output + 96);
+ extra_hashes[scratchPad[4]->state[0] & 3](scratchPad[4]->state, 200, output + 128);
+ }
+
+ // quintuple
+ inline static void hashPowV4(const uint8_t* __restrict__ input,
+ size_t size,
+ uint8_t* __restrict__ output,
+ ScratchPad** __restrict__ scratchPad,
+ uint64_t height)
+ {
+ keccak(input, (int) size, scratchPad[0]->state, 200);
+ keccak(input + size, (int) size, scratchPad[1]->state, 200);
+ keccak(input + 2 * size, (int) size, scratchPad[2]->state, 200);
+ keccak(input + 3 * size, (int) size, scratchPad[3]->state, 200);
+ keccak(input + 4 * size, (int) size, scratchPad[4]->state, 200);
+
+ const uint8_t* l0 = scratchPad[0]->memory;
+ const uint8_t* l1 = scratchPad[1]->memory;
+ const uint8_t* l2 = scratchPad[2]->memory;
+ const uint8_t* l3 = scratchPad[3]->memory;
+ const uint8_t* l4 = scratchPad[4]->memory;
+ uint64_t* h0 = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
+ uint64_t* h1 = reinterpret_cast<uint64_t*>(scratchPad[1]->state);
+ uint64_t* h2 = reinterpret_cast<uint64_t*>(scratchPad[2]->state);
+ uint64_t* h3 = reinterpret_cast<uint64_t*>(scratchPad[3]->state);
+ uint64_t* h4 = reinterpret_cast<uint64_t*>(scratchPad[4]->state);
+
+ cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
+ cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
+ cn_explode_scratchpad((__m128i*) h2, (__m128i*) l2);
+ cn_explode_scratchpad((__m128i*) h3, (__m128i*) l3);
+ cn_explode_scratchpad((__m128i*) h4, (__m128i*) l4);
+
+ uint64_t al0 = h0[0] ^h0[4];
+ uint64_t al1 = h1[0] ^h1[4];
+ uint64_t al2 = h2[0] ^h2[4];
+ uint64_t al3 = h3[0] ^h3[4];
+ uint64_t al4 = h4[0] ^h4[4];
+ uint64_t ah0 = h0[1] ^h0[5];
+ uint64_t ah1 = h1[1] ^h1[5];
+ uint64_t ah2 = h2[1] ^h2[5];
+ uint64_t ah3 = h3[1] ^h3[5];
+ uint64_t ah4 = h4[1] ^h4[5];
+
+ __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+ __m128i bx01 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+ __m128i bx02 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
+ __m128i bx03 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
+ __m128i bx04 = _mm_set_epi64x(h4[3] ^ h4[7], h4[2] ^ h4[6]);
+
+ __m128i bx10 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
+ __m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
+ __m128i bx12 = _mm_set_epi64x(h2[9] ^ h2[11], h2[8] ^ h2[10]);
+ __m128i bx13 = _mm_set_epi64x(h3[9] ^ h3[11], h3[8] ^ h3[10]);
+ __m128i bx14 = _mm_set_epi64x(h4[9] ^ h4[11], h4[8] ^ h4[10]);
+
+ uint64_t idx0 = h0[0] ^h0[4];
+ uint64_t idx1 = h1[0] ^h1[4];
+ uint64_t idx2 = h2[0] ^h2[4];
+ uint64_t idx3 = h3[0] ^h3[4];
+ uint64_t idx4 = h4[0] ^h4[4];
+
+ VARIANT4_RANDOM_MATH_INIT(0, h0)
+ VARIANT4_RANDOM_MATH_INIT(1, h1)
+ VARIANT4_RANDOM_MATH_INIT(2, h2)
+ VARIANT4_RANDOM_MATH_INIT(3, h3)
+ VARIANT4_RANDOM_MATH_INIT(4, h4)
+
+ for (size_t i = 0; i < ITERATIONS; i++) {
+ __m128i cx0;
+ __m128i cx1;
+ __m128i cx2;
+ __m128i cx3;
+ __m128i cx4;
+
+ const __m128i ax0 = _mm_set_epi64x(ah0, al0);
+ const __m128i ax1 = _mm_set_epi64x(ah1, al1);
+ const __m128i ax2 = _mm_set_epi64x(ah2, al2);
+ const __m128i ax3 = _mm_set_epi64x(ah3, al3);
+ const __m128i ax4 = _mm_set_epi64x(ah4, al4);
+
+ if (SOFT_AES) {
+ cx0 = soft_aesenc((uint32_t*) &l0[idx0 & MASK], ax0);
+ cx1 = soft_aesenc((uint32_t*) &l1[idx1 & MASK], ax1);
+ cx2 = soft_aesenc((uint32_t*) &l2[idx2 & MASK], ax2);
+ cx3 = soft_aesenc((uint32_t*) &l3[idx3 & MASK], ax3);
+ cx4 = soft_aesenc((uint32_t*) &l4[idx4 & MASK], ax4);
+ } else {
+ cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+ cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+ cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
+ cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
+ cx4 = _mm_load_si128((__m128i*) &l4[idx4 & MASK]);
+
+ cx0 = _mm_aesenc_si128(cx0, ax0);
+ cx1 = _mm_aesenc_si128(cx1, ax1);
+ cx2 = _mm_aesenc_si128(cx2, ax2);
+ cx3 = _mm_aesenc_si128(cx3, ax3);
+ cx4 = _mm_aesenc_si128(cx4, ax4);
+ }
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0)
+ SHUFFLE_V4(l1, (idx1&MASK), bx01, bx11, ax1, cx1)
+ SHUFFLE_V4(l2, (idx2&MASK), bx02, bx12, ax2, cx2)
+ SHUFFLE_V4(l3, (idx3&MASK), bx03, bx13, ax3, cx3)
+ SHUFFLE_V4(l4, (idx4&MASK), bx04, bx14, ax4, cx4)
+
+ _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
+ _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
+ _mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx02, cx2));
+ _mm_store_si128((__m128i*) &l3[idx3 & MASK], _mm_xor_si128(bx03, cx3));
+ _mm_store_si128((__m128i*) &l4[idx4 & MASK], _mm_xor_si128(bx04, cx4));
+
+ idx0 = EXTRACT64(cx0);
+ idx1 = EXTRACT64(cx1);
+ idx2 = EXTRACT64(cx2);
+ idx3 = EXTRACT64(cx3);
+ idx4 = EXTRACT64(cx4);
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & MASK])[0];
+ ch = ((uint64_t*) &l0[idx0 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx10)
+
+ if (VARIANT == POW_V4) {
+ al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
+ ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
+ }
+
+ lo = __umul128(idx0, cl, &hi);
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0);
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
+ ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+
+ bx10 = bx00;
+ bx00 = cx0;
+
+
+ cl = ((uint64_t*) &l1[idx1 & MASK])[0];
+ ch = ((uint64_t*) &l1[idx1 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx01, bx11)
+
+ if (VARIANT == POW_V4) {
+ al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
+ ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
+ }
+
+ lo = __umul128(idx1, cl, &hi);
+
+ SHUFFLE_V4(l1, (idx1&MASK), bx01, bx11, ax1, cx1);
+
+ al1 += hi;
+ ah1 += lo;
+
+ ((uint64_t*) &l1[idx1 & MASK])[0] = al1;
+ ((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
+
+ ah1 ^= ch;
+ al1 ^= cl;
+ idx1 = al1;
+
+ bx11 = bx01;
+ bx01 = cx1;
+
+
+ cl = ((uint64_t*) &l2[idx2 & MASK])[0];
+ ch = ((uint64_t*) &l2[idx2 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(2, al2, ah2, cl, bx02, bx12)
+
+ if (VARIANT == POW_V4) {
+ al2 ^= r2[2] | ((uint64_t)(r2[3]) << 32);
+ ah2 ^= r2[0] | ((uint64_t)(r2[1]) << 32);
+ }
+
+ lo = __umul128(idx2, cl, &hi);
+
+ SHUFFLE_V4(l2, (idx2&MASK), bx02, bx12, ax2, cx2);
+
+ al2 += hi;
+ ah2 += lo;
+
+ ((uint64_t*) &l2[idx2 & MASK])[0] = al2;
+ ((uint64_t*) &l2[idx2 & MASK])[1] = ah2;
+
+ ah2 ^= ch;
+ al2 ^= cl;
+ idx2 = al2;
+
+ bx12 = bx02;
+ bx02 = cx2;
+
+
+ cl = ((uint64_t*) &l3[idx3 & MASK])[0];
+ ch = ((uint64_t*) &l3[idx3 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(3, al3, ah3, cl, bx03, bx13)
+
+ if (VARIANT == POW_V4) {
+ al3 ^= r3[2] | ((uint64_t)(r3[3]) << 32);
+ ah3 ^= r3[0] | ((uint64_t)(r3[1]) << 32);
+ }
+
+ lo = __umul128(idx3, cl, &hi);
+
+ SHUFFLE_V4(l3, (idx3&MASK), bx03, bx13, ax3, cx3);
+
+ al3 += hi;
+ ah3 += lo;
+
+ ((uint64_t*) &l3[idx3 & MASK])[0] = al3;
+ ((uint64_t*) &l3[idx3 & MASK])[1] = ah3;
+
+ ah3 ^= ch;
+ al3 ^= cl;
+ idx3 = al3;
+
+ bx13 = bx03;
+ bx03 = cx3;
+
+
+ cl = ((uint64_t*) &l4[idx4 & MASK])[0];
+ ch = ((uint64_t*) &l4[idx4 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(4, al4, ah4, cl, bx04, bx14)
+
+ if (VARIANT == POW_V4) {
+ al4 ^= r4[2] | ((uint64_t)(r4[3]) << 32);
+ ah4 ^= r4[0] | ((uint64_t)(r4[1]) << 32);
+ }
+
+ lo = __umul128(idx4, cl, &hi);
+
+ SHUFFLE_V4(l4, (idx4&MASK), bx04, bx14, ax4, cx4);
al4 += hi;
ah4 += lo;
@@ -5496,14 +5522,6 @@ public:
// not supported
}
- inline static void hashHeavyHaven(const uint8_t* __restrict__ input,
- size_t size,
- uint8_t* __restrict__ output,
- ScratchPad** __restrict__ scratchPad)
- {
- // not supported
- }
-
inline static void hashHeavyTube(const uint8_t* __restrict__ input,
size_t size,
uint8_t* __restrict__ output,
diff --git a/src/crypto/CryptoNight_test.h b/src/crypto/CryptoNight_test.h
index 836f2822..373c51cc 100644
--- a/src/crypto/CryptoNight_test.h
+++ b/src/crypto/CryptoNight_test.h
@@ -138,6 +138,53 @@ const static uint8_t test_output_xtl_v9[64] = {
0xF1, 0xF0, 0x55, 0x34, 0x15, 0x29, 0x93, 0x04, 0x2D, 0xED, 0xD2, 0x33, 0x50, 0x6E, 0xBE, 0x25
};
+// CN XCASH
+const static uint8_t test_output_xcash[32] = {
+ 0xAE, 0xFB, 0xB3, 0xF0, 0xCC, 0x88, 0x04, 0x6D, 0x11, 0x9F, 0x6C, 0x54, 0xB9, 0x6D, 0x90, 0xC9,
+ 0xE8, 0x84, 0xEA, 0x3B, 0x59, 0x83, 0xA6, 0x0D, 0x50, 0xA4, 0x2D, 0x7D, 0x3E, 0xBE, 0x48, 0x21
+};
+
+// CN ZELERIUS
+const static uint8_t test_output_zelerius[32] = {
+ 0x51, 0x6E, 0x33, 0xC6, 0xE4, 0x46, 0xAB, 0xBC, 0xCD, 0xAD, 0x18, 0xC0, 0x4C, 0xD9, 0xA2, 0x5E,
+ 0x64, 0x10, 0x28, 0x53, 0xB2, 0x0A, 0x42, 0xDF, 0xDE, 0xAA, 0x8B, 0x59, 0x9E, 0xCF, 0x40, 0xE2
+};
+
+// CN RWZ
+const static uint8_t test_output_rwz[64] = {
+ 0x5f, 0x56, 0xc6, 0xb0, 0x99, 0x6b, 0xa2, 0x3e, 0x0b, 0xba, 0x07, 0x29, 0xc9, 0x90, 0x74, 0x85,
+ 0x5a, 0x10, 0xe3, 0x08, 0x7f, 0xdb, 0xfe, 0x94, 0x75, 0x33, 0x54, 0x73, 0x76, 0xf0, 0x75, 0xb8,
+ 0x8b, 0x70, 0x43, 0x9a, 0xfc, 0xf5, 0xeb, 0x15, 0xbb, 0xf9, 0xad, 0x9d, 0x2a, 0xbd, 0x72, 0x52,
+ 0x49, 0x54, 0x0b, 0x91, 0xea, 0x61, 0x7f, 0x98, 0x7d, 0x39, 0x17, 0xb7, 0xd7, 0x65, 0xff, 0x75
+};
+
+// CN V9 aka CN V4/V5 aka CN-R (height 10000)
+const static uint8_t test_output_v4[160] = {
+ 0x90, 0x20, 0x14, 0x86, 0x1E, 0xCD, 0x01, 0xC5, 0x43, 0xB5, 0x61, 0xFA, 0xC8, 0x3D, 0xFF, 0x7D,
+ 0x76, 0x67, 0xC2, 0xD7, 0xB3, 0xD4, 0xE3, 0x4B, 0x4C, 0x7E, 0x6D, 0x04, 0x31, 0x79, 0xE6, 0x96,
+ 0xEA, 0xF4, 0x14, 0x76, 0x38, 0x94, 0x7C, 0xCE, 0x02, 0x50, 0x7A, 0x31, 0xB8, 0x4D, 0xDD, 0x3B,
+ 0x92, 0xAA, 0xC6, 0x49, 0xA1, 0x64, 0xA1, 0xA8, 0x7C, 0xD9, 0x43, 0x14, 0xC5, 0x12, 0x86, 0x61,
+ 0x0A, 0x18, 0xBD, 0x11, 0x36, 0x06, 0x31, 0x0D, 0x9D, 0xC0, 0x8C, 0x41, 0x88, 0xCB, 0x7C, 0xE9,
+ 0x5D, 0xD2, 0xBA, 0xA5, 0xFB, 0x0D, 0x2B, 0xA6, 0x6E, 0x7C, 0x78, 0x72, 0x38, 0xFE, 0x53, 0x17,
+ 0x1A, 0x96, 0x89, 0x0E, 0x14, 0xFF, 0x34, 0x42, 0xC0, 0x5A, 0xAB, 0xC0, 0x3F, 0x39, 0x4E, 0x43,
+ 0x91, 0x38, 0x67, 0x79, 0x5B, 0xAE, 0xCC, 0xA7, 0xDB, 0x4C, 0xFE, 0x8B, 0x75, 0x76, 0x1F, 0xC4,
+ 0x98, 0x71, 0xE6, 0xC1, 0x08, 0x9D, 0xED, 0xCC, 0x47, 0xC3, 0xF3, 0x7A, 0xA9, 0x4A, 0x3A, 0xB9,
+ 0xAC, 0xB8, 0x5C, 0x9F, 0xCC, 0xCB, 0xC1, 0x93, 0x9E, 0xC6, 0x6D, 0xCC, 0x45, 0xF4, 0xBA, 0xBD
+};
+
+// CN V9 aka CN V4/V5 aka CN-R (height 10001)
+const static uint8_t test_output_v4_1[32] = {
+ 0x82, 0x58, 0x7D, 0x63, 0x7B, 0x6C, 0x0C, 0x96, 0x6A, 0x50, 0xF6, 0xC0, 0xAB, 0xB5, 0xEA, 0x1A,
+ 0x58, 0x2B, 0xEA, 0x7E, 0xF0, 0x2F, 0x3C, 0xA1, 0x7C, 0x1C, 0x7C, 0x2E, 0xF9, 0xE5, 0x66, 0xF2
+};
+
+// CN V9 aka CN V4/V5 aka CN-R (height 10002)
+const static uint8_t test_output_v4_2[32] = {
+ 0x64, 0xB2, 0x4E, 0x48, 0x4A, 0x28, 0xBF, 0x11, 0xC4, 0x8A, 0x68, 0xE7, 0xB7, 0x4B, 0xFD, 0xA7,
+ 0xFB, 0x95, 0x66, 0x05, 0x0C, 0xF7, 0xFA, 0xA7, 0x4B, 0xD9, 0x18, 0x59, 0x88, 0x7F, 0x47, 0xA2
+};
+
+
// CN-LITE
const static uint8_t test_output_v0_lite[160] = {
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h
index 0c9127f1..cbe970e7 100644
--- a/src/crypto/CryptoNight_x86.h
+++ b/src/crypto/CryptoNight_x86.h
@@ -36,10 +36,24 @@
# define __restrict__ __restrict
#endif
+#define SWAP32LE(x) x
+#define SWAP64LE(x) x
+#define hash_extra_blake(data, length, hash) blake256_hash((uint8_t*)(hash), (uint8_t*)(data), (length))
+
+#ifndef NOINLINE
+#ifdef __GNUC__
+#define NOINLINE __attribute__ ((noinline))
+#elif _MSC_VER
+#define NOINLINE __declspec(noinline)
+#else
+#define NOINLINE
+#endif
+#endif
#include "crypto/CryptoNight.h"
#include "crypto/soft_aes.h"
#include "AsmOptimization.h"
+#include "variant4_random_math.h"
extern "C"
{
@@ -71,6 +85,19 @@ extern "C"
void cnv2_main_loop_ultralite_bulldozer_asm(ScratchPad* ctx0);
void cnv2_double_main_loop_ultralite_sandybridge_asm(ScratchPad* ctx0, ScratchPad* ctx1);
+ void cnv2_main_loop_xcash_ivybridge_asm(ScratchPad* ctx0);
+ void cnv2_main_loop_xcash_ryzen_asm(ScratchPad* ctx0);
+ void cnv2_main_loop_xcash_bulldozer_asm(ScratchPad* ctx0);
+ void cnv2_double_main_loop_xcash_sandybridge_asm(ScratchPad* ctx0, ScratchPad* ctx1);
+
+ void cnv2_main_loop_zelerius_ivybridge_asm(ScratchPad* ctx0);
+ void cnv2_main_loop_zelerius_ryzen_asm(ScratchPad* ctx0);
+ void cnv2_main_loop_zelerius_bulldozer_asm(ScratchPad* ctx0);
+ void cnv2_double_main_loop_zelerius_sandybridge_asm(ScratchPad* ctx0, ScratchPad* ctx1);
+
+ void cnv2_main_loop_rwz_all_asm(ScratchPad* ctx0);
+ void cnv2_double_main_loop_rwz_all_asm(ScratchPad* ctx0, ScratchPad* ctx1);
+
void cnv1_main_loop_soft_aes_sandybridge_asm(ScratchPad* ctx0);
void cnv1_main_loop_lite_soft_aes_sandybridge_asm(ScratchPad* ctx0);
void cnv1_main_loop_fast_soft_aes_sandybridge_asm(ScratchPad* ctx0);
@@ -80,6 +107,16 @@ extern "C"
void cnv2_main_loop_soft_aes_sandybridge_asm(ScratchPad* ctx0);
void cnv2_main_loop_fastv2_soft_aes_sandybridge_asm(ScratchPad* ctx0);
void cnv2_main_loop_ultralite_soft_aes_sandybridge_asm(ScratchPad* ctx);
+ void cnv2_main_loop_xcash_soft_aes_sandybridge_asm(ScratchPad* ctx);
+ void cnv2_main_loop_zelerius_soft_aes_sandybridge_asm(ScratchPad* ctx);
+
+ void wow_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM);
+ void wow_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM);
+ void wow_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM);
+
+ void v4_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM);
+ void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM);
+ void v4_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM);
#endif
}
@@ -148,24 +185,22 @@ static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uin
}
#endif
-#ifdef _MSC_VER
-#else
-#endif
-
#ifdef _MSC_VER
# define SET_ROUNDING_MODE_UP() _control87(RC_UP, MCW_RC);
+# define SET_ROUNDING_MODE_DOWN() _control87(RC_DOWN, MCW_RC);
#else
# define SET_ROUNDING_MODE_UP() std::fesetround(FE_UPWARD);
+# define SET_ROUNDING_MODE_DOWN() fesetround(FE_DOWNWARD);
#endif
-# define SHUFFLE_PHASE_1(l, idx, bx0, bx1, ax) \
+# define SHUFFLE_PHASE_1(l, idx, bx0, bx1, ax, reverse) \
{ \
- const __m128i chunk1 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x10))); \
- const __m128i chunk2 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x20))); \
- const __m128i chunk3 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x30))); \
- _mm_store_si128((__m128i *)((l) + ((idx) ^ 0x10)), _mm_add_epi64(chunk3, bx1)); \
- _mm_store_si128((__m128i *)((l) + ((idx) ^ 0x20)), _mm_add_epi64(chunk1, bx0)); \
- _mm_store_si128((__m128i *)((l) + ((idx) ^ 0x30)), _mm_add_epi64(chunk2, ax)); \
+ const __m128i chunk1 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x10))); \
+ const __m128i chunk2 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x20))); \
+ const __m128i chunk3 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x30))); \
+ _mm_store_si128((__m128i *)((l) + ((idx) ^ 0x10)), _mm_add_epi64(chunk3, bx1)); \
+ _mm_store_si128((__m128i *)((l) + ((idx) ^ 0x20)), _mm_add_epi64(chunk1, bx0)); \
+ _mm_store_si128((__m128i *)((l) + ((idx) ^ 0x30)), _mm_add_epi64(chunk2, ax)); \
}
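+// NOTE: the "reverse" flag added to SHUFFLE_PHASE_1/SHUFFLE_PHASE_2 is passed as (VARIANT == POW_RWZ)
+// at the call sites; the rwz/graft variant uses a reversed chunk order in the shuffle phases.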
# define INTEGER_MATH_V2(idx, cl, cx) \
@@ -179,18 +214,47 @@ static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uin
sqrt_result##idx = int_sqrt_v2(cx_ + division_result); \
}
-# define SHUFFLE_PHASE_2(l, idx, bx0, bx1, ax, lo, hi) \
+# define SHUFFLE_PHASE_2(l, idx, bx0, bx1, ax, lo, hi, reverse) \
{ \
const __m128i chunk1 = _mm_xor_si128(_mm_load_si128((__m128i *)((l) + ((idx) ^ 0x10))), _mm_set_epi64x(lo, hi)); \
const __m128i chunk2 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x20))); \
const __m128i chunk3 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x30))); \
hi ^= ((uint64_t*)((l) + ((idx) ^ 0x20)))[0]; \
lo ^= ((uint64_t*)((l) + ((idx) ^ 0x20)))[1]; \
+ _mm_store_si128((__m128i *)((l) + ((idx) ^ 0x10)), _mm_add_epi64(chunk3, bx1)); \
+ _mm_store_si128((__m128i *)((l) + ((idx) ^ 0x20)), _mm_add_epi64(chunk1, bx0)); \
+ _mm_store_si128((__m128i *)((l) + ((idx) ^ 0x30)), _mm_add_epi64(chunk2, ax)); \
+}
+
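+// CN-R shuffle: same three-chunk mixing as SHUFFLE_PHASE_1, but the neighbouring chunks are also
+// folded back into cx.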
+# define SHUFFLE_V4(l, idx, bx0, bx1, ax, cx) \
+{ \
+ const __m128i chunk1 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x10))); \
+ const __m128i chunk2 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x20))); \
+ const __m128i chunk3 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x30))); \
_mm_store_si128((__m128i *)((l) + ((idx) ^ 0x10)), _mm_add_epi64(chunk3, bx1)); \
_mm_store_si128((__m128i *)((l) + ((idx) ^ 0x20)), _mm_add_epi64(chunk1, bx0)); \
_mm_store_si128((__m128i *)((l) + ((idx) ^ 0x30)), _mm_add_epi64(chunk2, ax)); \
+ cx = _mm_xor_si128(_mm_xor_si128(cx, chunk3), _mm_xor_si128(chunk1, chunk2)); \
}
+# define VARIANT4_RANDOM_MATH_INIT(idx, h) \
+ uint32_t r##idx[9]; \
+ struct V4_Instruction code##idx[256]; \
+ r##idx[0] = (uint32_t)(h[12]); \
+ r##idx[1] = (uint32_t)(h[12] >> 32); \
+ r##idx[2] = (uint32_t)(h[13]); \
+ r##idx[3] = (uint32_t)(h[13] >> 32); \
+ v4_random_math_init(code##idx, VARIANT, height);
+
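+// VARIANT4_RANDOM_MATH executes one round of the random-math program: cl is perturbed by r0..r3,
+// r4..r8 are refreshed from a, b0 and b1, then the generated instructions are run over r.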
+# define VARIANT4_RANDOM_MATH(idx, al, ah, cl, bx0, bx1) \
+ cl ^= (r##idx[0] + r##idx[1]) | ((uint64_t)(r##idx[2] + r##idx[3]) << 32); \
+ r##idx[4] = static_cast<uint32_t>(al); \
+ r##idx[5] = static_cast<uint32_t>(ah); \
+ r##idx[6] = static_cast<uint32_t>(_mm_cvtsi128_si32(bx0)); \
+ r##idx[7] = static_cast<uint32_t>(_mm_cvtsi128_si32(bx1)); \
+ r##idx[8] = static_cast<uint32_t>(_mm_cvtsi128_si32(_mm_srli_si128(bx1, 8))); \
+ v4_random_math(code##idx, r##idx); \
+
static inline void do_blake_hash(const uint8_t *input, size_t len, uint8_t *output) {
blake256_hash(output, input, len);
}
@@ -592,7 +656,7 @@ return r;
}
// n-Loop version. Seems to be a little bit slower than the hardcoded one.
-template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, size_t NUM_HASH_BLOCKS>
+template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, PowVariant VARIANT, size_t NUM_HASH_BLOCKS>
class CryptoNightMultiHash
{
public:
@@ -601,78 +665,7 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
- __m128i ax[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast<const uint8_t*>(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state, 200);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast<uint64_t*>(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- ax[hashBlock] = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- if (SOFT_AES) {
- cx[hashBlock] = soft_aesenc((uint32_t *) &l[hashBlock][idx[hashBlock] & MASK], ax[hashBlock]);
- } else {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- cx[hashBlock] = _mm_aesenc_si128(cx[hashBlock], ax[hashBlock]);
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- uint64_t hi, lo, cl, ch;
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- bx[hashBlock] = cx[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ // dummy
}
inline static void hashPowV2(const uint8_t* __restrict__ input,
@@ -680,220 +673,53 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- uint64_t tweak1_2[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
- __m128i ax[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast<const uint8_t*>(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state, 200);
- tweak1_2[hashBlock] = (*reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35 + hashBlock * size) ^
- *(reinterpret_cast<const uint64_t*>(scratchPad[hashBlock]->state) + 24));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast<uint64_t*>(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- ax[hashBlock] = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- if (SOFT_AES) {
- cx[hashBlock] = soft_aesenc((uint32_t *) &l[hashBlock][idx[hashBlock] & MASK], ax[hashBlock]);
- } else {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- cx[hashBlock] = _mm_aesenc_si128(cx[hashBlock], ax[hashBlock]);
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK], _mm_xor_si128(bx[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- const uint8_t tmp = reinterpret_cast<const uint8_t*>(&l[hashBlock][idx[hashBlock] & MASK])[11];
- static const uint32_t table = 0x75310;
- const uint8_t index = (((tmp >> INDEX_SHIFT) & 6) | (tmp & 1)) << 1;
- ((uint8_t *) (&l[hashBlock][idx[hashBlock] & MASK]))[11] = tmp ^ ((table >> index) & 0x30);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- uint64_t hi, lo, cl, ch;
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- bx[hashBlock] = cx[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ // dummy
}
inline static void hashPowV2_asm(const uint8_t* __restrict__ input,
size_t size,
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad,
- AsmOptimization asmOptimization,
- PowVariant powVariant)
+ AsmOptimization asmOptimization)
{
- // not supported
+ // dummy
}
- // multi
inline static void hashPowV3(const uint8_t* __restrict__ input,
size_t size,
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- uint64_t sqrt_result[NUM_HASH_BLOCKS];
- __m128i bx0[NUM_HASH_BLOCKS];
- __m128i bx1[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
- __m128i ax[NUM_HASH_BLOCKS];
- __m128i division_result_xmm[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast<const uint8_t*>(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state, 200);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast<uint64_t*>(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx0[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- bx1[hashBlock] = _mm_set_epi64x(h[hashBlock][9] ^ h[hashBlock][11], h[hashBlock][8] ^ h[hashBlock][10]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
-
- division_result_xmm[hashBlock] = _mm_cvtsi64_si128(h[hashBlock][12]);
- sqrt_result[hashBlock] = h[hashBlock][13];
- }
-
- SET_ROUNDING_MODE_UP();
-
- uint64_t sqrt_result0;
- __m128i division_result_xmm0;
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- ax[hashBlock] = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- if (SOFT_AES) {
- cx[hashBlock] = soft_aesenc((uint32_t *) &l[hashBlock][idx[hashBlock] & MASK], ax[hashBlock]);
- } else {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- cx[hashBlock] = _mm_aesenc_si128(cx[hashBlock], ax[hashBlock]);
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- SHUFFLE_PHASE_1(l[hashBlock], idx[hashBlock] & MASK, bx0[hashBlock], bx1[hashBlock], ax[hashBlock])
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx0[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- uint64_t hi, lo, cl, ch;
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cl = ((uint64_t *) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t *) &l[hashBlock][idx[hashBlock] & MASK])[1];
-
- sqrt_result0 = sqrt_result[hashBlock];
- division_result_xmm0 = division_result_xmm[hashBlock];
-
- INTEGER_MATH_V2(0, cl, cx[hashBlock])
-
- sqrt_result[hashBlock] = sqrt_result0;
- division_result_xmm[hashBlock] = division_result_xmm0;
-
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- SHUFFLE_PHASE_2(l[hashBlock], idx[hashBlock] & MASK, bx0[hashBlock], bx1[hashBlock], ax[hashBlock], lo, hi)
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- bx1[hashBlock] = bx0[hashBlock];
- bx0[hashBlock] = cx[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ // dummy
}
inline static void hashPowV3_asm(const uint8_t* __restrict__ input,
size_t size,
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad,
- AsmOptimization asmOptimization,
- PowVariant powVariant)
+ AsmOptimization asmOptimization)
{
- // not supported
+ // dummy
+ }
+
+ inline static void hashPowV4(const uint8_t* __restrict__ input,
+ size_t size,
+ uint8_t* __restrict__ output,
+ ScratchPad** __restrict__ scratchPad,
+ uint64_t height)
+ {
+ // dummy
+ }
+
+ inline static void hashPowV4_asm(const uint8_t* __restrict__ input,
+ size_t size,
+ uint8_t* __restrict__ output,
+ ScratchPad** __restrict__ scratchPad,
+ uint64_t height,
+ AsmOptimization asmOptimization)
+ {
+ // dummy
}
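
The new hashPowV4/hashPowV4_asm entry points mirror the V3 ones but take the block height, since the CN-R random program has to be regenerated whenever the height changes; in this n-block template they stay dummies and only the single-hash specialisation below implements them. A hypothetical caller-side sketch (the names and the Variant enum are placeholders, not the miner's types):

```cpp
#include <cstdint>
#include <cstddef>

// Placeholder declarations standing in for the real single-hash entry points.
struct ScratchPad;
void hashPowV3(const uint8_t* in, size_t size, uint8_t* out, ScratchPad** pads);
void hashPowV4(const uint8_t* in, size_t size, uint8_t* out, ScratchPad** pads, uint64_t height);

enum class Variant { CNv2, CNR };

// Hypothetical dispatcher: only the CN-R path needs the block height.
void hashJob(Variant v, const uint8_t* in, size_t size, uint8_t* out,
             ScratchPad** pads, uint64_t height)
{
    if (v == Variant::CNR) {
        hashPowV4(in, size, out, pads, height);
    } else {
        hashPowV3(in, size, out, pads);
    }
}
```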
inline static void hashLiteTube(const uint8_t* __restrict__ input,
@@ -901,94 +727,7 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- uint64_t tweak1_2[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
- __m128i ax[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast<const uint8_t*>(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state, 200);
- tweak1_2[hashBlock] = (*reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35 + hashBlock * size) ^
- *(reinterpret_cast<const uint64_t*>(scratchPad[hashBlock]->state) + 24));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast<uint64_t*>(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- ax[hashBlock] = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- if (SOFT_AES) {
- cx[hashBlock] = soft_aesenc((uint32_t *) &l[hashBlock][idx[hashBlock] & MASK], ax[hashBlock]);
- } else {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- cx[hashBlock] = _mm_aesenc_si128(cx[hashBlock], ax[hashBlock]);
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- const uint8_t tmp = reinterpret_cast<const uint8_t*>(&l[hashBlock][idx[hashBlock] & MASK])[11];
- static const uint32_t table = 0x75310;
- const uint8_t index = (((tmp >> INDEX_SHIFT) & 6) | (tmp & 1)) << 1;
- ((uint8_t *) (&l[hashBlock][idx[hashBlock] & MASK]))[11] = tmp ^ ((table >> index) & 0x30);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- uint64_t hi, lo, cl, ch;
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t*)&l[hashBlock][idx[hashBlock] & MASK])[1] ^= ((uint64_t*)&l[hashBlock][idx[hashBlock] & MASK])[0];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- bx[hashBlock] = cx[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ // dummy
}
inline static void hashHeavy(const uint8_t* __restrict__ input,
@@ -996,171 +735,7 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
- __m128i ax[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast<const uint8_t*>(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state, 200);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast<uint64_t*>(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad_heavy((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- ax[hashBlock] = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- if (SOFT_AES) {
- cx[hashBlock] = soft_aesenc((uint32_t *) &l[hashBlock][idx[hashBlock] & MASK], ax[hashBlock]);
- } else {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- cx[hashBlock] = _mm_aesenc_si128(cx[hashBlock], ax[hashBlock]);
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- uint64_t hi, lo, cl, ch;
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- int64_t n = ((int64_t*)&l[hashBlock][idx[hashBlock] & MASK])[0];
- int32_t d = ((int32_t*)&l[hashBlock][idx[hashBlock] & MASK])[2];
- int64_t q = n / (d | 0x5);
-
- ((int64_t*)&l[hashBlock][idx[hashBlock] & MASK])[0] = n ^ q;
- idx[hashBlock] = d ^ q;
-
- bx[hashBlock] = cx[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad_heavy((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
- }
-
- inline static void hashHeavyHaven(const uint8_t* __restrict__ input,
- size_t size,
- uint8_t* __restrict__ output,
- ScratchPad** __restrict__ scratchPad)
- {
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
- __m128i ax[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast<const uint8_t*>(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state, 200);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast<uint64_t*>(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad_heavy((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- ax[hashBlock] = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- if (SOFT_AES) {
- cx[hashBlock] = soft_aesenc((uint32_t *) &l[hashBlock][idx[hashBlock] & MASK], ax[hashBlock]);
- } else {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- cx[hashBlock] = _mm_aesenc_si128(cx[hashBlock], ax[hashBlock]);
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- uint64_t hi, lo, cl, ch;
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- int64_t n = ((int64_t*)&l[hashBlock][idx[hashBlock] & MASK])[0];
- int32_t d = ((int32_t*)&l[hashBlock][idx[hashBlock] & MASK])[2];
- int64_t q = n / (d | 0x5);
-
- ((int64_t*)&l[hashBlock][idx[hashBlock] & MASK])[0] = n ^ q;
- idx[hashBlock] = (~d) ^ q;
-
- bx[hashBlock] = cx[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad_heavy((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ // dummy
}
inline static void hashHeavyTube(const uint8_t* __restrict__ input,
@@ -1168,130 +743,12 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- uint64_t tweak1_2[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast<const uint8_t*>(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state, 200);
- tweak1_2[hashBlock] = (*reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35 + hashBlock * size) ^
- *(reinterpret_cast<const uint64_t*>(scratchPad[hashBlock]->state) + 24));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast<uint64_t*>(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad_heavy((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- union alignas(16) {
- uint32_t k[4];
- uint64_t v64[2];
- };
- alignas(16) uint32_t x[4];
-
-#define BYTE(p, i) ((unsigned char*)&p)[i]
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- const __m128i &key = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- _mm_store_si128((__m128i *) k, key);
- cx[hashBlock] = _mm_xor_si128(cx[hashBlock], _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
- _mm_store_si128((__m128i *) x, cx[hashBlock]);
-
- k[0] ^= saes_table[0][BYTE(x[0], 0)] ^ saes_table[1][BYTE(x[1], 1)] ^ saes_table[2][BYTE(x[2], 2)] ^
- saes_table[3][BYTE(x[3], 3)];
- x[0] ^= k[0];
- k[1] ^= saes_table[0][BYTE(x[1], 0)] ^ saes_table[1][BYTE(x[2], 1)] ^ saes_table[2][BYTE(x[3], 2)] ^
- saes_table[3][BYTE(x[0], 3)];
- x[1] ^= k[1];
- k[2] ^= saes_table[0][BYTE(x[2], 0)] ^ saes_table[1][BYTE(x[3], 1)] ^ saes_table[2][BYTE(x[0], 2)] ^
- saes_table[3][BYTE(x[1], 3)];
- x[2] ^= k[2];
- k[3] ^= saes_table[0][BYTE(x[3], 0)] ^ saes_table[1][BYTE(x[0], 1)] ^ saes_table[2][BYTE(x[1], 2)] ^
- saes_table[3][BYTE(x[2], 3)];
-
- cx[hashBlock] = _mm_load_si128((__m128i *) k);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- const uint8_t tmp = reinterpret_cast<const uint8_t*>(&l[hashBlock][idx[hashBlock] & MASK])[11];
- static const uint32_t table = 0x75310;
- const uint8_t index = (((tmp >> INDEX_SHIFT) & 6) | (tmp & 1)) << 1;
- ((uint8_t *) (&l[hashBlock][idx[hashBlock] & MASK]))[11] = tmp ^ ((table >> index) & 0x30);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t*)&l[hashBlock][idx[hashBlock] & MASK])[1] ^= ((uint64_t*)&l[hashBlock][idx[hashBlock] & MASK])[0];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- int64_t n = ((int64_t*)&l[hashBlock][idx[hashBlock] & MASK])[0];
- int32_t d = ((int32_t*)&l[hashBlock][idx[hashBlock] & MASK])[2];
- int64_t q = n / (d | 0x5);
-
- ((int64_t*)&l[hashBlock][idx[hashBlock] & MASK])[0] = n ^ q;
- idx[hashBlock] = d ^ q;
-
- bx[hashBlock] = cx[hashBlock];
- }
- }
-
-#undef BYTE
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad_heavy((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ // dummy
}
};
-template
-class CryptoNightMultiHash
+template
+class CryptoNightMultiHash
{
public:
inline static void hash(const uint8_t* __restrict__ input,
@@ -1425,8 +882,7 @@ public:
size_t size,
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad,
- AsmOptimization asmOptimization,
- PowVariant powVariant)
+ AsmOptimization asmOptimization)
{
keccak(static_cast<const uint8_t*>(input), (int) size, scratchPad[0]->state, 200);
@@ -1447,7 +903,7 @@ public:
if (SOFT_AES) {
scratchPad[0]->t_fn = (const uint32_t*)saes_table;
- switch (powVariant)
+ switch (VARIANT)
{
case POW_MSR:
cnv1_main_loop_fast_soft_aes_sandybridge_asm(scratchPad[0]);
@@ -1468,7 +924,7 @@ public:
break;
}
} else {
- switch (powVariant)
+ switch (VARIANT)
{
case POW_MSR:
cnv1_main_loop_fast_sandybridge_asm(scratchPad[0]);
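
Throughout these asm paths the former powVariant argument is replaced by the VARIANT template parameter, so the variant switch is resolved per instantiation rather than per call. A minimal sketch of that pattern with hypothetical names (not the miner's types); with a compile-time VARIANT the optimizer can fold the switch and drop the untaken branches:

```cpp
#include <cstdio>

enum class Pow { Default, Turtle, Double, Zelerius, Rwz };

// Minimal sketch: when the variant is a template parameter, every branch but the
// selected one is dead code in a given instantiation.
template <Pow VARIANT>
void run_main_loop()
{
    switch (VARIANT) {
    case Pow::Turtle:   std::puts("ultralite loop"); break;
    case Pow::Double:   std::puts("xcash loop");     break;
    case Pow::Zelerius: std::puts("zelerius loop");  break;
    case Pow::Rwz:      std::puts("rwz loop");       break;
    default:            std::puts("default loop");   break;
    }
}

int main()
{
    run_main_loop<Pow::Zelerius>(); // only the zelerius branch is reachable here
}
```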
@@ -1533,7 +989,7 @@ public:
cx = _mm_aesenc_si128(cx, ax);
}
- SHUFFLE_PHASE_1(l, (idx&MASK), bx0, bx1, ax)
+ SHUFFLE_PHASE_1(l, (idx&MASK), bx0, bx1, ax, VARIANT == POW_RWZ)
_mm_store_si128((__m128i*) &l[idx & MASK], _mm_xor_si128(bx0, cx));
@@ -1547,7 +1003,7 @@ public:
lo = __umul128(idx, cl, &hi);
- SHUFFLE_PHASE_2(l, (idx&MASK), bx0, bx1, ax, lo, hi)
+ SHUFFLE_PHASE_2(l, (idx&MASK), bx0, bx1, ax, lo, hi, VARIANT == POW_RWZ)
al += hi; // two fence statements are overhead
ah += lo;
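
The extra VARIANT == POW_RWZ argument feeds the new reverse parameter added to the shuffle macros; the Graft "reversed waltz" variant walks the three neighbouring lines in the opposite order. A small illustration of how such a compile-time flag can flip the pairing (the pairing shown for the reverse case is illustrative only; the authoritative ordering is whatever the full macro bodies define):

```cpp
#include <cstdint>

// Hypothetical compile-time 'reverse' switch for one lane of a CNv2-style shuffle:
// the same three loads and stores happen either way, only which chunk is paired
// with which register/destination changes.
template <bool REVERSE>
inline void shuffle_lane(uint64_t dst[3], const uint64_t chunk[3],
                         uint64_t bx0, uint64_t bx1, uint64_t ax)
{
    if (REVERSE) {
        // rwz-style: neighbouring lines visited the other way round (illustrative pairing)
        dst[0] = chunk[0] + bx1;
        dst[1] = chunk[2] + bx0;
        dst[2] = chunk[1] + ax;
    } else {
        // default CNv2 pairing, matching SHUFFLE_PHASE_1 above
        dst[0] = chunk[2] + bx1;   // line at idx ^ 0x10 gets chunk3 + bx1
        dst[1] = chunk[0] + bx0;   // line at idx ^ 0x20 gets chunk1 + bx0
        dst[2] = chunk[1] + ax;    // line at idx ^ 0x30 gets chunk2 + ax
    }
}
```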
@@ -1568,14 +1024,12 @@ public:
extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
}
-
// single asm
inline static void hashPowV3_asm(const uint8_t* __restrict__ input,
size_t size,
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad,
- AsmOptimization asmOptimization,
- PowVariant powVariant)
+ AsmOptimization asmOptimization)
{
const uint8_t* l = scratchPad[0]->memory;
uint64_t* h = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
@@ -1589,7 +1043,7 @@ public:
scratchPad[0]->input = input;
scratchPad[0]->t_fn = (const uint32_t*)saes_table;
- switch (powVariant)
+ switch (VARIANT)
{
case POW_FAST_2:
cnv2_main_loop_fastv2_soft_aes_sandybridge_asm(scratchPad[0]);
@@ -1597,12 +1051,18 @@ public:
case POW_TURTLE:
cnv2_main_loop_ultralite_soft_aes_sandybridge_asm(scratchPad[0]);
break;
+ case POW_DOUBLE:
+ cnv2_main_loop_xcash_soft_aes_sandybridge_asm(scratchPad[0]);
+ break;
+ case POW_ZELERIUS:
+ cnv2_main_loop_zelerius_soft_aes_sandybridge_asm(scratchPad[0]);
+ break;
default:
cnv2_main_loop_soft_aes_sandybridge_asm(scratchPad[0]);
break;
}
} else {
- switch (powVariant)
+ switch (VARIANT)
{
case POW_FAST_2:
cnv2_main_loop_fastv2_ivybridge_asm(scratchPad[0]);
@@ -1610,13 +1070,22 @@ public:
case POW_TURTLE:
cnv2_main_loop_ultralite_ivybridge_asm(scratchPad[0]);
break;
+ case POW_DOUBLE:
+ cnv2_main_loop_xcash_ivybridge_asm(scratchPad[0]);
+ break;
+ case POW_ZELERIUS:
+ cnv2_main_loop_zelerius_ivybridge_asm(scratchPad[0]);
+ break;
+ case POW_RWZ:
+ cnv2_main_loop_rwz_all_asm(scratchPad[0]);
+ break;
default:
cnv2_main_loop_ivybridge_asm(scratchPad[0]);
break;
}
}
} else if (asmOptimization == AsmOptimization::ASM_RYZEN) {
- switch (powVariant)
+ switch (VARIANT)
{
case POW_FAST_2:
cnv2_main_loop_fastv2_ryzen_asm(scratchPad[0]);
@@ -1624,12 +1093,21 @@ public:
case POW_TURTLE:
cnv2_main_loop_ultralite_ryzen_asm(scratchPad[0]);
break;
+ case POW_DOUBLE:
+ cnv2_main_loop_xcash_ryzen_asm(scratchPad[0]);
+ break;
+ case POW_ZELERIUS:
+ cnv2_main_loop_zelerius_ryzen_asm(scratchPad[0]);
+ break;
+ case POW_RWZ:
+ cnv2_main_loop_rwz_all_asm(scratchPad[0]);
+ break;
default:
cnv2_main_loop_ryzen_asm(scratchPad[0]);
break;
}
} else if (asmOptimization == AsmOptimization::ASM_BULLDOZER) {
- switch (powVariant)
+ switch (VARIANT)
{
case POW_FAST_2:
cnv2_main_loop_fastv2_bulldozer_asm(scratchPad[0]);
@@ -1637,6 +1115,15 @@ public:
case POW_TURTLE:
cnv2_main_loop_ultralite_bulldozer_asm(scratchPad[0]);
break;
+ case POW_DOUBLE:
+ cnv2_main_loop_xcash_bulldozer_asm(scratchPad[0]);
+ break;
+ case POW_ZELERIUS:
+ cnv2_main_loop_zelerius_bulldozer_asm(scratchPad[0]);
+ break;
+ case POW_RWZ:
+ cnv2_main_loop_rwz_all_asm(scratchPad[0]);
+ break;
default:
cnv2_main_loop_bulldozer_asm(scratchPad[0]);
break;
@@ -1649,6 +1136,140 @@ public:
extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
}
+ // single
+ inline static void hashPowV4(const uint8_t* __restrict__ input,
+ size_t size,
+ uint8_t* __restrict__ output,
+ ScratchPad** __restrict__ scratchPad,
+ uint64_t height)
+ {
+ keccak(static_cast