diff --git a/CMakeLists.txt b/CMakeLists.txt
index 14dcc931..1f328ccc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,22 +1,26 @@
 cmake_minimum_required(VERSION 2.8)
-project(xmrig)
+project(ninjarig)
 
 option(WITH_LIBCPUID        "Use Libcpuid" ON)
-option(WITH_AEON            "CryptoNight-Lite support" ON)
-option(WITH_SUMO            "CryptoNight-Heavy support" ON)
-option(WITH_CN_PICO         "CryptoNight-Pico support" ON)
-option(WITH_CN_GPU          "CryptoNight-GPU support" ON)
 option(WITH_HTTPD           "HTTP REST API" ON)
 option(WITH_DEBUG_LOG       "Enable debug log output" OFF)
 option(WITH_TLS             "Enable OpenSSL support" ON)
-option(WITH_ASM             "Enable ASM PoW implementations" ON)
-option(BUILD_STATIC         "Build static binary" OFF)
-option(ARM_TARGET           "Force use specific ARM target 8 or 7" 0)
 option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF)
+option(WITH_CUDA            "Enable CUDA support" ON)
+option(WITH_OPENCL          "Enable OpenCL support" ON)
 
 include (CheckIncludeFile)
 include (cmake/cpu.cmake)
+include (cmake/TargetArch.cmake)
 
+target_architecture (ARCH)
+MESSAGE( STATUS "Target architecture is: " ${ARCH} )
+# Bake the run-time library search path in at build time. $ORIGIN is expanded by the dynamic loader to the directory of the binary itself; a relative "./" would be resolved against the current working directory instead.
+set(CMAKE_SKIP_BUILD_RPATH FALSE)
+set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
+set(CMAKE_INSTALL_RPATH "\$ORIGIN")
+set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+set(CMAKE_MACOSX_RPATH 0)
 
 set(HEADERS
     src/api/NetworkState.h
@@ -69,43 +73,23 @@ set(HEADERS
     src/core/ConfigLoader_default.h
     src/core/Controller.h
     src/interfaces/IJobResultListener.h
-    src/interfaces/IThread.h
     src/interfaces/IWorker.h
-    src/Mem.h
     src/net/JobResult.h
     src/net/Network.h
     src/net/strategies/DonateStrategy.h
+    src/net/strategies/Http.h
     src/Summary.h
     src/version.h
-    src/workers/CpuThread.h
+    src/core/HasherConfig.h
     src/workers/Handle.h
     src/workers/Hashrate.h
-    src/workers/MultiWorker.h
     src/workers/Worker.h
     src/workers/Workers.h
    )
 
 set(HEADERS_CRYPTO
-    src/crypto/c_blake256.h
-    src/crypto/c_groestl.h
-    src/crypto/c_jh.h
-    src/crypto/c_skein.h
-    src/crypto/CryptoNight.h
-    src/crypto/CryptoNight_constants.h
-    src/crypto/CryptoNight_monero.h
-    src/crypto/CryptoNight_test.h
-    src/crypto/groestl_tables.h
-    src/crypto/hash.h
-    src/crypto/skein_port.h
-    src/crypto/soft_aes.h
-    src/crypto/asm/CryptonightR_template.h
-   )
-
-if (XMRIG_ARM)
-    set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/CryptoNight_arm.h)
-else()
-    set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/CryptoNight_x86.h)
-endif()
+        src/crypto/Argon2_constants.h
+        )
 
 set(SOURCES
     src/api/NetworkState.cpp
@@ -138,25 +122,64 @@ set(SOURCES
     src/common/Platform.cpp
     src/core/Config.cpp
     src/core/Controller.cpp
-    src/Mem.cpp
     src/net/Network.cpp
     src/net/strategies/DonateStrategy.cpp
+    src/net/strategies/Http.cpp
+    src/net/strategies/http_parser/http_parser.c
     src/Summary.cpp
-    src/workers/CpuThread.cpp
     src/workers/Handle.cpp
     src/workers/Hashrate.cpp
-    src/workers/MultiWorker.cpp
     src/workers/Worker.cpp
     src/workers/Workers.cpp
     src/xmrig.cpp
    )
 
-set(SOURCES_CRYPTO
-    src/crypto/c_groestl.c
-    src/crypto/c_blake256.c
-    src/crypto/c_jh.c
-    src/crypto/c_skein.c
-   )
+set(HEADERS_COMMON
+        src/crypto/argon2_hasher/common/common.h
+        src/crypto/argon2_hasher/common/DLLExport.h
+        src/crypto/argon2_hasher/common/DLLImport.h
+        src/crypto/argon2_hasher/crypt/base64.h
+        src/crypto/argon2_hasher/crypt/hex.h
+        src/crypto/argon2_hasher/crypt/random_generator.h
+        src/crypto/argon2_hasher/crypt/sha512.h
+        src/crypto/argon2_hasher/hash/argon2/blake2/blake2.h
+        src/crypto/argon2_hasher/hash/argon2/blake2/blake2-config.h
+        src/crypto/argon2_hasher/hash/argon2/blake2/blake2-impl.h
+        src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse2.h
+        src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse41.h
+        src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-round.h
+        src/crypto/argon2_hasher/hash/argon2/Argon2.h
+        src/crypto/argon2_hasher/hash/argon2/Defs.h
+        src/crypto/argon2_hasher/hash/Hasher.h
+        )
+
+set(SOURCES_COMMON
+        src/crypto/argon2_hasher/common/common.cpp
+        src/crypto/argon2_hasher/crypt/base64.cpp
+        src/crypto/argon2_hasher/crypt/hex.cpp
+        src/crypto/argon2_hasher/crypt/random_generator.cpp
+        src/crypto/argon2_hasher/crypt/sha512.cpp
+        src/crypto/argon2_hasher/hash/argon2/blake2/blake2b.c
+        src/crypto/argon2_hasher/hash/argon2/Argon2.cpp
+        src/crypto/argon2_hasher/hash/argon2/argon2profile_4_1_256.c
+        src/crypto/argon2_hasher/hash/argon2/argon2profile_3_1_512.c
+        src/crypto/argon2_hasher/hash/Hasher.cpp
+        src/core/HasherConfig.cpp)
+
+set(SOURCE_CPU_HASHER src/crypto/argon2_hasher/hash/cpu/CpuHasher.cpp src/crypto/argon2_hasher/hash/cpu/CpuHasher.h)
+
+set(SOURCE_OPENCL_HASHER src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h
+        src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.h)
+
+set(SOURCE_CUDA_HASHER src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.cpp src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.h
+        src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu)
+
+set(ARGON2_FILL_BLOCKS_SRC
+        src/crypto/argon2_hasher/hash/cpu/argon2_opt/implementation.c
+        src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-opt.h
+        src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-ref.h
+        src/crypto/argon2_hasher/hash/argon2/Defs.h
+        src/crypto/argon2_hasher/hash/argon2/blake2/blake2-impl.h)
 
 if (WIN32)
     set(SOURCES_OS
@@ -164,8 +187,7 @@ if (WIN32)
         src/App_win.cpp
         src/base/io/Json_win.cpp
         src/common/Platform_win.cpp
-        src/Mem_win.cpp
-        )
+            )
 
     add_definitions(/DWIN32)
     set(EXTRA_LIBS ws2_32 psapi iphlpapi userenv)
@@ -174,15 +196,13 @@ elseif (APPLE)
         src/App_unix.cpp
         src/base/io/Json_unix.cpp
         src/common/Platform_mac.cpp
-        src/Mem_unix.cpp
-        )
+            )
 else()
     set(SOURCES_OS
         src/App_unix.cpp
         src/base/io/Json_unix.cpp
         src/common/Platform_unix.cpp
-        src/Mem_unix.cpp
-        )
+            )
 
     if (CMAKE_SYSTEM_NAME STREQUAL FreeBSD)
         set(EXTRA_LIBS kvm pthread)
@@ -225,8 +245,6 @@ else()
 endif()
 
 include(cmake/OpenSSL.cmake)
-include(cmake/asm.cmake)
-include(cmake/cn-gpu.cmake)
 
 CHECK_INCLUDE_FILE (syslog.h HAVE_SYSLOG_H)
 if (HAVE_SYSLOG_H)
@@ -234,22 +252,6 @@ if (HAVE_SYSLOG_H)
     set(SOURCES_SYSLOG src/common/log/SysLog.h src/common/log/SysLog.cpp)
 endif()
 
-if (NOT WITH_AEON)
-    add_definitions(/DXMRIG_NO_AEON)
-endif()
-
-if (NOT WITH_SUMO)
-    add_definitions(/DXMRIG_NO_SUMO)
-endif()
-
-if (NOT WITH_IPBC)
-    add_definitions(/DXMRIG_NO_IPBC)
-endif()
-
-if (NOT WITH_CN_PICO)
-    add_definitions(/DXMRIG_NO_CN_PICO)
-endif()
-
 if (WITH_EMBEDDED_CONFIG)
     add_definitions(/DXMRIG_FEATURE_EMBEDDED_CONFIG)
 endif()
@@ -284,14 +286,115 @@ endif()
 include_directories(src)
 include_directories(src/3rdparty)
 include_directories(${UV_INCLUDE_DIR})
+include_directories(src/crypto/argon2_hasher/hash/cpu/cpu_features/include)
 
-if (BUILD_STATIC)
-    set(CMAKE_EXE_LINKER_FLAGS " -static")
-endif()
+add_subdirectory(src/crypto/argon2_hasher/hash/cpu/cpu_features)
+set_property(TARGET cpu_features PROPERTY POSITION_INDEPENDENT_CODE ON)
 
 if (WITH_DEBUG_LOG)
     add_definitions(/DAPP_DEBUG)
 endif()
 
-add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES} ${CN_GPU_SOURCES})
-target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})
+add_library(argon2_common SHARED ${HEADERS_COMMON} ${SOURCES_COMMON})
+target_link_libraries(argon2_common ${CMAKE_DL_LIBS})
+# The main executable links against the shared argon2_common library defined above.
+add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES})
+target_link_libraries(${CMAKE_PROJECT_NAME} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB} argon2_common)
+
+# CPU hasher plugin: a MODULE library written as "cpu_hasher.hsh" (no file-name
+# prefix) into the "modules" output directory. The main executable depends on it
+# for build ordering only; it is not linked into the executable.
+add_library(cpu_hasher MODULE ${SOURCE_CPU_HASHER})
+target_link_libraries(cpu_hasher argon2_common cpu_features)
+set_property(TARGET cpu_hasher PROPERTY PREFIX "")
+set_property(TARGET cpu_hasher PROPERTY SUFFIX ".hsh")
+set_property(TARGET cpu_hasher PROPERTY LIBRARY_OUTPUT_DIRECTORY modules)
+add_dependencies(${CMAKE_PROJECT_NAME} cpu_hasher)
+
+# Reference fill-blocks module: compiled with BUILD_REF=1 and written as
+# "argon2_fill_blocks_REF.opt" (no file-name prefix) into the "modules" output
+# directory. cpu_hasher depends on it for build ordering only.
+add_library(argon2_fill_blocks_REF MODULE ${ARGON2_FILL_BLOCKS_SRC})
+target_compile_definitions(argon2_fill_blocks_REF PRIVATE BUILD_REF=1)
+set_property(TARGET argon2_fill_blocks_REF PROPERTY PREFIX "")
+set_property(TARGET argon2_fill_blocks_REF PROPERTY SUFFIX ".opt")
+set_property(TARGET argon2_fill_blocks_REF PROPERTY LIBRARY_OUTPUT_DIRECTORY modules)
+add_dependencies(cpu_hasher argon2_fill_blocks_REF)
+
+# x86_64 only: build one fill-blocks module per SIMD level from the same sources.
+# Each module is compiled with the matching -m<level> flag, written as a ".opt"
+# file with no file-name prefix into the "modules" output directory, and added
+# as a build dependency of cpu_hasher.
+if(ARCH STREQUAL "x86_64")
+    foreach(simd_level SSE2 SSSE3 AVX AVX2 AVX512F)
+        set(simd_target argon2_fill_blocks_${simd_level})
+        # The compiler flag is the lower-cased level name, e.g. AVX512F -> -mavx512f.
+        string(TOLOWER "-m${simd_level}" simd_flag)
+
+        add_library(${simd_target} MODULE ${ARGON2_FILL_BLOCKS_SRC})
+        set_target_properties(${simd_target} PROPERTIES
+                PREFIX ""
+                SUFFIX ".opt"
+                LIBRARY_OUTPUT_DIRECTORY modules)
+        target_compile_options(${simd_target} PRIVATE ${simd_flag})
+        add_dependencies(cpu_hasher ${simd_target})
+    endforeach()
+endif()
+
+# ARM only (32-bit "arm" and 64-bit "aarch64"): build the NEON fill-blocks module.
+if(ARCH STREQUAL "arm" OR ARCH STREQUAL "aarch64")
+    add_library(argon2_fill_blocks_NEON MODULE ${ARGON2_FILL_BLOCKS_SRC})
+    set_target_properties(argon2_fill_blocks_NEON PROPERTIES
+            PREFIX ""
+            SUFFIX ".opt"
+            LIBRARY_OUTPUT_DIRECTORY modules
+            )
+    # The shared library target is named argon2_common; there is no target called "common".
+    target_compile_options(argon2_common PRIVATE -D__NEON__)
+    if(ARCH STREQUAL "arm")
+        target_compile_options(argon2_fill_blocks_NEON PRIVATE -D__NEON__ -mfpu=neon -funsafe-math-optimizations)
+    else()
+        target_compile_options(argon2_fill_blocks_NEON PRIVATE -D__NEON__)
+    endif()
+    add_dependencies(cpu_hasher argon2_fill_blocks_NEON)
+endif()
+
+# Optional OpenCL hasher plugin, built only when WITH_OPENCL is enabled.
+if(WITH_OPENCL)
+    add_definitions(-DWITH_OPENCL)
+    find_package(OpenCL REQUIRED)
+    include_directories(${OpenCL_INCLUDE_DIR})
+
+    add_library(opencl_hasher MODULE ${SOURCE_OPENCL_HASHER})
+    target_link_libraries(opencl_hasher argon2_common ${OpenCL_LIBRARY})
+    add_dependencies(${CMAKE_PROJECT_NAME} opencl_hasher)
+    set_target_properties(opencl_hasher PROPERTIES
+            PREFIX ""
+            SUFFIX ".hsh"
+            LIBRARY_OUTPUT_DIRECTORY modules)
+endif()
+
+# Optional CUDA hasher plugin, built only when WITH_CUDA is enabled.
+if(WITH_CUDA)
+    add_definitions(-DWITH_CUDA)
+    find_package(CUDA REQUIRED)
+    if(NOT WIN32)
+        # PARALLEL_CUDA is defined on every platform except Windows.
+        add_definitions(-DPARALLEL_CUDA)
+    endif()
+
+    # Append the project's nvcc options to any flags that are already set.
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -O3 -arch=compute_35 -std=c++11)
+
+    # cuda_add_library is provided by the CUDA package located by find_package above.
+    cuda_add_library(cuda_hasher MODULE ${SOURCE_CUDA_HASHER})
+    target_link_libraries(cuda_hasher argon2_common)
+    add_dependencies(${CMAKE_PROJECT_NAME} cuda_hasher)
+    set_target_properties(cuda_hasher PROPERTIES
+            PREFIX ""
+            SUFFIX ".hsh"
+            LIBRARY_OUTPUT_DIRECTORY modules)
+endif()
+
+
+
diff --git a/cmake/TargetArch.cmake b/cmake/TargetArch.cmake
new file mode 100644
index 00000000..be66b82f
--- /dev/null
+++ b/cmake/TargetArch.cmake
@@ -0,0 +1,116 @@
+# Based on the Qt 5 processor detection code, so should be very accurate
+# https://qt.gitorious.org/qt/qtbase/blobs/master/src/corelib/global/qprocessordetection.h
+# Currently reports arm, aarch64, x86 (i386/x86_64), ia64, and ppc (ppc/ppc64); anything else is "unknown"
+
+# Regarding POWER/PowerPC, just as is noted in the Qt source,
+# "There are many more known variants/revisions that we do not handle/detect."
+
+set(archdetect_c_code "
+#if defined(__arm__) || defined(__TARGET_ARCH_ARM)
+    #error cmake_ARCH arm
+#elif defined(__aarch64__)
+    #error cmake_ARCH aarch64
+#elif defined(__i386) || defined(__i386__) || defined(_M_IX86)
+    #error cmake_ARCH i386
+#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64)
+    #error cmake_ARCH x86_64
+#elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
+    #error cmake_ARCH ia64
+#elif defined(__ppc__) || defined(__ppc) || defined(__powerpc__) \\
+      || defined(_ARCH_COM) || defined(_ARCH_PWR) || defined(_ARCH_PPC)  \\
+      || defined(_M_MPPC) || defined(_M_PPC)
+    #if defined(__ppc64__) || defined(__powerpc64__) || defined(__64BIT__)
+        #error cmake_ARCH ppc64
+    #else
+        #error cmake_ARCH ppc
+    #endif
+#endif
+
+#error cmake_ARCH unknown
+")
+
+# Set ppc_support to TRUE before including this file or ppc and ppc64
+# will be treated as invalid architectures since they are no longer supported by Apple
+
+# target_architecture(<output_var>)
+#   Sets <output_var> in the caller's scope to the detected target architecture:
+#   arm, aarch64, i386, x86_64, ia64, ppc, ppc64 or unknown. On Apple platforms with
+#   CMAKE_OSX_ARCHITECTURES set, the result is a list in a fixed, normalized order.
+function(target_architecture output_var)
+    # A function starts with a copy of the caller's variables, so reset every local
+    # accumulator; otherwise a pre-existing value would leak into the result.
+    set(detected_arch "")
+    foreach(known_arch ppc i386 x86_64 ppc64 arm64)
+        set(osx_arch_${known_arch} FALSE)
+    endforeach()
+
+    if(APPLE AND CMAKE_OSX_ARCHITECTURES)
+        # On OS X we use CMAKE_OSX_ARCHITECTURES *if* it was set.
+        # PowerPC is only accepted when ppc_support is set: OS X SDK 10.6 and later
+        # cannot build PowerPC applications (10.4/10.5 are needed for that). See
+        # http://stackoverflow.com/questions/5333490/how-can-we-restore-ppc-ppc64-as-well-as-full-10-4-10-5-sdk-support-to-xcode-4
+        foreach(osx_arch ${CMAKE_OSX_ARCHITECTURES})
+            if("${osx_arch}" STREQUAL "ppc" AND ppc_support)
+                set(osx_arch_ppc TRUE)
+            elseif("${osx_arch}" STREQUAL "i386")
+                set(osx_arch_i386 TRUE)
+            elseif("${osx_arch}" STREQUAL "x86_64")
+                set(osx_arch_x86_64 TRUE)
+            elseif("${osx_arch}" STREQUAL "ppc64" AND ppc_support)
+                set(osx_arch_ppc64 TRUE)
+            elseif("${osx_arch}" STREQUAL "arm64")
+                set(osx_arch_arm64 TRUE)
+            else()
+                message(FATAL_ERROR "Invalid OS X arch name: ${osx_arch}")
+            endif()
+        endforeach()
+
+        # Now add all the architectures in our normalized order
+        foreach(known_arch ppc i386 x86_64 ppc64)
+            if(osx_arch_${known_arch})
+                list(APPEND detected_arch ${known_arch})
+            endif()
+        endforeach()
+
+        # Apple calls 64-bit ARM "arm64"; report it as aarch64, the name the
+        # preprocessor probe below produces for the same CPU family.
+        if(osx_arch_arm64)
+            list(APPEND detected_arch aarch64)
+        endif()
+    else()
+        file(WRITE "${CMAKE_BINARY_DIR}/arch.c" "${archdetect_c_code}")
+
+        enable_language(C)
+
+        # Detect the architecture in a rather creative way...
+        # This compiles a small C program which is a series of ifdefs that selects a
+        # particular #error preprocessor directive whose message string contains the
+        # target architecture. The program will always fail to compile (both because
+        # the file is not a valid C program, and obviously because of the presence of
+        # the #error preprocessor directives), but by exploiting the preprocessor in
+        # this way we can detect the correct target architecture even when
+        # cross-compiling, since the program itself never needs to be run.
+        try_run(
+                run_result_unused
+                compile_result_unused
+                "${CMAKE_BINARY_DIR}"
+                "${CMAKE_BINARY_DIR}/arch.c"
+                COMPILE_OUTPUT_VARIABLE compile_output
+                CMAKE_FLAGS CMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}
+        )
+
+        # Parse the architecture name from the compiler output
+        string(REGEX MATCH "cmake_ARCH ([a-zA-Z0-9_]+)" detected_arch "${compile_output}")
+
+        # Get rid of the value marker leaving just the architecture name
+        string(REPLACE "cmake_ARCH " "" detected_arch "${detected_arch}")
+
+        # If no marker was found at all (e.g. due to a typo in the probe code),
+        # fall back to "unknown".
+        if(NOT detected_arch)
+            set(detected_arch unknown)
+        endif()
+    endif()
+
+    set(${output_var} "${detected_arch}" PARENT_SCOPE)
+endfunction()
diff --git a/cmake/asm.cmake b/cmake/asm.cmake
deleted file mode 100644
index 389f6723..00000000
--- a/cmake/asm.cmake
+++ /dev/null
@@ -1,45 +0,0 @@
-if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
-    set(XMRIG_ASM_LIBRARY "xmrig-asm")
-
-    if (CMAKE_C_COMPILER_ID MATCHES MSVC)
-        enable_language(ASM_MASM)
-
-        if (MSVC_TOOLSET_VERSION GREATER_EQUAL 141)
-            set(XMRIG_ASM_FILES
-                "src/crypto/asm/cn_main_loop.asm"
-                "src/crypto/asm/CryptonightR_template.asm"
-            )
-        else()
-            set(XMRIG_ASM_FILES
-                "src/crypto/asm/win64/cn_main_loop.asm"
-                "src/crypto/asm/win64/CryptonightR_template.asm"
-            )
-        endif()
-
-        set_property(SOURCE ${XMRIG_ASM_FILES} PROPERTY ASM_MASM)
-    else()
-        enable_language(ASM)
-
-        if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
-            set(XMRIG_ASM_FILES
-                "src/crypto/asm/win64/cn_main_loop.S"
-                "src/crypto/asm/CryptonightR_template.S"
-            )
-        else()
-            set(XMRIG_ASM_FILES
-                "src/crypto/asm/cn_main_loop.S"
-                "src/crypto/asm/CryptonightR_template.S"
-            )
-        endif()
-
-        set_property(SOURCE ${XMRIG_ASM_FILES} PROPERTY C)
-    endif()
-
-    add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILES})
-    set(XMRIG_ASM_SOURCES src/crypto/Asm.h src/crypto/Asm.cpp src/crypto/CryptonightR_gen.cpp)
-    set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
-else()
-    set(XMRIG_ASM_SOURCES "")
-    set(XMRIG_ASM_LIBRARY "")
-    add_definitions(/DXMRIG_NO_ASM)
-endif()
diff --git a/cmake/cn-gpu.cmake b/cmake/cn-gpu.cmake
deleted file mode 100644
index b529f0b2..00000000
--- a/cmake/cn-gpu.cmake
+++ /dev/null
@@ -1,23 +0,0 @@
-if (WITH_CN_GPU AND CMAKE_SIZEOF_VOID_P EQUAL 8)
-
-    if (XMRIG_ARM)
-        set(CN_GPU_SOURCES src/crypto/cn_gpu_arm.cpp)
-
-        if (CMAKE_CXX_COMPILER_ID MATCHES GNU OR CMAKE_CXX_COMPILER_ID MATCHES Clang)
-            set_source_files_properties(src/crypto/cn_gpu_arm.cpp PROPERTIES COMPILE_FLAGS "-O3")
-        endif()
-    else()
-        set(CN_GPU_SOURCES src/crypto/cn_gpu_avx.cpp src/crypto/cn_gpu_ssse3.cpp)
-
-        if (CMAKE_CXX_COMPILER_ID MATCHES GNU OR CMAKE_CXX_COMPILER_ID MATCHES Clang)
-            set_source_files_properties(src/crypto/cn_gpu_avx.cpp PROPERTIES COMPILE_FLAGS "-O3 -mavx2")
-            set_source_files_properties(src/crypto/cn_gpu_ssse3.cpp PROPERTIES COMPILE_FLAGS "-O3")
-        elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
-            set_source_files_properties(src/crypto/cn_gpu_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX")
-        endif()
-    endif()
-else()
-    set(CN_GPU_SOURCES "")
-
-    add_definitions(/DXMRIG_NO_CN_GPU)
-endif()
diff --git a/doc/ALGORITHMS.md b/doc/ALGORITHMS.md
index 835a1d49..9b42ead1 100644
--- a/doc/ALGORITHMS.md
+++ b/doc/ALGORITHMS.md
@@ -1,17 +1,17 @@
 # Algorithms
 
-XMRig uses a different way to specify algorithms, compared to other miners.
+NinjaRig uses a different way to specify algorithms, compared to other miners.
 
 Algorithm selection splitted to 2 parts:
 
- * Global base algorithm per miner or proxy instance, `algo` option. Possible values: `cryptonight`, `cryptonight-lite`, `cryptonight-heavy`.
+ * Global base algorithm per miner or proxy instance, `algo` option. Possible values: `argon2id`.
  * Algorithm variant specified separately for each pool, `variant` option.
  * [Full table for supported algorithm and variants.](https://github.com/xmrig/xmrig-proxy/blob/master/doc/STRATUM_EXT.md#14-algorithm-names-and-variants)
  
 #### Example
 ```json
 {
-  "algo": "cryptonight",
+  "algo": "argon2id",
   ...
   "pools": [
     {
diff --git a/doc/api/1/config.json b/doc/api/1/config.json
index 2c74cfba..560ff810 100644
--- a/doc/api/1/config.json
+++ b/doc/api/1/config.json
@@ -1,5 +1,5 @@
 {
-    "algo": "cryptonight",
+    "algo": "chukwa",
     "api": {
         "port": 44444,
         "access-token": "TOKEN",
@@ -19,16 +19,16 @@
     "max-cpu-usage": 75,
     "pools": [
         {
-            "url": "pool.monero.hashvault.pro:3333",
-            "user": "48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD",
+            "url": "publicnode.ydns.eu:4666",
+            "user": "WrkzZon3ZArBkZVqAH9n6MM2eq2tV6sN9GwqD73hTKuYAyhMYK48ukQPFQssEMXnFMFs3nwekTLiXa9obkxM6f1KA2i73gEcq8",
             "pass": "x",
             "keepalive": false,
             "nicehash": false,
             "variant": -1
         },
         {
-            "url": "pool.supportxmr.com:3333",
-            "user": "48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD",
+            "url": "testnet.wrkz.work:5555",
+            "user": "WrkzZon3ZArBkZVqAH9n6MM2eq2tV6sN9GwqD73hTKuYAyhMYK48ukQPFQssEMXnFMFs3nwekTLiXa9obkxM6f1KA2i73gEcq8",
             "pass": "x",
             "keepalive": false,
             "nicehash": false,
diff --git a/doc/api/1/summary.json b/doc/api/1/summary.json
index ed3cd128..95519d56 100644
--- a/doc/api/1/summary.json
+++ b/doc/api/1/summary.json
@@ -1,17 +1,16 @@
 {
     "id": "92f3104f9a2ee78c",
     "worker_id": "Ubuntu-1604-xenial-64-minimal",
-    "version": "2.6.0-beta3",
+    "version": "1.0.0-alpha",
     "kind": "cpu",
-    "ua": "XMRig/2.6.0-beta3 (Linux x86_64) libuv/1.8.0 gcc/5.4.0",
+    "ua": "NinjaRig/1.0.0-alpha (Linux x86_64) libuv/1.8.0 gcc/5.4.0",
     "cpu": {
         "brand": "Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz",
         "aes": true,
         "x64": true,
         "sockets": 1
     },
-    "algo": "cryptonight",
-    "hugepages": true,
+    "algo": "chukwa",
     "donate_level": 5,
     "hashrate": {
         "total": [
@@ -64,7 +63,7 @@
         "error_log": []
     },
     "connection": {
-        "pool": "pool.monero.hashvault.pro:3333",
+        "pool": "publicnode.ydns.eu:4666",
         "uptime": 953,
         "ping": 35,
         "failures": 0,
diff --git a/doc/api/1/threads.json b/doc/api/1/threads.json
index e536883d..5b302af6 100644
--- a/doc/api/1/threads.json
+++ b/doc/api/1/threads.json
@@ -1,14 +1,9 @@
 {
-    "hugepages": [
-        4,
-        4
-    ],
     "memory": 8388608,
     "threads": [
         {
             "type": "cpu",
-            "algo": "cryptonight",
-            "av": 1,
+            "algo": "chukwa",
             "low_power_mode": 1,
             "affine_to_cpu": 0,
             "priority": -1,
@@ -21,7 +16,7 @@
         },
         {
             "type": "cpu",
-            "algo": "cryptonight",
+            "algo": "chukwa",
             "av": 1,
             "low_power_mode": 1,
             "affine_to_cpu": 1,
@@ -35,7 +30,7 @@
         },
         {
             "type": "cpu",
-            "algo": "cryptonight",
+            "algo": "chukwa",
             "av": 1,
             "low_power_mode": 1,
             "affine_to_cpu": 2,
@@ -49,7 +44,7 @@
         },
         {
             "type": "cpu",
-            "algo": "cryptonight",
+            "algo": "chukwa",
             "av": 1,
             "low_power_mode": 1,
             "affine_to_cpu": 3,
diff --git a/res/app.rc b/res/app.rc
index 037d842a..84a9e90d 100644
--- a/res/app.rc
+++ b/res/app.rc
@@ -24,7 +24,7 @@ VS_VERSION_INFO VERSIONINFO
         VALUE "FileDescription",  APP_DESC
         VALUE "FileVersion",      APP_VERSION
         VALUE "LegalCopyright",   APP_COPYRIGHT
-        VALUE "OriginalFilename", "xmrig.exe"
+        VALUE "OriginalFilename", "ninjarig.exe"
         VALUE "ProductName",      APP_NAME
         VALUE "ProductVersion",   APP_VERSION
       END
diff --git a/src/App.cpp b/src/App.cpp
index e75766ac..0b69c884 100644
--- a/src/App.cpp
+++ b/src/App.cpp
@@ -27,23 +27,19 @@
 #include <stdlib.h>
 #include <uv.h>
 
-
 #include "api/Api.h"
 #include "App.h"
 #include "base/kernel/Signals.h"
 #include "common/Console.h"
-#include "common/cpu/Cpu.h"
 #include "common/log/Log.h"
 #include "common/Platform.h"
 #include "core/Config.h"
 #include "core/Controller.h"
-#include "crypto/CryptoNight.h"
-#include "Mem.h"
 #include "net/Network.h"
 #include "Summary.h"
-#include "version.h"
 #include "workers/Workers.h"
-
+#include <crypto/argon2_hasher/hash/Hasher.h>
+#include <base/kernel/Process.h>
 
 #ifndef XMRIG_NO_HTTPD
 #   include "common/api/Httpd.h"
@@ -55,6 +51,8 @@ xmrig::App::App(Process *process) :
     m_httpd(nullptr),
     m_signals(nullptr)
 {
+    srand(time(NULL));
+
     m_controller = new Controller(process);
     if (m_controller->init() != 0) {
         return;
@@ -63,6 +61,8 @@ xmrig::App::App(Process *process) :
     if (!m_controller->config()->isBackground()) {
         m_console = new Console(this);
     }
+
+    process->location(Process::ExeLocation, m_appFileName);
 }
 
 
@@ -90,7 +90,8 @@ int xmrig::App::exec()
 
     background();
 
-    Mem::init(m_controller->config()->isHugePages());
+    // load hasher modules
+    Hasher::loadHashers(m_appFileName);
 
     Summary::print(m_controller);
 
@@ -115,7 +116,8 @@ int xmrig::App::exec()
     m_httpd->start();
 #   endif
 
-    Workers::start(m_controller);
+    if(!Workers::start(m_controller))
+        return 0;
 
     m_controller->network()->connect();
 
diff --git a/src/App.h b/src/App.h
index fc944967..b1e9d8a3 100644
--- a/src/App.h
+++ b/src/App.h
@@ -64,6 +64,7 @@ private:
     Controller *m_controller;
     Httpd *m_httpd;
     Signals *m_signals;
+    char m_appFileName[512];
 };
 
 
diff --git a/src/Mem.cpp b/src/Mem.cpp
deleted file mode 100644
index 01a2157b..00000000
--- a/src/Mem.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
- * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-#include "common/utils/mm_malloc.h"
-#include "crypto/CryptoNight.h"
-#include "crypto/CryptoNight_constants.h"
-#include "Mem.h"
-
-
-bool Mem::m_enabled = true;
-int Mem::m_flags    = 0;
-
-
-MemInfo Mem::create(cryptonight_ctx **ctx, xmrig::Algo algorithm, size_t count)
-{
-    using namespace xmrig;
-
-    MemInfo info;
-    info.size = cn_select_memory(algorithm) * count;
-
-    constexpr const size_t align_size = 2 * 1024 * 1024;
-    info.size  = ((info.size + align_size - 1) / align_size) * align_size;
-    info.pages = info.size / align_size;
-
-    allocate(info, m_enabled);
-
-    for (size_t i = 0; i < count; ++i) {
-        cryptonight_ctx *c = static_cast<cryptonight_ctx *>(_mm_malloc(sizeof(cryptonight_ctx), 4096));
-        c->memory          = info.memory + (i * cn_select_memory(algorithm));
-
-        uint8_t* p = reinterpret_cast<uint8_t*>(allocateExecutableMemory(0x4000));
-        c->generated_code  = reinterpret_cast<cn_mainloop_fun_ms_abi>(p);
-        c->generated_code_double = reinterpret_cast<cn_mainloop_fun_ms_abi>(p + 0x2000);
-
-        c->generated_code_data.variant = xmrig::VARIANT_MAX;
-        c->generated_code_data.height = (uint64_t)(-1);
-        c->generated_code_double_data = c->generated_code_data;
-
-        ctx[i] = c;
-    }
-
-    return info;
-}
-
-
-void Mem::release(cryptonight_ctx **ctx, size_t count, MemInfo &info)
-{
-    release(info);
-
-    for (size_t i = 0; i < count; ++i) {
-        _mm_free(ctx[i]);
-    }
-}
-
diff --git a/src/Mem.h b/src/Mem.h
deleted file mode 100644
index 9e39e963..00000000
--- a/src/Mem.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
- * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef XMRIG_MEM_H
-#define XMRIG_MEM_H
-
-
-#include <stddef.h>
-#include <stdint.h>
-
-
-#include "common/xmrig.h"
-
-
-struct cryptonight_ctx;
-
-
-struct MemInfo
-{
-    alignas(16) uint8_t *memory;
-
-    size_t hugePages;
-    size_t pages;
-    size_t size;
-};
-
-
-class Mem
-{
-public:
-    enum Flags {
-        HugepagesAvailable = 1,
-        HugepagesEnabled   = 2,
-        Lock               = 4
-    };
-
-    static MemInfo create(cryptonight_ctx **ctx, xmrig::Algo algorithm, size_t count);
-    static void init(bool enabled);
-    static void release(cryptonight_ctx **ctx, size_t count, MemInfo &info);
-
-    static void *allocateExecutableMemory(size_t size);
-    static void protectExecutableMemory(void *p, size_t size);
-    static void flushInstructionCache(void *p, size_t size);
-
-    static inline bool isHugepagesAvailable() { return (m_flags & HugepagesAvailable) != 0; }
-
-private:
-    static void allocate(MemInfo &info, bool enabled);
-    static void release(MemInfo &info);
-
-    static int m_flags;
-    static bool m_enabled;
-};
-
-
-#endif /* XMRIG_MEM_H */
diff --git a/src/Mem_unix.cpp b/src/Mem_unix.cpp
deleted file mode 100644
index 833c200c..00000000
--- a/src/Mem_unix.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
- * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-#include <stdlib.h>
-#include <sys/mman.h>
-
-
-#include "common/log/Log.h"
-#include "common/utils/mm_malloc.h"
-#include "common/xmrig.h"
-#include "crypto/CryptoNight.h"
-#include "Mem.h"
-
-
-void Mem::init(bool enabled)
-{
-    m_enabled = enabled;
-}
-
-
-void Mem::allocate(MemInfo &info, bool enabled)
-{
-    info.hugePages = 0;
-
-    if (!enabled) {
-        info.memory = static_cast<uint8_t*>(_mm_malloc(info.size, 4096));
-
-        return;
-    }
-
-#   if defined(__APPLE__)
-    info.memory = static_cast<uint8_t*>(mmap(0, info.size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0));
-#   elif defined(__FreeBSD__)
-    info.memory = static_cast<uint8_t*>(mmap(0, info.size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0));
-#   else
-    info.memory = static_cast<uint8_t*>(mmap(0, info.size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 0, 0));
-#   endif
-
-    if (info.memory == MAP_FAILED) {
-        return allocate(info, false);;
-    }
-
-    info.hugePages = info.pages;
-
-    if (madvise(info.memory, info.size, MADV_RANDOM | MADV_WILLNEED) != 0) {
-        LOG_ERR("madvise failed");
-    }
-
-    if (mlock(info.memory, info.size) == 0) {
-        m_flags |= Lock;
-    }
-}
-
-
-void Mem::release(MemInfo &info)
-{
-    if (info.hugePages) {
-        if (m_flags & Lock) {
-            munlock(info.memory, info.size);
-        }
-
-        munmap(info.memory, info.size);
-    }
-    else {
-        _mm_free(info.memory);
-    }
-}
-
-
-void *Mem::allocateExecutableMemory(size_t size)
-{
-#   if defined(__APPLE__)
-    return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
-#   else
-    return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-#   endif
-}
-
-
-void Mem::protectExecutableMemory(void *p, size_t size)
-{
-    mprotect(p, size, PROT_READ | PROT_EXEC);
-}
-
-
-void Mem::flushInstructionCache(void *p, size_t size)
-{
-#   ifndef __FreeBSD__
-    __builtin___clear_cache(reinterpret_cast<char*>(p), reinterpret_cast<char*>(p) + size);
-#   endif
-}
diff --git a/src/Mem_win.cpp b/src/Mem_win.cpp
deleted file mode 100644
index 27c1348b..00000000
--- a/src/Mem_win.cpp
+++ /dev/null
@@ -1,204 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
- * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-#include <winsock2.h>
-#include <windows.h>
-#include <ntsecapi.h>
-#include <tchar.h>
-
-
-#include "common/log/Log.h"
-#include "common/utils/mm_malloc.h"
-#include "common/xmrig.h"
-#include "crypto/CryptoNight.h"
-#include "crypto/CryptoNight_constants.h"
-#include "Mem.h"
-
-
-/*****************************************************************
-SetLockPagesPrivilege: a function to obtain or
-release the privilege of locking physical pages.
-
-Inputs:
-
-HANDLE hProcess: Handle for the process for which the
-privilege is needed
-
-BOOL bEnable: Enable (TRUE) or disable?
-
-Return value: TRUE indicates success, FALSE failure.
-
-*****************************************************************/
-/**
- * AWE Example: https://msdn.microsoft.com/en-us/library/windows/desktop/aa366531(v=vs.85).aspx
- * Creating a File Mapping Using Large Pages: https://msdn.microsoft.com/en-us/library/aa366543(VS.85).aspx
- */
-static BOOL SetLockPagesPrivilege() {
-    HANDLE token;
-
-    if (OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token) != TRUE) {
-        return FALSE;
-    }
-
-    TOKEN_PRIVILEGES tp;
-    tp.PrivilegeCount = 1;
-    tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
-
-    if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &(tp.Privileges[0].Luid)) != TRUE) {
-        return FALSE;
-    }
-
-    BOOL rc = AdjustTokenPrivileges(token, FALSE, (PTOKEN_PRIVILEGES) &tp, 0, NULL, NULL);
-    if (rc != TRUE || GetLastError() != ERROR_SUCCESS) {
-        return FALSE;
-    }
-
-    CloseHandle(token);
-
-    return TRUE;
-}
-
-
-static LSA_UNICODE_STRING StringToLsaUnicodeString(LPCTSTR string) {
-    LSA_UNICODE_STRING lsaString;
-
-    DWORD dwLen = (DWORD) wcslen(string);
-    lsaString.Buffer = (LPWSTR) string;
-    lsaString.Length = (USHORT)((dwLen) * sizeof(WCHAR));
-    lsaString.MaximumLength = (USHORT)((dwLen + 1) * sizeof(WCHAR));
-    return lsaString;
-}
-
-
-static BOOL ObtainLockPagesPrivilege() {
-    HANDLE token;
-    PTOKEN_USER user = NULL;
-
-    if (OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &token) == TRUE) {
-        DWORD size = 0;
-
-        GetTokenInformation(token, TokenUser, NULL, 0, &size);
-        if (size) {
-            user = (PTOKEN_USER) LocalAlloc(LPTR, size);
-        }
-
-        GetTokenInformation(token, TokenUser, user, size, &size);
-        CloseHandle(token);
-    }
-
-    if (!user) {
-        return FALSE;
-    }
-
-    LSA_HANDLE handle;
-    LSA_OBJECT_ATTRIBUTES attributes;
-    ZeroMemory(&attributes, sizeof(attributes));
-
-    BOOL result = FALSE;
-    if (LsaOpenPolicy(NULL, &attributes, POLICY_ALL_ACCESS, &handle) == 0) {
-        LSA_UNICODE_STRING str = StringToLsaUnicodeString(_T(SE_LOCK_MEMORY_NAME));
-
-        if (LsaAddAccountRights(handle, user->User.Sid, &str, 1) == 0) {
-            LOG_NOTICE("Huge pages support was successfully enabled, but reboot required to use it");
-            result = TRUE;
-        }
-
-        LsaClose(handle);
-    }
-
-    LocalFree(user);
-    return result;
-}
-
-
-static BOOL TrySetLockPagesPrivilege() {
-    if (SetLockPagesPrivilege()) {
-        return TRUE;
-    }
-
-    return ObtainLockPagesPrivilege() && SetLockPagesPrivilege();
-}
-
-
-void Mem::init(bool enabled)
-{
-    m_enabled = enabled;
-
-    if (enabled && TrySetLockPagesPrivilege()) {
-        m_flags |= HugepagesAvailable;
-    }
-}
-
-
-void Mem::allocate(MemInfo &info, bool enabled)
-{
-    info.hugePages = 0;
-
-    if (!enabled) {
-        info.memory = static_cast<uint8_t*>(_mm_malloc(info.size, 4096));
-
-        return;
-    }
-
-    info.memory = static_cast<uint8_t*>(VirtualAlloc(nullptr, info.size, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE));
-    if (info.memory) {
-        info.hugePages = info.pages;
-
-        return;
-    }
-
-    allocate(info, false);
-}
-
-
-void Mem::release(MemInfo &info)
-{
-    if (info.hugePages) {
-        VirtualFree(info.memory, 0, MEM_RELEASE);
-    }
-    else {
-        _mm_free(info.memory);
-    }
-}
-
-
-void *Mem::allocateExecutableMemory(size_t size)
-{
-    return VirtualAlloc(0, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
-}
-
-
-void Mem::protectExecutableMemory(void *p, size_t size)
-{
-    DWORD oldProtect;
-    VirtualProtect(p, size, PAGE_EXECUTE_READ, &oldProtect);
-}
-
-
-void Mem::flushInstructionCache(void *p, size_t size)
-{
-    ::FlushInstructionCache(GetCurrentProcess(), p, size);
-}
diff --git a/src/Summary.cpp b/src/Summary.cpp
index 60a9278f..f9e80d1b 100644
--- a/src/Summary.cpp
+++ b/src/Summary.cpp
@@ -33,115 +33,9 @@
 #include "common/log/Log.h"
 #include "core/Config.h"
 #include "core/Controller.h"
-#include "crypto/Asm.h"
-#include "Mem.h"
 #include "Summary.h"
 #include "version.h"
 
-
-#ifndef XMRIG_NO_ASM
-static const char *coloredAsmNames[] = {
-    "\x1B[1;31mnone\x1B[0m",
-    "auto",
-    "\x1B[1;32mintel\x1B[0m",
-    "\x1B[1;32mryzen\x1B[0m",
-    "\x1B[1;32mbulldozer\x1B[0m"
-};
-
-
-inline static const char *asmName(xmrig::Assembly assembly, bool colors)
-{
-    return colors ? coloredAsmNames[assembly] : xmrig::Asm::toString(assembly);
-}
-#endif
-
-
-static void print_memory(xmrig::Config *config) {
-#   ifdef _WIN32
-    if (config->isColors()) {
-        Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s",
-                       "HUGE PAGES", Mem::isHugepagesAvailable() ? "\x1B[1;32mavailable" : "\x1B[01;31munavailable");
-    }
-    else {
-        Log::i()->text(" * %-13s%s", "HUGE PAGES", Mem::isHugepagesAvailable() ? "available" : "unavailable");
-    }
-#   endif
-}
-
-
-static void print_cpu(xmrig::Config *config)
-{
-    using namespace xmrig;
-
-    if (config->isColors()) {
-        Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%d)") " %sx64 %sAES %sAVX2",
-                       "CPU",
-                       Cpu::info()->brand(),
-                       Cpu::info()->sockets(),
-                       Cpu::info()->isX64()   ? "\x1B[1;32m" : "\x1B[1;31m-",
-                       Cpu::info()->hasAES()  ? "\x1B[1;32m" : "\x1B[1;31m-",
-                       Cpu::info()->hasAVX2() ? "\x1B[1;32m" : "\x1B[1;31m-");
-#       ifndef XMRIG_NO_LIBCPUID
-        Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%.1f MB/%.1f MB"), "CPU L2/L3", Cpu::info()->L2() / 1024.0, Cpu::info()->L3() / 1024.0);
-#       endif
-    }
-    else {
-        Log::i()->text(" * %-13s%s (%d) %sx64 %sAES %sAVX2",
-                       "CPU",
-                       Cpu::info()->brand(),
-                       Cpu::info()->sockets(),
-                       Cpu::info()->isX64()   ? "" : "-",
-                       Cpu::info()->hasAES()  ? "" : "-",
-                       Cpu::info()->hasAVX2() ? "" : "-");
-#       ifndef XMRIG_NO_LIBCPUID
-        Log::i()->text(" * %-13s%.1f MB/%.1f MB", "CPU L2/L3", Cpu::info()->L2() / 1024.0, Cpu::info()->L3() / 1024.0);
-#       endif
-    }
-}
-
-
-static void print_threads(xmrig::Config *config)
-{
-    if (config->threadsMode() != xmrig::Config::Advanced) {
-        char buf[32] = { 0 };
-        if (config->affinity() != -1L) {
-            snprintf(buf, sizeof buf, ", affinity=0x%" PRIX64, config->affinity());
-        }
-
-        Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("%-13s") CYAN_BOLD("%d") WHITE_BOLD(", %s, av=%d, %sdonate=%d%%") WHITE_BOLD("%s")
-                                          : " * %-13s%d, %s, av=%d, %sdonate=%d%%%s",
-                       "THREADS",
-                       config->threadsCount(),
-                       config->algorithm().name(),
-                       config->algoVariant(),
-                       config->isColors() && config->donateLevel() == 0 ? "\x1B[1;31m" : "",
-                       config->donateLevel(),
-                       buf);
-    }
-    else {
-        Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("%-13s") CYAN_BOLD("%d") WHITE_BOLD(", %s, %sdonate=%d%%")
-                                          : " * %-13s%d, %s, %sdonate=%d%%",
-                       "THREADS",
-                       config->threadsCount(),
-                       config->algorithm().name(),
-                       config->isColors() && config->donateLevel() == 0 ? "\x1B[1;31m" : "",
-                       config->donateLevel());
-    }
-
-#   ifndef XMRIG_NO_ASM
-    if (config->assembly() == xmrig::ASM_AUTO) {
-        const xmrig::Assembly assembly = xmrig::Cpu::info()->assembly();
-
-        Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("%-13sauto:%s")
-                                          : " * %-13sauto:%s", "ASSEMBLY", asmName(assembly, config->isColors()));
-    }
-    else {
-        Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s") : " * %-13s%s", "ASSEMBLY", asmName(config->assembly(), config->isColors()));
-    }
-#   endif
-}
-
-
 static void print_commands(xmrig::Config *config)
 {
     if (config->isColors()) {
@@ -154,16 +48,24 @@ static void print_commands(xmrig::Config *config)
     }
 }
 
+static void print_donate(xmrig::Config *config)
+{
+    // Same banner text in both modes; the colored form only adds the color macros.
+    if (!config->isColors()) {
+        Log::i()->text(" * DONATE       %d%% (change with --donate-level option)", config->donateLevel());
+        return;
+    }
+    Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("DONATE       ") MAGENTA_BOLD("%d%%") WHITE_BOLD(" (change with --donate-level option)"), config->donateLevel());
+}
 
 void Summary::print(xmrig::Controller *controller)
 {
     controller->config()->printVersions();
-    print_memory(controller->config());
-    print_cpu(controller->config());
-    print_threads(controller->config());
     controller->config()->printPools();
     controller->config()->printAPI();
 
+    print_donate(controller->config());
+
     print_commands(controller->config());
 }
 
diff --git a/src/api/ApiRouter.cpp b/src/api/ApiRouter.cpp
index beee8fd3..6cdd5f26 100644
--- a/src/api/ApiRouter.cpp
+++ b/src/api/ApiRouter.cpp
@@ -42,20 +42,20 @@
 #include "common/Platform.h"
 #include "core/Config.h"
 #include "core/Controller.h"
-#include "interfaces/IThread.h"
 #include "rapidjson/document.h"
 #include "rapidjson/prettywriter.h"
 #include "rapidjson/stringbuffer.h"
 #include "version.h"
 #include "workers/Hashrate.h"
 #include "workers/Workers.h"
+#include "workers/Handle.h"
 
 
-static inline rapidjson::Value normalize(double d)
+rapidjson::Value ApiRouter::normalize(double d)
 {
     using namespace rapidjson;
 
-    if (!isnormal(d)) {
+    if (!std::isnormal(d)) {
         return Value(kNullType);
     }
 
@@ -216,13 +216,16 @@ void ApiRouter::getHashrate(rapidjson::Document &doc) const
     total.PushBack(normalize(hr->calc(Hashrate::MediumInterval)), allocator);
     total.PushBack(normalize(hr->calc(Hashrate::LargeInterval)),  allocator);
 
-    for (size_t i = 0; i < Workers::threads(); i++) {
-        rapidjson::Value thread(rapidjson::kArrayType);
-        thread.PushBack(normalize(hr->calc(i, Hashrate::ShortInterval)),  allocator);
-        thread.PushBack(normalize(hr->calc(i, Hashrate::MediumInterval)), allocator);
-        thread.PushBack(normalize(hr->calc(i, Hashrate::LargeInterval)),  allocator);
+    std::vector<Handle *> workers = Workers::workers(); // local list of worker handles, indexed below
+    for (size_t i = 0; i < workers.size(); i++) {
+        for(size_t j = 0; j < workers[i]->hasher()->deviceCount(); j++) {
+            rapidjson::Value thread(rapidjson::kArrayType);
+            thread.PushBack(normalize(hr->calc(i, j, Hashrate::ShortInterval)),  allocator);
+            thread.PushBack(normalize(hr->calc(i, j, Hashrate::MediumInterval)), allocator);
+            thread.PushBack(normalize(hr->calc(i, j, Hashrate::LargeInterval)),  allocator);
 
-        threads.PushBack(thread, allocator);
+            threads.PushBack(thread, allocator);
+        }
     }
 
     hashrate.AddMember("total",   total, allocator);
@@ -244,18 +247,10 @@ void ApiRouter::getMiner(rapidjson::Document &doc) const
     using namespace xmrig;
     auto &allocator = doc.GetAllocator();
 
-    rapidjson::Value cpu(rapidjson::kObjectType);
-    cpu.AddMember("brand",   rapidjson::StringRef(Cpu::info()->brand()), allocator);
-    cpu.AddMember("aes",     Cpu::info()->hasAES(), allocator);
-    cpu.AddMember("x64",     Cpu::info()->isX64(), allocator);
-    cpu.AddMember("sockets", Cpu::info()->sockets(), allocator);
-
     doc.AddMember("version",      APP_VERSION, allocator);
     doc.AddMember("kind",         APP_KIND, allocator);
     doc.AddMember("ua",           rapidjson::StringRef(Platform::userAgent()), allocator);
-    doc.AddMember("cpu",          cpu, allocator);
     doc.AddMember("algo",         rapidjson::StringRef(m_controller->config()->algorithm().name()), allocator);
-    doc.AddMember("hugepages",    Workers::hugePages() > 0, allocator);
     doc.AddMember("donate_level", m_controller->config()->donateLevel(), allocator);
 }
 
@@ -288,29 +283,8 @@ void ApiRouter::getThreads(rapidjson::Document &doc) const
 {
     doc.SetObject();
     auto &allocator = doc.GetAllocator();
-    const Hashrate *hr = Workers::hashrate();
 
-    Workers::threadsSummary(doc);
-
-    const std::vector<xmrig::IThread *> &threads = m_controller->config()->threads();
-    rapidjson::Value list(rapidjson::kArrayType);
-
-    size_t i = 0;
-    for (const xmrig::IThread *thread : threads) {
-        rapidjson::Value value = thread->toAPI(doc);
-
-        rapidjson::Value hashrate(rapidjson::kArrayType);
-        hashrate.PushBack(normalize(hr->calc(i, Hashrate::ShortInterval)),  allocator);
-        hashrate.PushBack(normalize(hr->calc(i, Hashrate::MediumInterval)), allocator);
-        hashrate.PushBack(normalize(hr->calc(i, Hashrate::LargeInterval)),  allocator);
-
-        i++;
-
-        value.AddMember("hashrate", hashrate, allocator);
-        list.PushBack(value, allocator);
-    }
-
-    doc.AddMember("threads", list, allocator);
+    Workers::hashersSummary(doc);
 }
 
 
diff --git a/src/api/ApiRouter.h b/src/api/ApiRouter.h
index a92173ce..61b35f7d 100644
--- a/src/api/ApiRouter.h
+++ b/src/api/ApiRouter.h
@@ -52,6 +52,8 @@ public:
 
     void tick(const xmrig::NetworkState &results);
 
+    static rapidjson::Value normalize(double d);
+
 protected:
     void onConfigChanged(xmrig::Config *config, xmrig::Config *previousConfig) override;
 
diff --git a/src/base/net/Pool.cpp b/src/base/net/Pool.cpp
index 9d4f2bde..fa442904 100644
--- a/src/base/net/Pool.cpp
+++ b/src/base/net/Pool.cpp
@@ -290,21 +290,7 @@ rapidjson::Value xmrig::Pool::toJSON(rapidjson::Document &doc) const
         obj.AddMember(StringRef(kKeepalive), m_keepAlive, allocator);
     }
 
-    switch (m_algorithm.variant()) {
-    case VARIANT_AUTO:
-    case VARIANT_0:
-    case VARIANT_1:
-        obj.AddMember(StringRef(kVariant), m_algorithm.variant(), allocator);
-        break;
-
-    case VARIANT_2:
-        obj.AddMember(StringRef(kVariant), 2, allocator);
-        break;
-
-    default:
-        obj.AddMember(StringRef(kVariant), StringRef(m_algorithm.variantName()), allocator);
-        break;
-    }
+    obj.AddMember(StringRef(kVariant), StringRef(m_algorithm.variantName()), allocator);
 
     obj.AddMember(StringRef(kEnabled),     m_enabled, allocator);
     obj.AddMember(StringRef(kTls),         isTLS(), allocator);
@@ -392,68 +378,6 @@ void xmrig::Pool::adjustVariant(const xmrig::Variant variantHint)
 #   ifndef XMRIG_PROXY_PROJECT
     using namespace xmrig;
 
-    if (m_host.contains(".nicehash.com")) {
-        m_keepAlive = false;
-        m_nicehash  = true;
-        bool valid  = true;
-
-        switch (m_port) {
-        case 3355:
-        case 33355:
-            valid = m_algorithm.algo() == CRYPTONIGHT && m_host.contains("cryptonight.");
-            m_algorithm.setVariant(VARIANT_0);
-            break;
-
-        case 3363:
-        case 33363:
-            valid = m_algorithm.algo() == CRYPTONIGHT && m_host.contains("cryptonightv7.");
-            m_algorithm.setVariant(VARIANT_1);
-            break;
-
-        case 3364:
-            valid = m_algorithm.algo() == CRYPTONIGHT_HEAVY && m_host.contains("cryptonightheavy.");
-            m_algorithm.setVariant(VARIANT_0);
-            break;
-
-        case 3367:
-        case 33367:
-            valid = m_algorithm.algo() == CRYPTONIGHT && m_host.contains("cryptonightv8.");
-            m_algorithm.setVariant(VARIANT_2);
-            break;
-
-        default:
-            break;
-        }
-
-        if (!valid) {
-            m_algorithm.setAlgo(INVALID_ALGO);
-        }
-
-        m_tls = m_port > 33000;
-        return;
-    }
-
-    if (m_host.contains(".minergate.com")) {
-        m_keepAlive = false;
-        bool valid  = true;
-        m_algorithm.setVariant(VARIANT_1);
-
-        if (m_host.contains("xmr.pool.")) {
-            valid = m_algorithm.algo() == CRYPTONIGHT;
-            m_algorithm.setVariant(m_port == 45700 ? VARIANT_AUTO : VARIANT_0);
-        }
-        else if (m_host.contains("aeon.pool.") && m_port == 45690) {
-            valid = m_algorithm.algo() == CRYPTONIGHT_LITE;
-            m_algorithm.setVariant(VARIANT_1);
-        }
-
-        if (!valid) {
-            m_algorithm.setAlgo(INVALID_ALGO);
-        }
-
-        return;
-    }
-
     if (variantHint != VARIANT_AUTO) {
         m_algorithm.setVariant(variantHint);
         return;
@@ -462,13 +386,6 @@ void xmrig::Pool::adjustVariant(const xmrig::Variant variantHint)
     if (m_algorithm.variant() != VARIANT_AUTO) {
         return;
     }
-
-    if (m_algorithm.algo() == CRYPTONIGHT_HEAVY)  {
-        m_algorithm.setVariant(VARIANT_0);
-    }
-    else if (m_algorithm.algo() == CRYPTONIGHT_LITE) {
-        m_algorithm.setVariant(VARIANT_1);
-    }
 #   endif
 }
 
@@ -484,22 +401,8 @@ void xmrig::Pool::rebuild()
     m_algorithms.push_back(m_algorithm);
 
 #   ifndef XMRIG_PROXY_PROJECT
-    addVariant(VARIANT_4);
-    addVariant(VARIANT_WOW);
-    addVariant(VARIANT_2);
-    addVariant(VARIANT_1);
-    addVariant(VARIANT_0);
-    addVariant(VARIANT_HALF);
-    addVariant(VARIANT_XTL);
-    addVariant(VARIANT_TUBE);
-    addVariant(VARIANT_MSR);
-    addVariant(VARIANT_XHV);
-    addVariant(VARIANT_XAO);
-    addVariant(VARIANT_RTO);
-    addVariant(VARIANT_GPU);
-    addVariant(VARIANT_RWZ);
-    addVariant(VARIANT_ZLS);
-    addVariant(VARIANT_DOUBLE);
     addVariant(VARIANT_AUTO);
+    addVariant(VARIANT_CHUKWA);
+    addVariant(VARIANT_CHUKWA_LITE);
 #   endif
 }
diff --git a/src/base/tools/String.cpp b/src/base/tools/String.cpp
index 7ed61d01..ccffc2a8 100644
--- a/src/base/tools/String.cpp
+++ b/src/base/tools/String.cpp
@@ -68,19 +68,25 @@ xmrig::String::String(const String &other) :
 }
 
 
-bool xmrig::String::isEqual(const char *str) const
+bool xmrig::String::isEqual(const char *str, bool caseInsensitive) const
 {
-    return (m_data != nullptr && str != nullptr && strcmp(m_data, str) == 0) || (m_data == nullptr && str == nullptr);
+    if (m_data == nullptr || str == nullptr) {
+        return m_data == nullptr && str == nullptr; // a null string only equals another null
+    }
+    return (caseInsensitive ? strcasecmp(m_data, str) : strcmp(m_data, str)) == 0;
 }
 
 
-bool xmrig::String::isEqual(const String &other) const
+bool xmrig::String::isEqual(const String &other, bool caseInsensitive) const
 {
     if (m_size != other.m_size) {
         return false;
     }
 
-    return (m_data != nullptr && other.m_data != nullptr && memcmp(m_data, other.m_data, m_size) == 0) || (m_data == nullptr && other.m_data == nullptr);
+    if (m_data == nullptr || other.m_data == nullptr) {
+        return m_data == nullptr && other.m_data == nullptr; // null matches only null
+    }
+    return (caseInsensitive ? strncasecmp(m_data, other.m_data, m_size) : memcmp(m_data, other.m_data, m_size)) == 0;
 }
 
 
diff --git a/src/base/tools/String.h b/src/base/tools/String.h
index 0c191dfd..b25c0a64 100644
--- a/src/base/tools/String.h
+++ b/src/base/tools/String.h
@@ -56,8 +56,8 @@ public:
     inline ~String() { delete [] m_data; }
 
 
-    bool isEqual(const char *str) const;
-    bool isEqual(const String &other) const;
+    bool isEqual(const char *str, bool caseInsensitive = false) const;
+    bool isEqual(const String &other, bool caseInsensitive = false) const;
 
 
     inline bool contains(const char *str) const { return isNull() ? false : strstr(m_data, str) != nullptr; }
diff --git a/src/common/config/CommonConfig.cpp b/src/common/config/CommonConfig.cpp
index 36d156a3..94c68350 100644
--- a/src/common/config/CommonConfig.cpp
+++ b/src/common/config/CommonConfig.cpp
@@ -65,7 +65,7 @@
 
 
 xmrig::CommonConfig::CommonConfig() :
-    m_algorithm(CRYPTONIGHT, VARIANT_AUTO),
+    m_algorithm(ARGON2, VARIANT_AUTO),
     m_adjusted(false),
     m_apiIPv6(false),
     m_apiRestricted(true),
@@ -168,7 +168,7 @@ void xmrig::CommonConfig::printVersions()
 bool xmrig::CommonConfig::save()
 {
     if (m_fileName.isNull()) {
-        return false;
+        m_fileName = "config.json";
     }
 
     rapidjson::Document doc;
diff --git a/src/common/cpu/BasicCpuInfo.cpp b/src/common/cpu/BasicCpuInfo.cpp
index d7778bdd..990b12ff 100644
--- a/src/common/cpu/BasicCpuInfo.cpp
+++ b/src/common/cpu/BasicCpuInfo.cpp
@@ -121,7 +121,6 @@ static inline bool has_ossave()
 
 
 xmrig::BasicCpuInfo::BasicCpuInfo() :
-    m_assembly(ASM_NONE),
     m_aes(has_aes_ni()),
     m_avx2(has_avx2() && has_ossave()),
     m_brand(),
@@ -129,7 +128,6 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
 {
     cpu_brand_string(m_brand);
 
-#   ifndef XMRIG_NO_ASM
     if (hasAES()) {
         char vendor[13] = { 0 };
         int32_t data[4] = { 0 };
@@ -139,19 +137,11 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
         memcpy(vendor + 0, &data[1], 4);
         memcpy(vendor + 4, &data[3], 4);
         memcpy(vendor + 8, &data[2], 4);
-
-        if (memcmp(vendor, "GenuineIntel", 12) == 0) {
-            m_assembly = ASM_INTEL;
-        }
-        else if (memcmp(vendor, "AuthenticAMD", 12) == 0) {
-            m_assembly = ASM_RYZEN;
-        }
     }
-#   endif
 }
 
 
-size_t xmrig::BasicCpuInfo::optimalThreadsCount(size_t memSize, int maxCpuUsage) const
+size_t xmrig::BasicCpuInfo::optimalThreadsCount(size_t memSize) const
 {
     const size_t count = threads() / 2;
 
diff --git a/src/common/cpu/BasicCpuInfo.h b/src/common/cpu/BasicCpuInfo.h
index 95857ed2..9f34c7b9 100644
--- a/src/common/cpu/BasicCpuInfo.h
+++ b/src/common/cpu/BasicCpuInfo.h
@@ -38,9 +38,8 @@ public:
     BasicCpuInfo();
 
 protected:
-    size_t optimalThreadsCount(size_t memSize, int maxCpuUsage) const override;
+    size_t optimalThreadsCount(size_t memSize) const override;
 
-    inline Assembly assembly() const override       { return m_assembly; }
     inline bool hasAES() const override             { return m_aes; }
     inline bool hasAVX2() const override            { return m_avx2; }
     inline bool isSupported() const override        { return true; }
@@ -59,7 +58,6 @@ protected:
 #   endif
 
 private:
-    Assembly m_assembly;
     bool m_aes;
     bool m_avx2;
     char m_brand[64];
diff --git a/src/common/cpu/BasicCpuInfo_arm.cpp b/src/common/cpu/BasicCpuInfo_arm.cpp
index 33961346..26979e11 100644
--- a/src/common/cpu/BasicCpuInfo_arm.cpp
+++ b/src/common/cpu/BasicCpuInfo_arm.cpp
@@ -52,7 +52,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
 }
 
 
-size_t xmrig::BasicCpuInfo::optimalThreadsCount(size_t memSize, int maxCpuUsage) const
+size_t xmrig::BasicCpuInfo::optimalThreadsCount(size_t memSize) const
 {
     return threads();
 }
diff --git a/src/common/crypto/Algorithm.cpp b/src/common/crypto/Algorithm.cpp
index f14d034d..197db5a3 100644
--- a/src/common/crypto/Algorithm.cpp
+++ b/src/common/crypto/Algorithm.cpp
@@ -26,8 +26,6 @@
 
 #include <assert.h>
 #include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
 
 
 #include "common/crypto/Algorithm.h"
@@ -54,47 +52,10 @@ struct AlgoData
 
 
 static AlgoData const algorithms[] = {
-    { "cryptonight",           "cn",           xmrig::CRYPTONIGHT,       xmrig::VARIANT_AUTO   },
-    { "cryptonight/0",         "cn/0",         xmrig::CRYPTONIGHT,       xmrig::VARIANT_0      },
-    { "cryptonight/1",         "cn/1",         xmrig::CRYPTONIGHT,       xmrig::VARIANT_1      },
-    { "cryptonight/xtl",       "cn/xtl",       xmrig::CRYPTONIGHT,       xmrig::VARIANT_XTL    },
-    { "cryptonight/msr",       "cn/msr",       xmrig::CRYPTONIGHT,       xmrig::VARIANT_MSR    },
-    { "cryptonight/xao",       "cn/xao",       xmrig::CRYPTONIGHT,       xmrig::VARIANT_XAO    },
-    { "cryptonight/rto",       "cn/rto",       xmrig::CRYPTONIGHT,       xmrig::VARIANT_RTO    },
-    { "cryptonight/2",         "cn/2",         xmrig::CRYPTONIGHT,       xmrig::VARIANT_2      },
-    { "cryptonight/half",      "cn/half",      xmrig::CRYPTONIGHT,       xmrig::VARIANT_HALF   },
-    { "cryptonight/xtlv9",     "cn/xtlv9",     xmrig::CRYPTONIGHT,       xmrig::VARIANT_HALF   },
-    { "cryptonight/wow",       "cn/wow",       xmrig::CRYPTONIGHT,       xmrig::VARIANT_WOW    },
-    { "cryptonight/r",         "cn/r",         xmrig::CRYPTONIGHT,       xmrig::VARIANT_4      },
-    { "cryptonight/rwz",       "cn/rwz",       xmrig::CRYPTONIGHT,       xmrig::VARIANT_RWZ    },
-    { "cryptonight/zls",       "cn/zls",       xmrig::CRYPTONIGHT,       xmrig::VARIANT_ZLS    },
-    { "cryptonight/double",    "cn/double",    xmrig::CRYPTONIGHT,       xmrig::VARIANT_DOUBLE },
-
-#   ifndef XMRIG_NO_AEON
-    { "cryptonight-lite",      "cn-lite",      xmrig::CRYPTONIGHT_LITE,  xmrig::VARIANT_AUTO },
-    { "cryptonight-light",     "cn-light",     xmrig::CRYPTONIGHT_LITE,  xmrig::VARIANT_AUTO },
-    { "cryptonight-lite/0",    "cn-lite/0",    xmrig::CRYPTONIGHT_LITE,  xmrig::VARIANT_0    },
-    { "cryptonight-lite/1",    "cn-lite/1",    xmrig::CRYPTONIGHT_LITE,  xmrig::VARIANT_1    },
-#   endif
-
-#   ifndef XMRIG_NO_SUMO
-    { "cryptonight-heavy",      "cn-heavy",      xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_AUTO },
-    { "cryptonight-heavy/0",    "cn-heavy/0",    xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_0    },
-    { "cryptonight-heavy/xhv",  "cn-heavy/xhv",  xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_XHV  },
-    { "cryptonight-heavy/tube", "cn-heavy/tube", xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_TUBE },
-#   endif
-
-#   ifndef XMRIG_NO_CN_PICO
-    { "cryptonight-pico/trtl",  "cn-pico/trtl",  xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL },
-    { "cryptonight-pico",       "cn-pico",       xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL },
-    { "cryptonight-turtle",     "cn-trtl",       xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL },
-    { "cryptonight-ultralite",  "cn-ultralite",  xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL },
-    { "cryptonight_turtle",     "cn_turtle",     xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL },
-#   endif
-
-#   ifndef XMRIG_NO_CN_GPU
-    { "cryptonight/gpu",        "cn/gpu",  xmrig::CRYPTONIGHT, xmrig::VARIANT_GPU },
-#   endif
+    { "chukwa",                 "trtl-chukwa",    xmrig::ARGON2,          xmrig::VARIANT_CHUKWA },
+//    { "argon2/trtl",            "trtl-chukwa",    xmrig::ARGON2,          xmrig::VARIANT_CHUKWA },
+    { "chukwa/wrkz",            "wrkz-chukwa",    xmrig::ARGON2,          xmrig::VARIANT_CHUKWA_LITE },
+    { "argon2/wrkz",            "wrkz-chukwa",    xmrig::ARGON2,          xmrig::VARIANT_CHUKWA_LITE },
 };
 
 
@@ -122,23 +83,8 @@ static AlgoData const xmrStakAlgorithms[] = {
 
 
 static const char *variants[] = {
-    "0",
-    "1",
-    "tube",
-    "xtl",
-    "msr",
-    "xhv",
-    "xao",
-    "rto",
-    "2",
-    "half",
-    "trtl",
-    "gpu",
-    "wow",
-    "r",
-    "rwz",
-    "zls",
-    "double"
+    "chukwa",
+    "wrkz",
 };
 
 
@@ -170,7 +116,6 @@ const char *xmrig::Algorithm::variantName() const
     return variants[m_variant];
 }
 
-
 void xmrig::Algorithm::parseAlgorithm(const char *algo)
 {
     m_algo    = INVALID_ALGO;
@@ -221,41 +166,20 @@ void xmrig::Algorithm::parseVariant(const char *variant)
             return;
         }
     }
-
-    if (strcasecmp(variant, "xtlv9") == 0) {
-        m_variant = VARIANT_HALF;
-    }
 }
 
 
 void xmrig::Algorithm::parseVariant(int variant)
 {
-    assert(variant >= -1 && variant <= 2);
+    assert(variant >= VARIANT_AUTO && variant < VARIANT_MAX);
 
-    switch (variant) {
-    case -1:
-    case 0:
-    case 1:
-        m_variant = static_cast<Variant>(variant);
-        break;
-
-    case 2:
-        m_variant = VARIANT_2;
-        break;
-
-    default:
-        break;
-    }
+    if (variant >= VARIANT_AUTO && variant < VARIANT_MAX) m_variant = static_cast<Variant>(variant); // assert() is a no-op under NDEBUG: never store an out-of-range value, variantName() uses m_variant as an array index
 }
 
 
 void xmrig::Algorithm::setAlgo(Algo algo)
 {
     m_algo = algo;
-
-    if (m_algo == CRYPTONIGHT_PICO && m_variant == VARIANT_AUTO) {
-        m_variant = xmrig::VARIANT_TRTL;
-    }
 }
 
 
diff --git a/src/common/interfaces/IConfig.h b/src/common/interfaces/IConfig.h
index 7e6931a8..ba2d88ce 100644
--- a/src/common/interfaces/IConfig.h
+++ b/src/common/interfaces/IConfig.h
@@ -71,33 +71,20 @@ public:
         AutoSaveKey       = 1016,
 
         // xmrig common
-        CPUPriorityKey    = 1021,
+        PriorityKey       = 1021,
         NicehashKey       = 1006,
         PrintTimeKey      = 1007,
 
         // xmrig cpu
-        AVKey             = 'v',
+        CPUThreadsKey     = 't',
+        CPUOptimizationKey = 5004,
         CPUAffinityKey    = 1020,
         DryRunKey         = 5000,
-        HugePagesKey      = 1009,
-        MaxCPUUsageKey    = 1004,
-        SafeKey           = 1005,
-        ThreadsKey        = 't',
-        HardwareAESKey    = 1011,
-        AssemblyKey       = 1015,
 
-        // xmrig amd
-        OclPlatformKey    = 1400,
-        OclAffinityKey    = 1401,
-        OclDevicesKey     = 1402,
-        OclLaunchKey      = 1403,
-        OclCacheKey       = 1404,
-        OclPrintKey       = 1405,
-        OclLoaderKey      = 1406,
-        OclSridedIndexKey = 1407,
-        OclMemChunkKey    = 1408,
-        OclUnrollKey      = 1409,
-        OclCompModeKey    = 1410,
+        // ninjarig gpu
+        UseGPUKey         = 5001,
+        GPUIntensityKey   = 5002,
+        GPUFilterKey      = 5003,
 
         // xmrig-proxy
         AccessLogFileKey   = 'A',
@@ -117,15 +104,6 @@ public:
         TlsCiphersKey      = 1112,
         TlsCipherSuitesKey = 1113,
         TlsProtocolsKey    = 1114,
-
-        // xmrig nvidia
-        CudaMaxThreadsKey = 1200,
-        CudaBFactorKey    = 1201,
-        CudaBSleepKey     = 1202,
-        CudaDevicesKey    = 1203,
-        CudaLaunchKey     = 1204,
-        CudaAffinityKey   = 1205,
-        CudaMaxUsageKey   = 1206,
     };
 
     virtual ~IConfig() = default;
diff --git a/src/common/interfaces/ICpuInfo.h b/src/common/interfaces/ICpuInfo.h
index dd4034b3..c25ecc4f 100644
--- a/src/common/interfaces/ICpuInfo.h
+++ b/src/common/interfaces/ICpuInfo.h
@@ -52,8 +52,7 @@ public:
     virtual int32_t nodes() const                                             = 0;
     virtual int32_t sockets() const                                           = 0;
     virtual int32_t threads() const                                           = 0;
-    virtual size_t optimalThreadsCount(size_t memSize, int maxCpuUsage) const = 0;
-    virtual xmrig::Assembly assembly() const                                  = 0;
+    virtual size_t optimalThreadsCount(size_t memSize) const                  = 0;
 };
 
 
diff --git a/src/common/net/Job.cpp b/src/common/net/Job.cpp
index cb6be4e6..62aeeeb9 100644
--- a/src/common/net/Job.cpp
+++ b/src/common/net/Job.cpp
@@ -230,19 +230,5 @@ char *xmrig::Job::toHex(const unsigned char* in, unsigned int len)
 
 xmrig::Variant xmrig::Job::variant() const
 {
-    switch (m_algorithm.algo()) {
-    case CRYPTONIGHT:
-        return (m_blob[0] >= 10) ? VARIANT_4 : ((m_blob[0] >= 8) ? VARIANT_2 : VARIANT_1);
-
-    case CRYPTONIGHT_LITE:
-        return VARIANT_1;
-
-    case CRYPTONIGHT_HEAVY:
-        return VARIANT_0;
-
-    default:
-        break;
-    }
-
     return m_algorithm.variant();
 }
diff --git a/src/common/xmrig.h b/src/common/xmrig.h
index e8ca8857..7a639a1e 100644
--- a/src/common/xmrig.h
+++ b/src/common/xmrig.h
@@ -25,97 +25,22 @@
 #ifndef XMRIG_XMRIG_H
 #define XMRIG_XMRIG_H
 
-
 namespace xmrig
 {
 
-
 enum Algo {
     INVALID_ALGO = -1,
-    CRYPTONIGHT,        /* CryptoNight (2 MB) */
-    CRYPTONIGHT_LITE,   /* CryptoNight (1 MB) */
-    CRYPTONIGHT_HEAVY,  /* CryptoNight (4 MB) */
-    CRYPTONIGHT_PICO,   /* CryptoNight (256 KB) */
+    ARGON2,             /* Argon2 */
     ALGO_MAX
 };
 
-
-//--av=1 For CPUs with hardware AES.
-//--av=2 Lower power mode (double hash) of 1.
-//--av=3 Software AES implementation.
-//--av=4 Lower power mode (double hash) of 3.
-enum AlgoVariant {
-    AV_AUTO,        // --av=0 Automatic mode.
-    AV_SINGLE,      // --av=1  Single hash mode
-    AV_DOUBLE,      // --av=2  Double hash mode
-    AV_SINGLE_SOFT, // --av=3  Single hash mode (Software AES)
-    AV_DOUBLE_SOFT, // --av=4  Double hash mode (Software AES)
-    AV_TRIPLE,      // --av=5  Triple hash mode
-    AV_QUAD,        // --av=6  Quard hash mode
-    AV_PENTA,       // --av=7  Penta hash mode
-    AV_TRIPLE_SOFT, // --av=8  Triple hash mode (Software AES)
-    AV_QUAD_SOFT,   // --av=9  Quard hash mode  (Software AES)
-    AV_PENTA_SOFT,  // --av=10 Penta hash mode  (Software AES)
-    AV_MAX
-};
-
-
 enum Variant {
     VARIANT_AUTO   = -1, // Autodetect
-    VARIANT_0      = 0,  // Original CryptoNight or CryptoNight-Heavy
-    VARIANT_1      = 1,  // CryptoNight variant 1 also known as Monero7 and CryptoNightV7
-    VARIANT_TUBE   = 2,  // Modified CryptoNight-Heavy (TUBE only)
-    VARIANT_XTL    = 3,  // Modified CryptoNight variant 1 (Stellite only)
-    VARIANT_MSR    = 4,  // Modified CryptoNight variant 1 (Masari only)
-    VARIANT_XHV    = 5,  // Modified CryptoNight-Heavy (Haven Protocol only)
-    VARIANT_XAO    = 6,  // Modified CryptoNight variant 0 (Alloy only)
-    VARIANT_RTO    = 7,  // Modified CryptoNight variant 1 (Arto only)
-    VARIANT_2      = 8,  // CryptoNight variant 2
-    VARIANT_HALF   = 9,  // CryptoNight variant 2 with half iterations (Masari/Stellite)
-    VARIANT_TRTL   = 10, // CryptoNight Turtle (TRTL)
-    VARIANT_GPU    = 11, // CryptoNight-GPU (Ryo)
-    VARIANT_WOW    = 12, // CryptoNightR (Wownero)
-    VARIANT_4      = 13, // CryptoNightR (Monero's variant 4)
-    VARIANT_RWZ    = 14, // CryptoNight variant 2 with 3/4 iterations and reversed shuffle operation (Graft)
-    VARIANT_ZLS    = 15, // CryptoNight variant 2 with 3/4 iterations (Zelerius)
-    VARIANT_DOUBLE = 16, // CryptoNight variant 2 with double iterations (X-CASH)
+    VARIANT_CHUKWA = 0, // Argon2 Chukwa for TurtleCoin; the value is also the index of "chukwa" in the variants[] name table read by Algorithm::variantName()
+    VARIANT_CHUKWA_LITE = 1, // Argon2 Chukwa Lite for WrkzCoin; the value is also the index of "wrkz" in that table
     VARIANT_MAX
 };
 
-
-enum AlgoVerify {
-    VERIFY_HW_AES   = 1,
-    VERIFY_SOFT_AES = 2
-};
-
-
-enum AesMode {
-    AES_AUTO,
-    AES_HW,
-    AES_SOFT
-};
-
-
-enum OclVendor {
-    OCL_VENDOR_UNKNOWN = -2,
-    OCL_VENDOR_MANUAL  = -1,
-    OCL_VENDOR_AMD     = 0,
-    OCL_VENDOR_NVIDIA  = 1,
-    OCL_VENDOR_INTEL   = 2
-};
-
-
-enum Assembly {
-    ASM_NONE,
-    ASM_AUTO,
-    ASM_INTEL,
-    ASM_RYZEN,
-    ASM_BULLDOZER,
-    ASM_MAX
-};
-
-
 } /* namespace xmrig */
 
-
 #endif /* XMRIG_XMRIG_H */
diff --git a/src/config.json b/src/config.json
index 5018db51..f36136ed 100644
--- a/src/config.json
+++ b/src/config.json
@@ -1,5 +1,5 @@
 {
-    "algo": "cryptonight",
+    "algo": "chukwa/wrkz",
     "api": {
         "port": 0,
         "access-token": null,
@@ -8,27 +8,26 @@
         "ipv6": false,
         "restricted": true
     },
-    "asm": true,
     "autosave": true,
-    "av": 0,
     "background": false,
     "colors": true,
+    "threads": "all",
     "cpu-affinity": null,
     "cpu-priority": null,
-    "donate-level": 5,
-    "huge-pages": true,
-    "hw-aes": null,
-    "log-file": null,
-    "max-cpu-usage": 100,
+    "use-gpu": "CUDA",
+    "gpu-intensity": 50,
+    "donate-level": 1,
+    "log-file": "./log.txt",
     "pools": [
         {
-            "url": "donate.v2.xmrig.com:3333",
-            "user": "YOUR_WALLET_ADDRESS",
+            "url": "testnet.wrkz.work:5555",
+            "user": "WrkzRNDQDwFCBynKPc459v3LDa1gEGzG3j962tMUBko1fw9xgdaS9mNiGMgA9s1q7hS1Z8SGRVWzcGc8Sh8xsvfZ6u2wJEtoZB",
             "pass": "x",
             "rig-id": null,
             "nicehash": false,
-            "keepalive": false,
-            "variant": -1,
+            "keepalive": true,
+            "variant": "wrkz",
+            "enabled": true,
             "tls": false,
             "tls-fingerprint": null
         }
@@ -37,7 +36,6 @@
     "retries": 5,
     "retry-pause": 5,
     "safe": false,
-    "threads": null,
     "user-agent": null,
     "watch": true
 }
\ No newline at end of file
diff --git a/src/core/Config.cpp b/src/core/Config.cpp
index 9216027a..7ddb5d70 100644
--- a/src/core/Config.cpp
+++ b/src/core/Config.cpp
@@ -27,31 +27,24 @@
 #include <uv.h>
 #include <inttypes.h>
 
-
 #include "common/config/ConfigLoader.h"
 #include "common/cpu/Cpu.h"
 #include "core/Config.h"
 #include "core/ConfigCreator.h"
-#include "crypto/Asm.h"
-#include "crypto/CryptoNight_constants.h"
+#include "crypto/Argon2_constants.h"
 #include "rapidjson/document.h"
 #include "rapidjson/filewritestream.h"
 #include "rapidjson/prettywriter.h"
-#include "workers/CpuThread.h"
+#include "HasherConfig.h"
 
 
 static char affinity_tmp[20] = { 0 };
 
 
 xmrig::Config::Config() : xmrig::CommonConfig(),
-    m_aesMode(AES_AUTO),
-    m_algoVariant(AV_AUTO),
-    m_assembly(ASM_AUTO),
-    m_hugePages(true),
-    m_safe(false),
     m_shouldSave(false),
-    m_maxCpuUsage(100),
-    m_priority(-1)
+    m_priority(-1), m_mask(-1),
+    m_cpuThreads(0) // must start defined: finalize() and getJSON() read it even when no thread option was parsed
 {
 }
 
@@ -81,47 +74,31 @@ void xmrig::Config::getJSON(rapidjson::Document &doc) const
     api.AddMember("restricted",   isApiRestricted(), allocator);
     doc.AddMember("api",          api, allocator);
 
-#   ifndef XMRIG_NO_ASM
-    doc.AddMember("asm",          Asm::toJSON(m_assembly), allocator);
-#   endif
-
     doc.AddMember("autosave",     isAutoSave(), allocator);
-    doc.AddMember("av",           algoVariant(), allocator);
     doc.AddMember("background",   isBackground(), allocator);
     doc.AddMember("colors",       isColors(), allocator);
 
-    if (affinity() != -1L) {
-        snprintf(affinity_tmp, sizeof(affinity_tmp) - 1, "0x%" PRIX64, affinity());
+    doc.AddMember("cpu-threads", cpuThreads(), allocator);
+    if(m_cpuOptimization.isNull() || m_cpuOptimization.isEmpty()) // an unset or empty optimization is written as JSON null
+        doc.AddMember("cpu-optimization", kNullType, allocator);
+    else // copy the text into the document: cpuOptimization() returns a temporary, so a StringRef() to its buffer would dangle
+        doc.AddMember("cpu-optimization", Value(m_cpuOptimization.data(), allocator), allocator);
+
+    if (cpuAffinity() != -1L) {
+        snprintf(affinity_tmp, sizeof(affinity_tmp) - 1, "0x%" PRIX64, cpuAffinity());
         doc.AddMember("cpu-affinity", StringRef(affinity_tmp), allocator);
     }
     else {
         doc.AddMember("cpu-affinity", kNullType, allocator);
     }
 
-    doc.AddMember("cpu-priority",  priority() != -1 ? Value(priority()) : Value(kNullType), allocator);
+    doc.AddMember("priority",  priority() != -1 ? Value(priority()) : Value(kNullType), allocator);
     doc.AddMember("donate-level",  donateLevel(), allocator);
-    doc.AddMember("huge-pages",    isHugePages(), allocator);
-    doc.AddMember("hw-aes",        m_aesMode == AES_AUTO ? Value(kNullType) : Value(m_aesMode == AES_HW), allocator);
     doc.AddMember("log-file",      logFile()             ? Value(StringRef(logFile())).Move() : Value(kNullType).Move(), allocator);
-    doc.AddMember("max-cpu-usage", m_maxCpuUsage, allocator);
     doc.AddMember("pools",         m_pools.toJSON(doc), allocator);
     doc.AddMember("print-time",    printTime(), allocator);
     doc.AddMember("retries",       m_pools.retries(), allocator);
     doc.AddMember("retry-pause",   m_pools.retryPause(), allocator);
-    doc.AddMember("safe",          m_safe, allocator);
-
-    if (threadsMode() != Simple) {
-        Value threads(kArrayType);
-
-        for (const IThread *thread : m_threads.list) {
-            threads.PushBack(thread->toConfig(doc), allocator);
-        }
-
-        doc.AddMember("threads", threads, allocator);
-    }
-    else {
-        doc.AddMember("threads", threadsCount(), allocator);
-    }
 
     doc.AddMember("user-agent", userAgent() ? Value(StringRef(userAgent())).Move() : Value(kNullType).Move(), allocator);
 
@@ -130,6 +107,30 @@ void xmrig::Config::getJSON(rapidjson::Document &doc) const
 #   endif
 
     doc.AddMember("watch", m_watch, allocator);
+
+    // "use-gpu": one string per configured GPU engine.
+    Value gpuEngines(kArrayType);
+    for (const String &gpuEngine : m_gpuEngine) { // bind by reference: no per-element String copy
+        gpuEngines.PushBack(gpuEngine.toJSON(doc), allocator);
+    }
+
+    doc.AddMember("use-gpu", gpuEngines, allocator);
+
+    // "gpu-intensity": the stored intensity values, in order.
+    Value gpuIntensities(kArrayType);
+    for (const double gpuIntensity : m_gpuIntensity) {
+        gpuIntensities.PushBack(gpuIntensity, allocator);
+    }
+
+    doc.AddMember("gpu-intensity", gpuIntensities, allocator);
+
+    // "gpu-filter": one object per filter, built by toGPUFilterConfig().
+    Value gpuFilters(kArrayType);
+    for (const GPUFilter &gpuFilter : m_gpuFilter) { // bind by reference: no per-element GPUFilter copy
+        gpuFilters.PushBack(toGPUFilterConfig(gpuFilter, doc), allocator);
+    }
+
+    doc.AddMember("gpu-filter", gpuFilters, allocator);
 }
 
 
@@ -149,37 +150,20 @@ bool xmrig::Config::finalize()
         return false;
     }
 
-    if (!m_threads.cpu.empty()) {
-        m_threads.mode     = Advanced;
-        const bool softAES = (m_aesMode == AES_AUTO ? (Cpu::info()->hasAES() ? AES_HW : AES_SOFT) : m_aesMode) == AES_SOFT;
+    if(m_gpuIntensity.size() == 0)
+        m_gpuIntensity.push_back(50);
 
-        for (size_t i = 0; i < m_threads.cpu.size(); ++i) {
-            m_threads.list.push_back(CpuThread::createFromData(i, m_algorithm.algo(), m_threads.cpu[i], m_priority, softAES));
-        }
+    HasherConfig hasherConfig(m_algorithm.algo(), m_algorithm.variant(), m_priority, m_cpuThreads, m_mask, m_cpuOptimization.isNull() ? "" : m_cpuOptimization.data(), m_gpuIntensity, m_gpuFilter);
 
-        return true;
-    }
+    if(m_cpuThreads > 0)
+        m_hashers.push_back(hasherConfig.clone(m_hashers.size(), "CPU"));
 
-    const AlgoVariant av = getAlgoVariant();   
-    m_threads.mode = m_threads.count ? Simple : Automatic;
+    // One hasher per configured GPU engine; an empty list simply adds nothing.
+    for(const String &gpuEngine : m_gpuEngine) // bind by reference: no per-element String copy
+        m_hashers.push_back(hasherConfig.clone(m_hashers.size(), gpuEngine.data()));
 
-    const size_t size = CpuThread::multiway(av) * cn_select_memory(m_algorithm.algo()) / 1024;
+    m_shouldSave = true;
 
-    if (!m_threads.count) {
-        m_threads.count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage);
-    }
-    else if (m_safe) {
-        const size_t count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage);
-        if (m_threads.count > count) {
-            m_threads.count = count;
-        }
-    }
-
-    for (size_t i = 0; i < m_threads.count; ++i) {
-        m_threads.list.push_back(CpuThread::createFromAV(i, m_algorithm.algo(), av, m_threads.mask, m_priority, m_assembly));
-    }
-
-    m_shouldSave = m_threads.mode == Automatic;
     return true;
 }
 
@@ -190,29 +174,6 @@ bool xmrig::Config::parseBoolean(int key, bool enable)
         return false;
     }
 
-    switch (key) {
-    case SafeKey: /* --safe */
-        m_safe = enable;
-        break;
-
-    case HugePagesKey: /* --no-huge-pages */
-        m_hugePages = enable;
-        break;
-
-    case HardwareAESKey: /* hw-aes config only */
-        m_aesMode = enable ? AES_HW : AES_SOFT;
-        break;
-
-#   ifndef XMRIG_NO_ASM
-    case AssemblyKey:
-        m_assembly = Asm::parse(enable);
-        break;
-#   endif
-
-    default:
-        break;
-    }
-
     return true;
 }
 
@@ -224,36 +185,92 @@ bool xmrig::Config::parseString(int key, const char *arg)
     }
 
     switch (key) {
-    case AVKey:          /* --av */
-    case MaxCPUUsageKey: /* --max-cpu-usage */
-    case CPUPriorityKey: /* --cpu-priority */
+    case PriorityKey: /* --priority */
         return parseUint64(key, strtol(arg, nullptr, 10));
 
-    case SafeKey: /* --safe */
-        return parseBoolean(key, true);
-
-    case HugePagesKey: /* --no-huge-pages */
-        return parseBoolean(key, false);
-
-    case ThreadsKey:  /* --threads */
+    case CPUThreadsKey:  /* --threads */
         if (strncmp(arg, "all", 3) == 0) {
-            m_threads.count = Cpu::info()->threads();
+            m_cpuThreads = Cpu::info()->threads();
             return true;
         }
 
         return parseUint64(key, strtol(arg, nullptr, 10));
 
+    case CPUOptimizationKey:
+        {
+            // Canonical (upper-case) spellings of the accepted optimization names,
+            // listed in the order in which they are tried.
+            static const char *const kCpuOptimizations[] = {
+                "REF", "SSE2", "SSSE3", "AVX", "AVX2", "AVX512F", "NEON"
+            };
+
+            String requested = arg;
+
+            // The first entry accepted by isEqual(name, true) is stored, so the saved
+            // value always uses the table's spelling.
+            // NOTE(review): the `true` flag presumably selects a case-insensitive match - confirm in String.
+            for (const char *candidate : kCpuOptimizations) {
+                if (requested.isEqual(candidate, true)) {
+                    m_cpuOptimization = candidate;
+                    return true;
+                }
+            }
+
+            // No entry matched: report the raw argument and reject the option.
+            printf("Invalid CPU optimization %s.\n", arg);
+            return false;
+        }
+
     case CPUAffinityKey: /* --cpu-affinity */
         {
             const char *p  = strstr(arg, "0x");
             return parseUint64(key, p ? strtoull(p, nullptr, 16) : strtoull(arg, nullptr, 10));
         }
 
-#   ifndef XMRIG_NO_ASM
-    case AssemblyKey: /* --asm */
-        m_assembly = Asm::parse(arg);
-        break;
-#   endif
+    case UseGPUKey:
+        {
+            String strArg = arg;
+            std::vector<String> gpuEngines = strArg.split(',');
+            m_gpuEngine.clear(); // each new value replaces the previously collected engine list
+            for(const String &engine : gpuEngines) { // bind by reference: no copy of every token
+                if(engine.isEqual("OPENCL", true))
+                    m_gpuEngine.push_back("OPENCL");
+                else if(engine.isEqual("CUDA", true))
+                    m_gpuEngine.push_back("CUDA");
+                else {
+                    printf("Invalid GPU hasher %s, ignoring.\n", engine.data());
+                }
+            }
+            // Succeeds only if at least one recognized engine was listed.
+            return m_gpuEngine.size() > 0;
+        }
+
+    case GPUIntensityKey:
+        {
+            String strArg = arg;
+            std::vector<String> gpuIntensities = strArg.split(','); // comma-separated list of intensities
+            for (const String &intensity : gpuIntensities) { // bind by reference: no copy of every token
+                double value = strtod(intensity.data(), nullptr); // text that is not a number converts to 0 (strtod semantics)
+                if(value > 100) value = 100; // clamp into 0..100
+                if(value < 0) value = 0;
+                m_gpuIntensity.push_back(value); // appended: values stored earlier are kept
+            }
+            return true;
+        }
+
+    case GPUFilterKey:
+        {
+            String strArg = arg;
+            std::vector<String> gpuFilters = strArg.split(','); // one entry per comma-separated filter
+            for (const String &filter : gpuFilters) { // bind by reference: no copy of every token
+                std::vector<String> explodedFilter = filter.split(':'); // "engine:filter" or just "filter"
+                if(explodedFilter.size() == 1)
+                    m_gpuFilter.push_back(GPUFilter("", explodedFilter[0].data())); // no engine part given
+                else if(explodedFilter.size() >= 2)
+                    m_gpuFilter.push_back(GPUFilter(explodedFilter[0].data(), explodedFilter[1].data())); // parts after the second are ignored
+            }
+            return true;
+        }
 
     default:
         break;
@@ -272,7 +289,7 @@ bool xmrig::Config::parseUint64(int key, uint64_t arg)
     switch (key) {
     case CPUAffinityKey: /* --cpu-affinity */
         if (arg) {
-            m_threads.mask = arg;
+            m_mask = arg;
         }
         break;
 
@@ -288,20 +305,89 @@ void xmrig::Config::parseJSON(const rapidjson::Document &doc)
 {
     CommonConfig::parseJSON(doc);
 
-    const rapidjson::Value &threads = doc["threads"];
+    const auto threads = doc.FindMember("cpu-threads"); // optional key: FindMember() reports absence instead of asserting like operator[]
 
-    if (threads.IsArray()) {
-        for (const rapidjson::Value &value : threads.GetArray()) {
-            if (!value.IsObject()) {
+    if (threads != doc.MemberEnd() && threads->value.IsUint())
+        m_cpuThreads = threads->value.GetUint();
+    else if(threads != doc.MemberEnd() && threads->value.IsString() && strcasecmp(threads->value.GetString(), "all") == 0)
+        m_cpuThreads = Cpu::info()->threads();
+
+    // "cpu-optimization" is optional, so probe with HasMember() before indexing:
+    // rapidjson's operator[] asserts when the member does not exist.
+    if (doc.HasMember("cpu-optimization") && doc["cpu-optimization"].IsString()) {
+        // Canonical (upper-case) spellings of the accepted optimization names.
+        static const char *const kCpuOptimizations[] = {
+            "REF", "SSE2", "SSSE3", "AVX", "AVX2", "AVX512F", "NEON"
+        };
+
+        String value = doc["cpu-optimization"].GetString();
+        const char *canonical = nullptr;
+
+        // The first table entry that isEqual(name, true) accepts wins.
+        for (const char *candidate : kCpuOptimizations) {
+            if (value.isEqual(candidate, true)) {
+                canonical = candidate;
+                break;
+            }
+        }
+
+        // Store the canonical spelling; an unknown name is reported and otherwise
+        // ignored, leaving m_cpuOptimization as it was.
+        if (canonical != nullptr)
+            m_cpuOptimization = canonical;
+        else
+            printf("Invalid CPU optimization %s, ignoring.\n", value.data());
+    }
+
+    // "use-gpu" is optional: probe with HasMember() first, because rapidjson's
+    // operator[] asserts when the member does not exist.
+    if(doc.HasMember("use-gpu") && doc["use-gpu"].IsArray()) {
+        m_gpuEngine.clear();
+
+        for(const rapidjson::Value &value : doc["use-gpu"].GetArray()) {
+            if(!value.IsString()) {
                 continue;
             }
 
-            if (value.HasMember("low_power_mode")) {
-                auto data = CpuThread::parse(value);
+            String engine = value.GetString();
+            if(engine.isEqual("OPENCL", true))
+                m_gpuEngine.push_back("OPENCL");
+            else if(engine.isEqual("CUDA", true))
+                m_gpuEngine.push_back("CUDA");
+            else {
+                printf("Invalid GPU hasher %s, ignoring.\n", engine.data());
+            }
+        }
+    }
 
-                if (data.valid) {
-                    m_threads.cpu.push_back(std::move(data));
-                }
+    // "gpu-intensity" is optional; rapidjson's operator[] must not be used on a missing member.
+    if(doc.HasMember("gpu-intensity") && doc["gpu-intensity"].IsArray()) {
+        for(const rapidjson::Value &value : doc["gpu-intensity"].GetArray()) {
+            // IsNumber(), not IsDouble(): integer entries such as 50 must be accepted too.
+            if(!value.IsNumber()) {
+                continue;
+            }
+
+            double intensity = value.GetDouble(); // GetDouble() converts integer values as well
+            if(intensity > 100) intensity = 100; // clamp into 0..100
+            if(intensity < 0) intensity = 0;
+
+            m_gpuIntensity.push_back(intensity);
+        }
+    }
+
+    // "gpu-filter" is optional: probe with HasMember(), because operator[] asserts on a missing member.
+    if(doc.HasMember("gpu-filter") && doc["gpu-filter"].IsArray()) {
+        for(const rapidjson::Value &value : doc["gpu-filter"].GetArray()) {
+            // Only JSON objects can describe a filter; anything else is skipped.
+            if(!value.IsObject()) {
+                continue;
+            }
+
+            if(value.HasMember("filter")) {
+                auto data = parseGPUFilterConfig(value);
+
+                m_gpuFilter.push_back(data);
             }
         }
     }
@@ -311,25 +397,13 @@ void xmrig::Config::parseJSON(const rapidjson::Document &doc)
 bool xmrig::Config::parseInt(int key, int arg)
 {
     switch (key) {
-    case ThreadsKey: /* --threads */
+    case CPUThreadsKey: /* --threads */
         if (arg >= 0 && arg < 1024) {
-            m_threads.count = arg;
+            m_cpuThreads = arg;
         }
         break;
 
-    case AVKey: /* --av */
-        if (arg >= AV_AUTO && arg < AV_MAX) {
-            m_algoVariant = static_cast<AlgoVariant>(arg);
-        }
-        break;
-
-    case MaxCPUUsageKey: /* --max-cpu-usage */
-        if (m_maxCpuUsage > 0 && arg <= 100) {
-            m_maxCpuUsage = arg;
-        }
-        break;
-
-    case CPUPriorityKey: /* --cpu-priority */
+    case PriorityKey: /* --priority */
         if (arg >= 0 && arg <= 5) {
             m_priority = arg;
         }
@@ -341,39 +415,3 @@ bool xmrig::Config::parseInt(int key, int arg)
 
     return true;
 }
-
-
-xmrig::AlgoVariant xmrig::Config::getAlgoVariant() const
-{
-#   ifndef XMRIG_NO_AEON
-    if (m_algorithm.algo() == xmrig::CRYPTONIGHT_LITE) {
-        return getAlgoVariantLite();
-    }
-#   endif
-
-    if (m_algoVariant <= AV_AUTO || m_algoVariant >= AV_MAX) {
-        return Cpu::info()->hasAES() ? AV_SINGLE : AV_SINGLE_SOFT;
-    }
-
-    if (m_safe && !Cpu::info()->hasAES() && m_algoVariant <= AV_DOUBLE) {
-        return static_cast<AlgoVariant>(m_algoVariant + 2);
-    }
-
-    return m_algoVariant;
-}
-
-
-#ifndef XMRIG_NO_AEON
-xmrig::AlgoVariant xmrig::Config::getAlgoVariantLite() const
-{
-    if (m_algoVariant <= AV_AUTO || m_algoVariant >= AV_MAX) {
-        return Cpu::info()->hasAES() ? AV_DOUBLE : AV_DOUBLE_SOFT;
-    }
-
-    if (m_safe && !Cpu::info()->hasAES() && m_algoVariant <= AV_DOUBLE) {
-        return static_cast<AlgoVariant>(m_algoVariant + 2);
-    }
-
-    return m_algoVariant;
-}
-#endif
diff --git a/src/core/Config.h b/src/core/Config.h
index d2e8c166..f12db222 100644
--- a/src/core/Config.h
+++ b/src/core/Config.h
@@ -28,18 +28,16 @@
 #include <stdint.h>
 #include <vector>
 
-
 #include "common/config/CommonConfig.h"
 #include "common/xmrig.h"
 #include "rapidjson/fwd.h"
-#include "workers/CpuThread.h"
+#include "rapidjson/schema.h"
+#include "HasherConfig.h"
 
 
 namespace xmrig {
 
 
-class ConfigLoader;
-class IThread;
 class IConfigListener;
 class Process;
 
@@ -58,29 +56,22 @@ class Process;
 class Config : public CommonConfig
 {
 public:
-    enum ThreadsMode {
-        Automatic,
-        Simple,
-        Advanced
-    };
-
-
     Config();
 
     bool reload(const char *json);
 
     void getJSON(rapidjson::Document &doc) const override;
 
-    inline AesMode aesMode() const                       { return m_aesMode; }
-    inline AlgoVariant algoVariant() const               { return m_algoVariant; }
-    inline Assembly assembly() const                     { return m_assembly; }
-    inline bool isHugePages() const                      { return m_hugePages; }
     inline bool isShouldSave() const                     { return m_shouldSave && isAutoSave(); }
-    inline const std::vector<IThread *> &threads() const { return m_threads.list; }
+    inline const std::vector<HasherConfig *> &hasherConfigs() const { return m_hashers; }
     inline int priority() const                          { return m_priority; }
-    inline int threadsCount() const                      { return m_threads.list.size(); }
-    inline int64_t affinity() const                      { return m_threads.mask; }
-    inline ThreadsMode threadsMode() const               { return m_threads.mode; }
+    inline int hashersCount() const                      { return m_hashers.size(); } // size of m_hashers, narrowed from size_t to int
+    inline int cpuThreads() const                        { return m_cpuThreads; }
+    inline String cpuOptimization() const                { return m_cpuOptimization; } // returns a copy: do not keep pointers into the temporary
+    inline int64_t cpuAffinity() const                   { return m_mask; } // -1 (the constructor default) is written as null by getJSON()
+    inline std::vector<String> gpuEngine() const         { return m_gpuEngine; } // returns a copy of the list
+    inline std::vector<double> gpuIntensity() const      { return m_gpuIntensity; } // returns a copy of the list
+    inline std::vector<GPUFilter> gpuFilter() const      { return m_gpuFilter; } // returns a copy of the list
 
     static Config *load(Process *process, IConfigListener *listener);
 
@@ -94,36 +85,42 @@ protected:
 private:
     bool parseInt(int key, int arg);
 
-    AlgoVariant getAlgoVariant() const;
-#   ifndef XMRIG_NO_AEON
-    AlgoVariant getAlgoVariantLite() const;
-#   endif
+    static rapidjson::Value toGPUFilterConfig(const GPUFilter &filter, rapidjson::Document &doc) {
+        using namespace rapidjson;
+        auto &allocator = doc.GetAllocator();
+        Value obj(kObjectType);
+        const bool hasEngine = !(filter.engine.empty() || filter.engine == "*"); // an empty or "*" engine is not written out
+        if(hasEngine) obj.AddMember("engine", Value(filter.engine.data(), allocator), allocator);
+        obj.AddMember("filter", Value(filter.filter.data(), allocator), allocator); // string contents are copied into the document
 
+        return obj;
+    }
 
-    struct Threads
-    {
-       inline Threads() : mask(-1L), count(0), mode(Automatic) {}
+    static GPUFilter parseGPUFilterConfig(const rapidjson::Value &object) {
+        std::string engineInfo;
+        std::string filterInfo;
+        // Both members are treated as optional: toGPUFilterConfig() omits "engine" when it is empty or "*",
+        // and rapidjson's operator[] asserts on a missing member, so probe with HasMember() first.
+        if (object.HasMember("filter") && object["filter"].IsString()) {
+            filterInfo = object["filter"].GetString();
+        }
+        if (object.HasMember("engine") && object["engine"].IsString()) {
+            engineInfo = object["engine"].GetString();
+        }
 
-       int64_t mask;
-       size_t count;
-       std::vector<CpuThread::Data> cpu;
-       std::vector<IThread *> list;
-       ThreadsMode mode;
-    };
-
-
-    AesMode m_aesMode;
-    AlgoVariant m_algoVariant;
-    Assembly m_assembly;
-    bool m_hugePages;
-    bool m_safe;
+        return GPUFilter(engineInfo, filterInfo);
+    }
     bool m_shouldSave;
-    int m_maxCpuUsage;
     int m_priority;
-    Threads m_threads;
+    int64_t m_mask;
+    int m_cpuThreads;
+    String m_cpuOptimization;
+    std::vector<String> m_gpuEngine;
+    std::vector<double> m_gpuIntensity;
+    std::vector<GPUFilter> m_gpuFilter;
+    std::vector<HasherConfig *> m_hashers;
 };
 
-
 } /* namespace xmrig */
 
 #endif /* XMRIG_CONFIG_H */
diff --git a/src/core/ConfigLoader_default.h b/src/core/ConfigLoader_default.h
index 8fd0502b..a0f098fc 100644
--- a/src/core/ConfigLoader_default.h
+++ b/src/core/ConfigLoader_default.h
@@ -33,7 +33,7 @@ namespace xmrig {
 const static char *default_config =
 R"===(
 {
-    "algo": "cryptonight",
+    "algo": "argon2",
     "api": {
         "port": 0,
         "access-token": null,
@@ -42,16 +42,13 @@ R"===(
         "ipv6": false,
         "restricted": true
     },
-    "asm": true,
     "autosave": true,
-    "av": 0,
     "background": false,
     "colors": true,
     "cpu-affinity": null,
     "cpu-priority": null,
     "donate-level": 5,
     "huge-pages": true,
-    "hw-aes": null,
     "log-file": null,
     "max-cpu-usage": 100,
     "pools": [
diff --git a/src/core/ConfigLoader_platform.h b/src/core/ConfigLoader_platform.h
index 0b71c3fd..ecfd9844 100644
--- a/src/core/ConfigLoader_platform.h
+++ b/src/core/ConfigLoader_platform.h
@@ -40,7 +40,7 @@
 namespace xmrig {
 
 
-static char const short_options[] = "a:c:kBp:Px:r:R:s:t:T:o:u:O:v:l:S";
+static char const short_options[] = "a:c:Bp:Px:r:R:s:t:T:o:u:O:v:l:S";
 
 
 static struct option const options[] = {
@@ -51,28 +51,28 @@ static struct option const options[] = {
     { "api-id",            1, nullptr, xmrig::IConfig::ApiIdKey          },
     { "api-ipv6",          0, nullptr, xmrig::IConfig::ApiIPv6Key        },
     { "api-no-restricted", 0, nullptr, xmrig::IConfig::ApiRestrictedKey  },
-    { "av",                1, nullptr, xmrig::IConfig::AVKey             },
     { "background",        0, nullptr, xmrig::IConfig::BackgroundKey     },
     { "config",            1, nullptr, xmrig::IConfig::ConfigKey         },
+    { "cpu-threads",       1, nullptr, xmrig::IConfig::CPUThreadsKey     },
+    { "cpu-optimization",  1, nullptr, xmrig::IConfig::CPUOptimizationKey},
     { "cpu-affinity",      1, nullptr, xmrig::IConfig::CPUAffinityKey    },
-    { "cpu-priority",      1, nullptr, xmrig::IConfig::CPUPriorityKey    },
+    { "use-gpu",           1, nullptr, xmrig::IConfig::UseGPUKey         },
+    { "gpu-intensity",     1, nullptr, xmrig::IConfig::GPUIntensityKey   },
+    { "gpu-filter",        1, nullptr, xmrig::IConfig::GPUFilterKey      },
+    { "priority",          1, nullptr, xmrig::IConfig::PriorityKey       },
     { "donate-level",      1, nullptr, xmrig::IConfig::DonateLevelKey    },
     { "dry-run",           0, nullptr, xmrig::IConfig::DryRunKey         },
     { "keepalive",         0, nullptr, xmrig::IConfig::KeepAliveKey      },
     { "log-file",          1, nullptr, xmrig::IConfig::LogFileKey        },
-    { "max-cpu-usage",     1, nullptr, xmrig::IConfig::MaxCPUUsageKey    },
     { "nicehash",          0, nullptr, xmrig::IConfig::NicehashKey       },
     { "no-color",          0, nullptr, xmrig::IConfig::ColorKey          },
     { "no-watch",          0, nullptr, xmrig::IConfig::WatchKey          },
-    { "no-huge-pages",     0, nullptr, xmrig::IConfig::HugePagesKey      },
     { "variant",           1, nullptr, xmrig::IConfig::VariantKey        },
     { "pass",              1, nullptr, xmrig::IConfig::PasswordKey       },
     { "print-time",        1, nullptr, xmrig::IConfig::PrintTimeKey      },
     { "retries",           1, nullptr, xmrig::IConfig::RetriesKey        },
     { "retry-pause",       1, nullptr, xmrig::IConfig::RetryPauseKey     },
-    { "safe",              0, nullptr, xmrig::IConfig::SafeKey           },
     { "syslog",            0, nullptr, xmrig::IConfig::SyslogKey         },
-    { "threads",           1, nullptr, xmrig::IConfig::ThreadsKey        },
     { "url",               1, nullptr, xmrig::IConfig::UrlKey            },
     { "user",              1, nullptr, xmrig::IConfig::UserKey           },
     { "user-agent",        1, nullptr, xmrig::IConfig::UserAgentKey      },
@@ -80,33 +80,30 @@ static struct option const options[] = {
     { "rig-id",            1, nullptr, xmrig::IConfig::RigIdKey          },
     { "tls",               0, nullptr, xmrig::IConfig::TlsKey            },
     { "tls-fingerprint",   1, nullptr, xmrig::IConfig::FingerprintKey    },
-    { "asm",               1, nullptr, xmrig::IConfig::AssemblyKey       },
     { nullptr,             0, nullptr, 0 }
 };
 
 
 static struct option const config_options[] = {
     { "algo",          1, nullptr, xmrig::IConfig::AlgorithmKey   },
-    { "av",            1, nullptr, xmrig::IConfig::AVKey          },
     { "background",    0, nullptr, xmrig::IConfig::BackgroundKey  },
     { "colors",        0, nullptr, xmrig::IConfig::ColorKey       },
+    { "cpu-threads",   1, nullptr, xmrig::IConfig::CPUThreadsKey  },
+    { "cpu-optimization",1, nullptr, xmrig::IConfig::CPUOptimizationKey },
     { "cpu-affinity",  1, nullptr, xmrig::IConfig::CPUAffinityKey },
-    { "cpu-priority",  1, nullptr, xmrig::IConfig::CPUPriorityKey },
+    { "use-gpu",       1, nullptr, xmrig::IConfig::UseGPUKey      },
+    { "gpu-intensity", 1, nullptr, xmrig::IConfig::GPUIntensityKey},
+    { "gpu-filter",    1, nullptr, xmrig::IConfig::GPUFilterKey   },
+    { "priority",      1, nullptr, xmrig::IConfig::PriorityKey    },
     { "donate-level",  1, nullptr, xmrig::IConfig::DonateLevelKey },
     { "dry-run",       0, nullptr, xmrig::IConfig::DryRunKey      },
-    { "huge-pages",    0, nullptr, xmrig::IConfig::HugePagesKey   },
     { "log-file",      1, nullptr, xmrig::IConfig::LogFileKey     },
-    { "max-cpu-usage", 1, nullptr, xmrig::IConfig::MaxCPUUsageKey },
     { "print-time",    1, nullptr, xmrig::IConfig::PrintTimeKey   },
     { "retries",       1, nullptr, xmrig::IConfig::RetriesKey     },
     { "retry-pause",   1, nullptr, xmrig::IConfig::RetryPauseKey  },
-    { "safe",          0, nullptr, xmrig::IConfig::SafeKey        },
     { "syslog",        0, nullptr, xmrig::IConfig::SyslogKey      },
-    { "threads",       1, nullptr, xmrig::IConfig::ThreadsKey     },
     { "user-agent",    1, nullptr, xmrig::IConfig::UserAgentKey   },
     { "watch",         0, nullptr, xmrig::IConfig::WatchKey       },
-    { "hw-aes",        0, nullptr, xmrig::IConfig::HardwareAESKey },
-    { "asm",           1, nullptr, xmrig::IConfig::AssemblyKey    },
     { "autosave",      0, nullptr, xmrig::IConfig::AutoSaveKey    },
     { nullptr,         0, nullptr, 0 }
 };
diff --git a/src/core/HasherConfig.cpp b/src/core/HasherConfig.cpp
new file mode 100644
index 00000000..901fa65e
--- /dev/null
+++ b/src/core/HasherConfig.cpp
@@ -0,0 +1,144 @@
+/* XMRig
+ * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
+ * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
+ * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
+ * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
+ * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
+ * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
+ * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
+ * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <string>
+#include <vector>
+#include <cstdint>
+
+#include "crypto/argon2_hasher/common/DLLExport.h"
+
+#include "HasherConfig.h"
+
+// Running GPU card total; a static member, so one value is shared by all instances.
+int xmrig::HasherConfig::m_gpuCardsCount = 0;
+
+/*
+ * Public constructor: builds a configuration that is not bound to a hasher.
+ * The index is -1 converted to the unsigned index type, the type is empty and
+ * the GPU filter list is copied unfiltered. Bound copies come from clone().
+ */
+xmrig::HasherConfig::HasherConfig(xmrig::Algo algorithm, xmrig::Variant variant, int priority, int cpuThreads,
+                                  int64_t cpuAffinity, std::string cpuOptimization,
+                                  std::vector<double> &gpuIntensity, std::vector<GPUFilter> &gpuFilter) :
+        m_index(static_cast<size_t>(-1)), // m_index is a size_t; spell the wrap-around out
+        m_type(""),
+        m_algorithm(algorithm),
+        m_variant(variant),
+        m_priority(priority),
+        m_cpuThreads(cpuThreads),
+        m_cpuAffinity(cpuAffinity),
+        m_cpuOptimization(cpuOptimization),
+        m_gpuIntensity(gpuIntensity),
+        m_gpuFilter(gpuFilter) {
+}
+
+/*
+ * Private constructor used by clone(): binds the configuration to hasher
+ * `index` of kind `type` and keeps only the GPU filters whose engine is
+ * empty, "*" or equal to `type`.
+ */
+xmrig::HasherConfig::HasherConfig(int index, std::string type, xmrig::Algo algorithm, xmrig::Variant variant, int priority, int cpuThreads,
+                                  int64_t cpuAffinity,  std::string cpuOptimization,
+                                  std::vector<double> &gpuIntensity, std::vector<GPUFilter> &gpuFilter) :
+        m_index(index),
+        m_type(type),
+        m_algorithm(algorithm),
+        m_variant(variant),
+        m_priority(priority),
+        m_cpuThreads(cpuThreads),
+        m_cpuAffinity(cpuAffinity),
+        m_cpuOptimization(cpuOptimization),
+        m_gpuIntensity(gpuIntensity) {
+    // Iterate by const reference so each filter is copied once (on push_back), not twice.
+    for(const GPUFilter &filter : gpuFilter) {
+        if(filter.engine.empty() || filter.engine == "*" || filter.engine == type) {
+            m_gpuFilter.push_back(filter);
+        }
+    }
+}
+
+/*
+ * Returns the intensity configured for card `cardIndex`. When the index is
+ * negative or past the end of the list, the first configured value is used;
+ * when nothing is configured at all, the result is 50.
+ */
+double xmrig::HasherConfig::getGPUIntensity(int cardIndex) {
+    if(cardIndex >= 0 && static_cast<size_t>(cardIndex) < m_gpuIntensity.size())
+        return m_gpuIntensity[cardIndex];
+
+    if(!m_gpuIntensity.empty())
+        return m_gpuIntensity[0];
+
+    return 50;
+}
+
+/*
+ * Returns the bit position (0-63) of the `cpuIndex`-th set bit of the affinity
+ * mask, counting set bits from bit 0. Returns -1 when the mask is -1, when
+ * `cpuIndex` is negative, or when the mask has fewer set bits than requested.
+ */
+int64_t xmrig::HasherConfig::getCPUAffinity(int cpuIndex) {
+    if (m_cpuAffinity == -1L || cpuIndex < 0) {
+        return -1L;
+    }
+
+    const uint64_t mask = static_cast<uint64_t>(m_cpuAffinity);
+    size_t remaining = static_cast<size_t>(cpuIndex); // set bits still to skip
+
+    for (size_t bit = 0; bit < 64; bit++) {
+        if (!(mask & (1ULL << bit))) {
+            continue;
+        }
+
+        if (remaining == 0) {
+            return static_cast<int64_t>(bit);
+        }
+
+        remaining--;
+    }
+
+    return -1L;
+}
+
+/*
+ * Returns a copy of this configuration bound to hasher `index` of kind
+ * `hasherType`. The object is allocated with new; nothing in this file frees it.
+ */
+xmrig::HasherConfig *xmrig::HasherConfig::clone(int index, std::string hasherType) {
+    return new HasherConfig(index, hasherType, m_algorithm, m_variant, m_priority, m_cpuThreads, m_cpuAffinity, m_cpuOptimization, m_gpuIntensity, m_gpuFilter);
+}
+
+/*
+ * Arithmetic mean of the configured intensities; 0 when none are configured.
+ */
+double xmrig::HasherConfig::getAverageGPUIntensity() {
+    if(m_gpuIntensity.empty())
+        return 0;
+
+    double sum = 0;
+    for(double intensity : m_gpuIntensity) sum += intensity;
+    return sum / m_gpuIntensity.size();
+}
+
diff --git a/src/core/HasherConfig.h b/src/core/HasherConfig.h
new file mode 100644
index 00000000..03fb4073
--- /dev/null
+++ b/src/core/HasherConfig.h
@@ -0,0 +1,120 @@
+/* XMRig
+ * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
+ * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
+ * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
+ * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
+ * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
+ * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
+ * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
+ * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef XMRIG_HASHERCONFIG_H
+#define XMRIG_HASHERCONFIG_H
+
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "common/xmrig.h"
+#include "crypto/argon2_hasher/common/common.h"
+
+namespace xmrig {
+
+/*
+ * One GPU selection rule: `engine` names the hasher type the rule applies to
+ * and `filter` carries the rule text (its format is not defined in this header).
+ */
+struct GPUFilter {
+    GPUFilter(std::string engine, std::string filter) : engine(engine), filter(filter) {}
+    std::string engine;
+    std::string filter;
+};
+
+/*
+ * Settings handed to a hasher: algorithm and variant, process priority, CPU
+ * thread count / affinity mask / optimization name, and per-card GPU
+ * intensities and filters.
+ */
+class DLLEXPORT HasherConfig
+{
+public:
+    // Creates a configuration that is not yet bound to a hasher index or type.
+    HasherConfig(Algo algorithm,
+                 Variant variant,
+                 int priority,
+                 int cpuThreads,
+                 int64_t cpuAffinity,
+                 std::string cpuOptimization,
+                 std::vector<double> &gpuIntensity,
+                 std::vector<GPUFilter> &gpuFilter);
+
+    // Returns a new copy of this configuration bound to `index` / `hasherType`.
+    HasherConfig *clone(int index, std::string hasherType);
+
+    // Accessors for the stored values; strings are returned by value.
+    inline size_t index() const         { return m_index; }
+    inline std::string type() const     { return m_type; }
+    inline Algo algorithm() const       { return m_algorithm; }
+    inline Variant variant() const      { return m_variant; }
+    inline int priority() const         { return m_priority; }
+    inline int cpuThreads() const       { return m_cpuThreads; }
+    inline std::string cpuOptimization() const { return m_cpuOptimization; }
+    inline std::vector<GPUFilter> &gpuFilter() { return m_gpuFilter; }
+
+    // Mean of the configured GPU intensities.
+    double getAverageGPUIntensity();
+    // Intensity to use for the card at `cardIndex`.
+    double getGPUIntensity(int cardIndex);
+    // CPU id selected from the affinity mask for the worker at `cpuIndex`.
+    int64_t getCPUAffinity(int cpuIndex);
+
+    // GPU card counter; backed by a static member, so it is shared by all instances.
+    inline void addGPUCardsCount(int count) { m_gpuCardsCount += count; }
+    inline int getGPUCardsCount() { return m_gpuCardsCount; }
+
+private:
+    // Bound constructor; declared private so that instances are bound via clone().
+    HasherConfig(int index,
+                 std::string type,
+                 Algo algorithm,
+                 Variant variant,
+                 int priority,
+                 int cpuThreads,
+                 int64_t cpuAffinity,
+                 std::string cpuOptimization,
+                 std::vector<double> &gpuIntensity,
+                 std::vector<GPUFilter> &gpuFilter);
+
+    const size_t m_index;
+    const std::string m_type;
+    const Algo m_algorithm;
+    const Variant m_variant;
+    const int m_priority;
+    const int m_cpuThreads;
+    const int64_t m_cpuAffinity;
+    const std::string m_cpuOptimization;
+    std::vector<double> m_gpuIntensity;
+    std::vector<GPUFilter> m_gpuFilter;
+
+    static int m_gpuCardsCount;
+};
+
+} /* namespace xmrig */
+
+#endif /*XMRIG_HASHERCONFIG_H*/
diff --git a/src/core/cpu/AdvancedCpuInfo.cpp b/src/core/cpu/AdvancedCpuInfo.cpp
index df6a385e..d844e798 100644
--- a/src/core/cpu/AdvancedCpuInfo.cpp
+++ b/src/core/cpu/AdvancedCpuInfo.cpp
@@ -31,7 +31,6 @@
 
 
 xmrig::AdvancedCpuInfo::AdvancedCpuInfo() :
-    m_assembly(ASM_NONE),
     m_aes(false),
     m_avx2(false),
     m_L2_exclusive(false),
@@ -76,20 +75,13 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() :
 
     if (data.flags[CPU_FEATURE_AES]) {
         m_aes = true;
-
-        if (data.vendor == VENDOR_AMD) {
-            m_assembly = (data.ext_family >= 23) ? ASM_RYZEN : ASM_BULLDOZER;
-        }
-        else if (data.vendor == VENDOR_INTEL) {
-            m_assembly = ASM_INTEL;
-        }
     }
 
     m_avx2 = data.flags[CPU_FEATURE_AVX2] && data.flags[CPU_FEATURE_OSXSAVE];
 }
 
 
-size_t xmrig::AdvancedCpuInfo::optimalThreadsCount(size_t memSize, int maxCpuUsage) const
+size_t xmrig::AdvancedCpuInfo::optimalThreadsCount(size_t memSize) const
 {
     if (threads() == 1) {
         return 1;
@@ -120,9 +112,5 @@ size_t xmrig::AdvancedCpuInfo::optimalThreadsCount(size_t memSize, int maxCpuUsa
         count = threads();
     }
 
-    if (((float) count / threads() * 100) > maxCpuUsage) {
-        count = (int) ceil((float) threads() * (maxCpuUsage / 100.0));
-    }
-
     return count < 1 ? 1 : count;
 }
diff --git a/src/core/cpu/AdvancedCpuInfo.h b/src/core/cpu/AdvancedCpuInfo.h
index 0765da33..8377189c 100644
--- a/src/core/cpu/AdvancedCpuInfo.h
+++ b/src/core/cpu/AdvancedCpuInfo.h
@@ -38,9 +38,8 @@ public:
     AdvancedCpuInfo();
 
 protected:
-    size_t optimalThreadsCount(size_t memSize, int maxCpuUsage) const override;
+    size_t optimalThreadsCount(size_t memSize) const override;
 
-    inline Assembly assembly() const override       { return m_assembly; }
     inline bool hasAES() const override             { return m_aes; }
     inline bool hasAVX2() const override            { return m_avx2; }
     inline bool isSupported() const override        { return true; }
@@ -59,7 +58,6 @@ protected:
 #   endif
 
 private:
-    Assembly m_assembly;
     bool m_aes;
     bool m_avx2;
     bool m_L2_exclusive;
diff --git a/src/core/usage.h b/src/core/usage.h
index 0d5c4781..f85a04dd 100644
--- a/src/core/usage.h
+++ b/src/core/usage.h
@@ -36,32 +36,26 @@ static char const usage[] = "\
 Usage: " APP_ID " [OPTIONS]\n\
 Options:\n\
   -a, --algo=ALGO          specify the algorithm to use\n\
-                             cryptonight\n"
-#ifndef XMRIG_NO_AEON
-"\
-                             cryptonight-lite\n"
-#endif
-#ifndef XMRIG_NO_SUMO
-"\
-                             cryptonight-heavy\n"
-#endif
-"\
+                                   chukwa\n\
+                                   chukwa/wrkz\n\
   -o, --url=URL            URL of mining server\n\
   -O, --userpass=U:P       username:password pair for mining server\n\
   -u, --user=USERNAME      username for mining server\n\
   -p, --pass=PASSWORD      password for mining server\n\
       --rig-id=ID          rig identifier for pool-side statistics (needs pool support)\n\
-  -t, --threads=N          number of miner threads\n\
-  -v, --av=N               algorithm variation, 0 auto select\n\
+  -t, --cpu-threads=N      number of cpu miner threads - use 0 to disable\n\
+      --cpu-affinity       set process affinity to CPU core(s), mask 0x3 for cores 0 and 1\n\
+      --cpu-optimization=REF|SSE2|SSSE3|AVX|AVX2|AVX512F|NEON force specific optimization for cpu mining\n\
+      --use-gpu=CUDA,OPENCL       gpu engine to use, ignore this param to disable gpu support\n\
+      --gpu-intensity=v1,v2...    percent of gpu memory to use - you can have different values for each card (default 50)\n\
+      --gpu-filter=<filter1>,CUDA:<filter2>,OPENCL:<filter3>  gpu filters to select cards\n\
   -k, --keepalive          send keepalived packet for prevent timeout (needs pool support)\n\
       --nicehash           enable nicehash.com support\n\
       --tls                enable SSL/TLS support (needs pool support)\n\
       --tls-fingerprint=F  pool TLS certificate fingerprint, if set enable strict certificate pinning\n\
   -r, --retries=N          number of times to retry before switch to backup server (default: 5)\n\
   -R, --retry-pause=N      time to pause between retries (default: 5)\n\
-      --cpu-affinity       set process affinity to CPU core(s), mask 0x3 for cores 0 and 1\n\
-      --cpu-priority       set process priority (0 idle, 2 normal to 5 highest)\n\
-      --no-huge-pages      disable huge pages support\n\
+      --priority           set process priority (0 idle, 2 normal to 5 highest)\n\
       --no-color           disable colored output\n\
       --variant            algorithm PoW variant\n\
       --donate-level=N     donate level, default 5%% (5 minutes in 100 minutes)\n\
@@ -74,9 +68,6 @@ Options:\n\
   -S, --syslog             use system log for output messages\n"
 # endif
 "\
-      --max-cpu-usage=N    maximum CPU usage for automatic threads mode (default 75)\n\
-      --safe               safe adjust threads and av settings for current CPU\n\
-      --asm=ASM            ASM code for cn/2, possible values: auto, none, intel, ryzen, bulldozer.\n\
       --print-time=N       print hashrate report every N seconds\n\
       --api-port=N         port for the miner API\n\
       --api-access-token=T access token for API\n\
diff --git a/src/crypto/Argon2_constants.h b/src/crypto/Argon2_constants.h
new file mode 100644
index 00000000..fc1982f4
--- /dev/null
+++ b/src/crypto/Argon2_constants.h
@@ -0,0 +1,105 @@
+#ifndef XMRIG_ARGON2_CONSTANTS_H
+#define XMRIG_ARGON2_CONSTANTS_H
+
+
+#include <stddef.h>
+#include <stdint.h>
+
+
+#include "common/xmrig.h"
+
+namespace xmrig
+{
+    // Argon2 flavour selector.
+    // NOTE(review): the reference Argon2 library numbers its types
+    // Argon2_d = 0, Argon2_i = 1, Argon2_id = 2; only ID matches here. Confirm that
+    // no consumer passes I or D straight through to that API.
+    enum Argon2Algo {
+        I = 0,
+        D = 1,
+        ID = 2
+    };
+
+    // Salt and hash lengths (presumably in bytes - confirm against the hasher).
+    constexpr const size_t    ARGON2_SALTLEN                  = 16;
+    constexpr const size_t    ARGON2_HASHLEN                  = 32;
+
+    // Cost parameters for VARIANT_CHUKWA (memory unit presumably KiB - confirm).
+    constexpr const size_t    ARGON2_MEMORY_CHUKWA            = 512;
+    constexpr const size_t    ARGON2_ITERS_CHUKWA             = 3;
+    constexpr const size_t    ARGON2_PARALLELISM_CHUKWA       = 1;
+
+    // Cost parameters for VARIANT_CHUKWA_LITE.
+    constexpr const size_t    ARGON2_MEMORY_CHUKWA_LITE       = 256;
+    constexpr const size_t    ARGON2_ITERS_CHUKWA_LITE        = 4;
+    constexpr const size_t    ARGON2_PARALLELISM_CHUKWA_LITE  = 1;
+
+    // Argon2 flavour shared by both variants.
+    constexpr const int       ARGON2_ALGO_CHUKWA              = Argon2Algo::ID;
+
+    // Returns the Argon2Algo value for `variant`. Any other variant yields 0, which
+    // is also the value of Argon2Algo::I, so callers cannot tell the two apart.
+    inline int argon2_select_algo(Variant variant)
+    {
+        switch (variant)
+        {
+            case VARIANT_CHUKWA:
+            case VARIANT_CHUKWA_LITE:
+                return ARGON2_ALGO_CHUKWA;
+            default:
+                break;
+        }
+
+        return 0;
+    }
+
+    // Returns the memory cost for `variant`, or 0 for any other variant.
+    inline uint64_t argon2_select_memory(Variant variant)
+    {
+        switch (variant)
+        {
+            case VARIANT_CHUKWA:
+                return ARGON2_MEMORY_CHUKWA;
+            case VARIANT_CHUKWA_LITE:
+                return ARGON2_MEMORY_CHUKWA_LITE;
+            default:
+                break;
+        }
+
+        return 0;
+    }
+
+    // Returns the iteration count for `variant`, or 0 for any other variant.
+    inline uint32_t argon2_select_iters(Variant variant)
+    {
+        switch (variant)
+        {
+            case VARIANT_CHUKWA:
+                return ARGON2_ITERS_CHUKWA;
+            case VARIANT_CHUKWA_LITE:
+                return ARGON2_ITERS_CHUKWA_LITE;
+            default:
+                break;
+        }
+
+        return 0;
+    }
+
+    // Returns the parallelism (lane count) for `variant`, or 0 for any other variant.
+    inline uint32_t argon2_select_parallelism(Variant variant)
+    {
+        switch (variant)
+        {
+            case VARIANT_CHUKWA:
+                return ARGON2_PARALLELISM_CHUKWA;
+            case VARIANT_CHUKWA_LITE:
+                return ARGON2_PARALLELISM_CHUKWA_LITE;
+            default:
+                break;
+        }
+
+        return 0;
+    }
+}
+
+#endif /* XMRIG_ARGON2_CONSTANTS_H */
diff --git a/src/crypto/Argon2_test.h b/src/crypto/Argon2_test.h
new file mode 100644
index 00000000..e69de29b
diff --git a/src/crypto/Asm.cpp b/src/crypto/Asm.cpp
deleted file mode 100644
index 88812c6c..00000000
--- a/src/crypto/Asm.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018      SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2018 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-#include <assert.h>
-#include <string.h>
-
-
-#ifdef _MSC_VER
-#   define strncasecmp _strnicmp
-#   define strcasecmp  _stricmp
-#endif
-
-
-#include "crypto/Asm.h"
-#include "rapidjson/document.h"
-
-
-static const char *asmNames[] = {
-    "none",
-    "auto",
-    "intel",
-    "ryzen",
-    "bulldozer"
-};
-
-
-xmrig::Assembly xmrig::Asm::parse(const char *assembly, Assembly defaultValue)
-{
-    constexpr size_t const size = sizeof(asmNames) / sizeof((asmNames)[0]);
-    assert(assembly != nullptr);
-    assert(ASM_MAX == size);
-
-    if (assembly == nullptr) {
-        return defaultValue;
-    }
-
-    for (size_t i = 0; i < size; i++) {
-        if (strcasecmp(assembly, asmNames[i]) == 0) {
-            return static_cast<Assembly>(i);
-        }
-    }
-
-    return defaultValue;
-}
-
-
-xmrig::Assembly xmrig::Asm::parse(const rapidjson::Value &value, Assembly defaultValue)
-{
-    if (value.IsBool()) {
-        return parse(value.GetBool());
-    }
-
-    if (value.IsString()) {
-        return parse(value.GetString(), defaultValue);
-    }
-
-    return defaultValue;
-}
-
-
-const char *xmrig::Asm::toString(Assembly assembly)
-{
-    return asmNames[assembly];
-}
-
-
-rapidjson::Value xmrig::Asm::toJSON(Assembly assembly)
-{
-    using namespace rapidjson;
-
-    if (assembly == ASM_NONE) {
-        return Value(false);
-    }
-
-    if (assembly == ASM_AUTO) {
-        return Value(true);
-    }
-
-    return Value(StringRef(toString(assembly)));
-}
diff --git a/src/crypto/Asm.h b/src/crypto/Asm.h
deleted file mode 100644
index 3b755fd6..00000000
--- a/src/crypto/Asm.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2016-2018 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef XMRIG_ASM_H
-#define XMRIG_ASM_H
-
-
-#include "common/xmrig.h"
-#include "rapidjson/fwd.h"
-
-
-namespace xmrig {
-
-
-class Asm
-{
-public:
-    static Assembly parse(const char *assembly, Assembly defaultValue = ASM_AUTO);
-    static Assembly parse(const rapidjson::Value &value, Assembly defaultValue = ASM_AUTO);
-    static const char *toString(Assembly assembly);
-    static rapidjson::Value toJSON(Assembly assembly);
-
-    inline static Assembly parse(bool enable) { return enable ? ASM_AUTO : ASM_NONE; }
-};
-
-
-} /* namespace xmrig */
-
-
-#endif /* XMRIG_ASM_H */
diff --git a/src/crypto/CryptoNight.h b/src/crypto/CryptoNight.h
deleted file mode 100644
index b1ec2371..00000000
--- a/src/crypto/CryptoNight.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
- * Copyright 2016-2018 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef XMRIG_CRYPTONIGHT_H
-#define XMRIG_CRYPTONIGHT_H
-
-
-#include <stddef.h>
-#include <stdint.h>
-
-#if defined _MSC_VER || defined XMRIG_ARM
-#define ABI_ATTRIBUTE
-#else
-#define ABI_ATTRIBUTE __attribute__((ms_abi))
-#endif
-
-struct cryptonight_ctx;
-typedef void(*cn_mainloop_fun_ms_abi)(cryptonight_ctx**) ABI_ATTRIBUTE;
-
-struct cryptonight_r_data {
-    int variant;
-    uint64_t height;
-
-    bool match(const int v, const uint64_t h) const { return (v == variant) && (h == height); }
-};
-
-struct cryptonight_ctx {
-    alignas(16) uint8_t state[224];
-    alignas(16) uint8_t *memory;
-
-    uint8_t unused[40];
-    const uint32_t* saes_table;
-
-    cn_mainloop_fun_ms_abi generated_code;
-    cn_mainloop_fun_ms_abi generated_code_double;
-    cryptonight_r_data generated_code_data;
-    cryptonight_r_data generated_code_double_data;
-};
-
-
-#endif /* XMRIG_CRYPTONIGHT_H */
diff --git a/src/crypto/CryptoNight_arm.h b/src/crypto/CryptoNight_arm.h
deleted file mode 100644
index d762929c..00000000
--- a/src/crypto/CryptoNight_arm.h
+++ /dev/null
@@ -1,844 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik  <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler       <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones  <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466     <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee    <jayddee246@gmail.com>
- * Copyright 2016      Imran Yusuff <https://github.com/imranyusuff>
- * Copyright 2017-2019 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
- * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef XMRIG_CRYPTONIGHT_ARM_H
-#define XMRIG_CRYPTONIGHT_ARM_H
-
-
-#include "common/crypto/keccak.h"
-#include "common/utils/mm_malloc.h"
-#include "crypto/CryptoNight.h"
-#include "crypto/CryptoNight_constants.h"
-#include "crypto/CryptoNight_monero.h"
-#include "crypto/soft_aes.h"
-
-
-extern "C"
-{
-#include "crypto/c_groestl.h"
-#include "crypto/c_blake256.h"
-#include "crypto/c_jh.h"
-#include "crypto/c_skein.h"
-}
-
-
-static inline void do_blake_hash(const uint8_t *input, size_t len, uint8_t *output) {
-    blake256_hash(output, input, len);
-}
-
-
-static inline void do_groestl_hash(const uint8_t *input, size_t len, uint8_t *output) {
-    groestl(input, len * 8, output);
-}
-
-
-static inline void do_jh_hash(const uint8_t *input, size_t len, uint8_t *output) {
-    jh_hash(32 * 8, input, 8 * len, output);
-}
-
-
-static inline void do_skein_hash(const uint8_t *input, size_t len, uint8_t *output) {
-    xmr_skein(input, output);
-}
-
-
-void (* const extra_hashes[4])(const uint8_t *, size_t, uint8_t *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};
-
-
-static inline __attribute__((always_inline)) __m128i _mm_set_epi64x(const uint64_t a, const uint64_t b)
-{
-    return vcombine_u64(vcreate_u64(b), vcreate_u64(a));
-}
-
-
-#if __ARM_FEATURE_CRYPTO
-static inline __attribute__((always_inline)) __m128i _mm_aesenc_si128(__m128i v, __m128i rkey)
-{
-    alignas(16) const __m128i zero = { 0 };
-    return veorq_u8(vaesmcq_u8(vaeseq_u8(v, zero)), rkey );
-}
-#else
-static inline __attribute__((always_inline)) __m128i _mm_aesenc_si128(__m128i v, __m128i rkey)
-{
-    alignas(16) const __m128i zero = { 0 };
-    return zero;
-}
-#endif
-
-
-/* this one was not implemented yet so here it is */
-static inline __attribute__((always_inline)) uint64_t _mm_cvtsi128_si64(__m128i a)
-{
-    return vgetq_lane_u64(a, 0);
-}
-
-
-#if defined (__arm64__) || defined (__aarch64__)
-static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t* hi)
-{
-    unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
-    *hi = r >> 64;
-    return (uint64_t) r;
-}
-#else
-static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
-    // multiplier   = ab = a * 2^32 + b
-    // multiplicand = cd = c * 2^32 + d
-    // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
-    uint64_t a = multiplier >> 32;
-    uint64_t b = multiplier & 0xFFFFFFFF;
-    uint64_t c = multiplicand >> 32;
-    uint64_t d = multiplicand & 0xFFFFFFFF;
-
-    //uint64_t ac = a * c;
-    uint64_t ad = a * d;
-    //uint64_t bc = b * c;
-    uint64_t bd = b * d;
-
-    uint64_t adbc = ad + (b * c);
-    uint64_t adbc_carry = adbc < ad ? 1 : 0;
-
-    // multiplier * multiplicand = product_hi * 2^64 + product_lo
-    uint64_t product_lo = bd + (adbc << 32);
-    uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
-    *product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
-
-    return product_lo;
-}
-#endif
-
-
-// This will shift and xor tmp1 into itself as 4 32-bit vals such as
-// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
-static inline __m128i sl_xor(__m128i tmp1)
-{
-    __m128i tmp4;
-    tmp4 = _mm_slli_si128(tmp1, 0x04);
-    tmp1 = _mm_xor_si128(tmp1, tmp4);
-    tmp4 = _mm_slli_si128(tmp4, 0x04);
-    tmp1 = _mm_xor_si128(tmp1, tmp4);
-    tmp4 = _mm_slli_si128(tmp4, 0x04);
-    tmp1 = _mm_xor_si128(tmp1, tmp4);
-    return tmp1;
-}
-
-
-template<uint8_t rcon>
-static inline void soft_aes_genkey_sub(__m128i* xout0, __m128i* xout2)
-{
-    __m128i xout1 = soft_aeskeygenassist<rcon>(*xout2);
-    xout1  = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
-    *xout0 = sl_xor(*xout0);
-    *xout0 = _mm_xor_si128(*xout0, xout1);
-    xout1  = soft_aeskeygenassist<0x00>(*xout0);
-    xout1  = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
-    *xout2 = sl_xor(*xout2);
-    *xout2 = _mm_xor_si128(*xout2, xout1);
-}
-
-
-template<bool SOFT_AES>
-static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
-{
-    __m128i xout0 = _mm_load_si128(memory);
-    __m128i xout2 = _mm_load_si128(memory + 1);
-    *k0 = xout0;
-    *k1 = xout2;
-
-    soft_aes_genkey_sub<0x01>(&xout0, &xout2);
-    *k2 = xout0;
-    *k3 = xout2;
-
-    soft_aes_genkey_sub<0x02>(&xout0, &xout2);
-    *k4 = xout0;
-    *k5 = xout2;
-
-    soft_aes_genkey_sub<0x04>(&xout0, &xout2);
-    *k6 = xout0;
-    *k7 = xout2;
-
-    soft_aes_genkey_sub<0x08>(&xout0, &xout2);
-    *k8 = xout0;
-    *k9 = xout2;
-}
-
-
-template<bool SOFT_AES>
-static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
-{
-    if (SOFT_AES) {
-        *x0 = soft_aesenc((uint32_t*)x0, key);
-        *x1 = soft_aesenc((uint32_t*)x1, key);
-        *x2 = soft_aesenc((uint32_t*)x2, key);
-        *x3 = soft_aesenc((uint32_t*)x3, key);
-        *x4 = soft_aesenc((uint32_t*)x4, key);
-        *x5 = soft_aesenc((uint32_t*)x5, key);
-        *x6 = soft_aesenc((uint32_t*)x6, key);
-        *x7 = soft_aesenc((uint32_t*)x7, key);
-    }
-    else {
-        *x0 = _mm_aesenc_si128(*x0, key);
-        *x1 = _mm_aesenc_si128(*x1, key);
-        *x2 = _mm_aesenc_si128(*x2, key);
-        *x3 = _mm_aesenc_si128(*x3, key);
-        *x4 = _mm_aesenc_si128(*x4, key);
-        *x5 = _mm_aesenc_si128(*x5, key);
-        *x6 = _mm_aesenc_si128(*x6, key);
-        *x7 = _mm_aesenc_si128(*x7, key);
-    }
-}
-
-
-inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3, __m128i& x4, __m128i& x5, __m128i& x6, __m128i& x7)
-{
-    __m128i tmp0 = x0;
-    x0 = _mm_xor_si128(x0, x1);
-    x1 = _mm_xor_si128(x1, x2);
-    x2 = _mm_xor_si128(x2, x3);
-    x3 = _mm_xor_si128(x3, x4);
-    x4 = _mm_xor_si128(x4, x5);
-    x5 = _mm_xor_si128(x5, x6);
-    x6 = _mm_xor_si128(x6, x7);
-    x7 = _mm_xor_si128(x7, tmp0);
-}
-
-
-template<xmrig::Algo ALGO, size_t MEM, bool SOFT_AES>
-static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
-{
-    __m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
-    __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
-
-    aes_genkey<SOFT_AES>(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
-
-    xin0 = _mm_load_si128(input + 4);
-    xin1 = _mm_load_si128(input + 5);
-    xin2 = _mm_load_si128(input + 6);
-    xin3 = _mm_load_si128(input + 7);
-    xin4 = _mm_load_si128(input + 8);
-    xin5 = _mm_load_si128(input + 9);
-    xin6 = _mm_load_si128(input + 10);
-    xin7 = _mm_load_si128(input + 11);
-
-    if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
-        for (size_t i = 0; i < 16; i++) {
-            aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            aes_round<SOFT_AES>(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            aes_round<SOFT_AES>(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            aes_round<SOFT_AES>(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-
-            mix_and_propagate(xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
-        }
-    }
-
-    for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) {
-        aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-
-        _mm_store_si128(output + i + 0, xin0);
-        _mm_store_si128(output + i + 1, xin1);
-        _mm_store_si128(output + i + 2, xin2);
-        _mm_store_si128(output + i + 3, xin3);
-        _mm_store_si128(output + i + 4, xin4);
-        _mm_store_si128(output + i + 5, xin5);
-        _mm_store_si128(output + i + 6, xin6);
-        _mm_store_si128(output + i + 7, xin7);
-    }
-}
-
-
-#ifndef XMRIG_NO_CN_GPU
-template<xmrig::Algo ALGO, size_t MEM>
-void cn_explode_scratchpad_gpu(const uint8_t *input, uint8_t *output)
-{
-    constexpr size_t hash_size = 200; // 25x8 bytes
-    alignas(16) uint64_t hash[25];
-
-    for (uint64_t i = 0; i < MEM / 512; i++)
-    {
-        memcpy(hash, input, hash_size);
-        hash[0] ^= i;
-
-        xmrig::keccakf(hash, 24);
-        memcpy(output, hash, 160);
-        output += 160;
-
-        xmrig::keccakf(hash, 24);
-        memcpy(output, hash, 176);
-        output += 176;
-
-        xmrig::keccakf(hash, 24);
-        memcpy(output, hash, 176);
-        output += 176;
-    }
-}
-#endif
-
-
-template<xmrig::Algo ALGO, size_t MEM, bool SOFT_AES>
-static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
-{
-    __m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
-    __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
-
-    aes_genkey<SOFT_AES>(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
-
-    xout0 = _mm_load_si128(output + 4);
-    xout1 = _mm_load_si128(output + 5);
-    xout2 = _mm_load_si128(output + 6);
-    xout3 = _mm_load_si128(output + 7);
-    xout4 = _mm_load_si128(output + 8);
-    xout5 = _mm_load_si128(output + 9);
-    xout6 = _mm_load_si128(output + 10);
-    xout7 = _mm_load_si128(output + 11);
-
-    for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
-    {
-        xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
-        xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
-        xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
-        xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
-        xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
-        xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
-        xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
-        xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
-
-        aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-
-        if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
-            mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
-        }
-    }
-
-    if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
-        for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) {
-            xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
-            xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
-            xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
-            xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
-            xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
-            xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
-            xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
-            xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
-
-            aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-
-            mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
-        }
-
-        for (size_t i = 0; i < 16; i++) {
-            aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-
-            mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
-        }
-    }
-
-    _mm_store_si128(output + 4, xout0);
-    _mm_store_si128(output + 5, xout1);
-    _mm_store_si128(output + 6, xout2);
-    _mm_store_si128(output + 7, xout3);
-    _mm_store_si128(output + 8, xout4);
-    _mm_store_si128(output + 9, xout5);
-    _mm_store_si128(output + 10, xout6);
-    _mm_store_si128(output + 11, xout7);
-}
-
-
-static inline __m128i aes_round_tweak_div(const __m128i &in, const __m128i &key)
-{
-    alignas(16) uint32_t k[4];
-    alignas(16) uint32_t x[4];
-
-    _mm_store_si128((__m128i*) k, key);
-    _mm_store_si128((__m128i*) x, _mm_xor_si128(in, _mm_set_epi64x(0xffffffffffffffff, 0xffffffffffffffff)));
-
-    #define BYTE(p, i) ((unsigned char*)&x[p])[i]
-    k[0] ^= saes_table[0][BYTE(0, 0)] ^ saes_table[1][BYTE(1, 1)] ^ saes_table[2][BYTE(2, 2)] ^ saes_table[3][BYTE(3, 3)];
-    x[0] ^= k[0];
-    k[1] ^= saes_table[0][BYTE(1, 0)] ^ saes_table[1][BYTE(2, 1)] ^ saes_table[2][BYTE(3, 2)] ^ saes_table[3][BYTE(0, 3)];
-    x[1] ^= k[1];
-    k[2] ^= saes_table[0][BYTE(2, 0)] ^ saes_table[1][BYTE(3, 1)] ^ saes_table[2][BYTE(0, 2)] ^ saes_table[3][BYTE(1, 3)];
-    x[2] ^= k[2];
-    k[3] ^= saes_table[0][BYTE(3, 0)] ^ saes_table[1][BYTE(0, 1)] ^ saes_table[2][BYTE(1, 2)] ^ saes_table[3][BYTE(2, 3)];
-    #undef BYTE
-
-    return _mm_load_si128((__m128i*)k);
-}
-
-
-template<xmrig::Variant VARIANT, xmrig::Variant BASE>
-static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i bx1, __m128i& cx)
-{
-    uint64_t* mem_out = (uint64_t*)&l[idx];
-
-    if (BASE == xmrig::VARIANT_2) {
-        VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
-        _mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx));
-    } else {
-        __m128i tmp = _mm_xor_si128(bx0, cx);
-        mem_out[0] = _mm_cvtsi128_si64(tmp);
-
-        uint64_t vh = vgetq_lane_u64(tmp, 1);
-
-        uint8_t x = vh >> 24;
-        static const uint16_t table = 0x7531;
-        const uint8_t index = (((x >> (VARIANT == xmrig::VARIANT_XTL ? 4 : 3)) & 6) | (x & 1)) << 1;
-        vh ^= ((table >> index) & 0x3) << 28;
-
-        mem_out[1] = vh;
-    }
-}
-
-
-template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
-inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
-{
-    constexpr size_t MASK         = xmrig::cn_select_mask<ALGO>();
-    constexpr size_t ITERATIONS   = xmrig::cn_select_iter<ALGO, VARIANT>();
-    constexpr size_t MEM          = xmrig::cn_select_memory<ALGO>();
-    constexpr xmrig::Variant BASE = xmrig::cn_base_variant<VARIANT>();
-
-    if (BASE == xmrig::VARIANT_1 && size < 43) {
-        memset(output, 0, 32);
-        return;
-    }
-
-    xmrig::keccak(input, size, ctx[0]->state);
-
-    cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
-
-    const uint8_t* l0 = ctx[0]->memory;
-    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
-
-    VARIANT1_INIT(0);
-    VARIANT2_INIT(0);
-    VARIANT4_RANDOM_MATH_INIT(0);
-
-    uint64_t al0 = h0[0] ^ h0[4];
-    uint64_t ah0 = h0[1] ^ h0[5];
-    __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
-    __m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
-
-    uint64_t idx0 = al0;
-
-    for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx;
-        if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) {
-            cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
-        }
-
-        const __m128i ax0 = _mm_set_epi64x(ah0, al0);
-        if (VARIANT == xmrig::VARIANT_TUBE) {
-            cx = aes_round_tweak_div(cx, ax0);
-        }
-        else if (SOFT_AES) {
-            cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0);
-        }
-        else {
-            cx = _mm_aesenc_si128(cx, ax0);
-        }
-
-        if (BASE == xmrig::VARIANT_1 || BASE == xmrig::VARIANT_2) {
-            cryptonight_monero_tweak<VARIANT, BASE>(l0, idx0 & MASK, ax0, bx0, bx1, cx);
-        } else {
-            _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
-        }
-
-        idx0 = _mm_cvtsi128_si64(cx);
-
-        uint64_t hi, lo, cl, ch;
-        cl = ((uint64_t*) &l0[idx0 & MASK])[0];
-        ch = ((uint64_t*) &l0[idx0 & MASK])[1];
-
-        if (BASE == xmrig::VARIANT_2) {
-            if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) {
-                VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1);
-                if (VARIANT == xmrig::VARIANT_4) {
-                    al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
-                    ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
-                }
-            } else {
-                VARIANT2_INTEGER_MATH(0, cl, cx);
-            }
-        }
-
-        lo = __umul128(idx0, cl, &hi);
-
-        if (BASE == xmrig::VARIANT_2) {
-            if (VARIANT == xmrig::VARIANT_4) {
-                VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0);
-            } else {
-                VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
-            }
-        }
-
-        al0 += hi;
-        ah0 += lo;
-
-        ((uint64_t*)&l0[idx0 & MASK])[0] = al0;
-
-        if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
-            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
-        } else if (BASE == xmrig::VARIANT_1) {
-            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
-        } else {
-            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
-        }
-
-        al0 ^= cl;
-        ah0 ^= ch;
-        idx0 = al0;
-
-        if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
-            const int64x2_t x = vld1q_s64(reinterpret_cast<const int64_t *>(&l0[idx0 & MASK]));
-            const int64_t n   = vgetq_lane_s64(x, 0);
-            const int32_t d   = vgetq_lane_s32(x, 2);
-            const int64_t q   = n / (d | 0x5);
-
-            ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
-
-            if (VARIANT == xmrig::VARIANT_XHV) {
-                idx0 = (~d) ^ q;
-            }
-            else {
-                idx0 = d ^ q;
-            }
-        }
-
-        if (BASE == xmrig::VARIANT_2) {
-            bx1 = bx0;
-        }
-
-        bx0 = cx;
-    }
-
-    cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
-
-    xmrig::keccakf(h0, 24);
-    extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
-}
-
-
-#ifndef XMRIG_NO_CN_GPU
-template<size_t ITER, uint32_t MASK>
-void cn_gpu_inner_arm(const uint8_t *spad, uint8_t *lpad);
-
-
-template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
-inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
-{
-    constexpr size_t MASK         = xmrig::CRYPTONIGHT_GPU_MASK;
-    constexpr size_t ITERATIONS   = xmrig::cn_select_iter<ALGO, VARIANT>();
-    constexpr size_t MEM          = xmrig::cn_select_memory<ALGO>();
-
-    static_assert(MASK > 0 && ITERATIONS > 0 && MEM > 0, "unsupported algorithm/variant");
-
-    xmrig::keccak(input, size, ctx[0]->state);
-    cn_explode_scratchpad_gpu<ALGO, MEM>(ctx[0]->state, ctx[0]->memory);
-
-    fesetround(FE_TONEAREST);
-
-    cn_gpu_inner_arm<ITERATIONS, MASK>(ctx[0]->state, ctx[0]->memory);
-
-    cn_implode_scratchpad<xmrig::CRYPTONIGHT_HEAVY, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
-
-    xmrig::keccakf((uint64_t*) ctx[0]->state, 24);
-    memcpy(output, ctx[0]->state, 32);
-}
-#endif
-
-
-template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
-inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
-{
-    constexpr size_t MASK         = xmrig::cn_select_mask<ALGO>();
-    constexpr size_t ITERATIONS   = xmrig::cn_select_iter<ALGO, VARIANT>();
-    constexpr size_t MEM          = xmrig::cn_select_memory<ALGO>();
-    constexpr xmrig::Variant BASE = xmrig::cn_base_variant<VARIANT>();
-
-    if (BASE == xmrig::VARIANT_1 && size < 43) {
-        memset(output, 0, 64);
-        return;
-    }
-
-    xmrig::keccak(input,        size, ctx[0]->state);
-    xmrig::keccak(input + size, size, ctx[1]->state);
-
-    const uint8_t* l0 = ctx[0]->memory;
-    const uint8_t* l1 = ctx[1]->memory;
-    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
-    uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
-
-    VARIANT1_INIT(0);
-    VARIANT1_INIT(1);
-    VARIANT2_INIT(0);
-    VARIANT2_INIT(1);
-    VARIANT4_RANDOM_MATH_INIT(0);
-    VARIANT4_RANDOM_MATH_INIT(1);
-
-    cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
-    cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
-
-    uint64_t al0 = h0[0] ^ h0[4];
-    uint64_t al1 = h1[0] ^ h1[4];
-    uint64_t ah0 = h0[1] ^ h0[5];
-    uint64_t ah1 = h1[1] ^ h1[5];
-
-    __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
-    __m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
-    __m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
-    __m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
-
-    uint64_t idx0 = al0;
-    uint64_t idx1 = al1;
-
-    for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0, cx1;
-        if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) {
-            cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
-            cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
-        }
-
-        const __m128i ax0 = _mm_set_epi64x(ah0, al0);
-        const __m128i ax1 = _mm_set_epi64x(ah1, al1);
-        if (VARIANT == xmrig::VARIANT_TUBE) {
-            cx0 = aes_round_tweak_div(cx0, ax0);
-            cx1 = aes_round_tweak_div(cx1, ax1);
-        }
-        else if (SOFT_AES) {
-            cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0);
-            cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], ax1);
-        }
-        else {
-            cx0 = _mm_aesenc_si128(cx0, ax0);
-            cx1 = _mm_aesenc_si128(cx1, ax1);
-        }
-
-        if (BASE == xmrig::VARIANT_1 || (BASE == xmrig::VARIANT_2)) {
-            cryptonight_monero_tweak<VARIANT, BASE>(l0, idx0 & MASK, ax0, bx00, bx01, cx0);
-            cryptonight_monero_tweak<VARIANT, BASE>(l1, idx1 & MASK, ax1, bx10, bx11, cx1);
-        } else {
-            _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
-            _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx10, cx1));
-        }
-
-        idx0 = _mm_cvtsi128_si64(cx0);
-        idx1 = _mm_cvtsi128_si64(cx1);
-
-        uint64_t hi, lo, cl, ch;
-        cl = ((uint64_t*) &l0[idx0 & MASK])[0];
-        ch = ((uint64_t*) &l0[idx0 & MASK])[1];
-
-        if (BASE == xmrig::VARIANT_2) {
-            if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) {
-                VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01);
-                if (VARIANT == xmrig::VARIANT_4) {
-                    al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
-                    ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
-                }
-            } else {
-                VARIANT2_INTEGER_MATH(0, cl, cx0);
-            }
-        }
-
-        lo = __umul128(idx0, cl, &hi);
-
-        if (BASE == xmrig::VARIANT_2) {
-            if (VARIANT == xmrig::VARIANT_4) {
-                VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0);
-            } else {
-                VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
-            }
-        }
-
-        al0 += hi;
-        ah0 += lo;
-
-        ((uint64_t*)&l0[idx0 & MASK])[0] = al0;
-
-        if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
-            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
-        } else if (BASE == xmrig::VARIANT_1) {
-            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
-        } else {
-            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
-        }
-
-        al0 ^= cl;
-        ah0 ^= ch;
-        idx0 = al0;
-
-        if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
-            const int64x2_t x = vld1q_s64(reinterpret_cast<const int64_t *>(&l0[idx0 & MASK]));
-            const int64_t n   = vgetq_lane_s64(x, 0);
-            const int32_t d   = vgetq_lane_s32(x, 2);
-            const int64_t q   = n / (d | 0x5);
-
-            ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
-
-            if (VARIANT == xmrig::VARIANT_XHV) {
-                idx0 = (~d) ^ q;
-            }
-            else {
-                idx0 = d ^ q;
-            }
-        }
-
-        cl = ((uint64_t*) &l1[idx1 & MASK])[0];
-        ch = ((uint64_t*) &l1[idx1 & MASK])[1];
-
-        if (BASE == xmrig::VARIANT_2) {
-            if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) {
-                VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11);
-                if (VARIANT == xmrig::VARIANT_4) {
-                    al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
-                    ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
-                }
-            } else {
-                VARIANT2_INTEGER_MATH(1, cl, cx1);
-            }
-        }
-
-        lo = __umul128(idx1, cl, &hi);
-
-        if (BASE == xmrig::VARIANT_2) {
-            if (VARIANT == xmrig::VARIANT_4) {
-                VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0);
-            } else {
-                VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
-            }
-        }
-
-        al1 += hi;
-        ah1 += lo;
-
-        ((uint64_t*)&l1[idx1 & MASK])[0] = al1;
-
-        if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
-            ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1;
-        } else if (BASE == xmrig::VARIANT_1) {
-            ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
-        } else {
-            ((uint64_t*)&l1[idx1 & MASK])[1] = ah1;
-        }
-
-        al1 ^= cl;
-        ah1 ^= ch;
-        idx1 = al1;
-
-        if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
-            const int64x2_t x = vld1q_s64(reinterpret_cast<const int64_t *>(&l1[idx1 & MASK]));
-            const int64_t n   = vgetq_lane_s64(x, 0);
-            const int32_t d   = vgetq_lane_s32(x, 2);
-            const int64_t q   = n / (d | 0x5);
-
-            ((int64_t*)&l1[idx1 & MASK])[0] = n ^ q;
-
-            if (VARIANT == xmrig::VARIANT_XHV) {
-                idx1 = (~d) ^ q;
-            }
-            else {
-                idx1 = d ^ q;
-            }
-        }
-        if (BASE == xmrig::VARIANT_2) {
-            bx01 = bx00;
-            bx11 = bx10;
-        }
-        bx00 = cx0;
-        bx10 = cx1;
-    }
-
-    cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
-    cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) l1, (__m128i*) h1);
-
-    xmrig::keccakf(h0, 24);
-    xmrig::keccakf(h1, 24);
-
-    extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
-    extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
-}
-
-
-template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
-inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
-{
-}
-
-
-template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
-inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
-{
-}
-
-
-template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
-inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height)
-{
-}
-
-#endif /* __CRYPTONIGHT_ARM_H__ */
diff --git a/src/crypto/CryptoNight_constants.h b/src/crypto/CryptoNight_constants.h
deleted file mode 100644
index 58a3915f..00000000
--- a/src/crypto/CryptoNight_constants.h
+++ /dev/null
@@ -1,225 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2019 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
- * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef XMRIG_CRYPTONIGHT_CONSTANTS_H
-#define XMRIG_CRYPTONIGHT_CONSTANTS_H
-
-
-#include <stddef.h>
-#include <stdint.h>
-
-
-#include "common/xmrig.h"
-
-
-namespace xmrig
-{
-
-constexpr const size_t   CRYPTONIGHT_MEMORY       = 2 * 1024 * 1024;
-constexpr const uint32_t CRYPTONIGHT_MASK         = 0x1FFFF0;
-constexpr const uint32_t CRYPTONIGHT_ITER         = 0x80000;
-constexpr const uint32_t CRYPTONIGHT_HALF_ITER    = 0x40000;
-constexpr const uint32_t CRYPTONIGHT_XAO_ITER     = 0x100000;
-constexpr const uint32_t CRYPTONIGHT_DOUBLE_ITER  = 0x100000;
-constexpr const uint32_t CRYPTONIGHT_WALTZ_ITER   = 0x60000;
-constexpr const uint32_t CRYPTONIGHT_ZLS_ITER     = 0x60000;
-
-constexpr const uint32_t CRYPTONIGHT_GPU_ITER     = 0xC000;
-constexpr const uint32_t CRYPTONIGHT_GPU_MASK     = 0x1FFFC0;
-
-constexpr const size_t   CRYPTONIGHT_LITE_MEMORY  = 1 * 1024 * 1024;
-constexpr const uint32_t CRYPTONIGHT_LITE_MASK    = 0xFFFF0;
-constexpr const uint32_t CRYPTONIGHT_LITE_ITER    = 0x40000;
-
-constexpr const size_t   CRYPTONIGHT_HEAVY_MEMORY = 4 * 1024 * 1024;
-constexpr const uint32_t CRYPTONIGHT_HEAVY_MASK   = 0x3FFFF0;
-constexpr const uint32_t CRYPTONIGHT_HEAVY_ITER   = 0x40000;
-
-constexpr const size_t   CRYPTONIGHT_PICO_MEMORY = 256 * 1024;
-constexpr const uint32_t CRYPTONIGHT_PICO_MASK   = 0x1FFF0;
-constexpr const uint32_t CRYPTONIGHT_PICO_ITER   = 0x40000;
-constexpr const uint32_t CRYPTONIGHT_TRTL_ITER   = 0x10000;
-
-
-template<Algo ALGO> inline constexpr size_t cn_select_memory()           { return 0; }
-template<> inline constexpr size_t cn_select_memory<CRYPTONIGHT>()       { return CRYPTONIGHT_MEMORY; }
-template<> inline constexpr size_t cn_select_memory<CRYPTONIGHT_LITE>()  { return CRYPTONIGHT_LITE_MEMORY; }
-template<> inline constexpr size_t cn_select_memory<CRYPTONIGHT_HEAVY>() { return CRYPTONIGHT_HEAVY_MEMORY; }
-template<> inline constexpr size_t cn_select_memory<CRYPTONIGHT_PICO>()  { return CRYPTONIGHT_PICO_MEMORY; }
-
-
-inline size_t cn_select_memory(Algo algorithm)
-{
-    switch(algorithm)
-    {
-    case CRYPTONIGHT:
-        return CRYPTONIGHT_MEMORY;
-
-    case CRYPTONIGHT_LITE:
-        return CRYPTONIGHT_LITE_MEMORY;
-
-    case CRYPTONIGHT_HEAVY:
-        return CRYPTONIGHT_HEAVY_MEMORY;
-
-    case CRYPTONIGHT_PICO:
-        return CRYPTONIGHT_PICO_MEMORY;
-
-    default:
-        break;
-    }
-
-    return 0;
-}
-
-
-template<Algo ALGO> inline constexpr uint32_t cn_select_mask()           { return 0; }
-template<> inline constexpr uint32_t cn_select_mask<CRYPTONIGHT>()       { return CRYPTONIGHT_MASK; }
-template<> inline constexpr uint32_t cn_select_mask<CRYPTONIGHT_LITE>()  { return CRYPTONIGHT_LITE_MASK; }
-template<> inline constexpr uint32_t cn_select_mask<CRYPTONIGHT_HEAVY>() { return CRYPTONIGHT_HEAVY_MASK; }
-template<> inline constexpr uint32_t cn_select_mask<CRYPTONIGHT_PICO>()  { return CRYPTONIGHT_PICO_MASK; }
-
-
-inline uint32_t cn_select_mask(Algo algorithm)
-{
-    switch(algorithm)
-    {
-    case CRYPTONIGHT:
-        return CRYPTONIGHT_MASK;
-
-    case CRYPTONIGHT_LITE:
-        return CRYPTONIGHT_LITE_MASK;
-
-    case CRYPTONIGHT_HEAVY:
-        return CRYPTONIGHT_HEAVY_MASK;
-
-    case CRYPTONIGHT_PICO:
-        return CRYPTONIGHT_PICO_MASK;
-
-    default:
-        break;
-    }
-
-    return 0;
-}
-
-
-template<Algo ALGO, Variant variant> inline constexpr uint32_t cn_select_iter()        { return 0; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_0>()          { return CRYPTONIGHT_ITER; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_1>()          { return CRYPTONIGHT_ITER; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_2>()          { return CRYPTONIGHT_ITER; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_WOW>()        { return CRYPTONIGHT_ITER; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_4>()          { return CRYPTONIGHT_ITER; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_XTL>()        { return CRYPTONIGHT_ITER; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_HALF>()       { return CRYPTONIGHT_HALF_ITER; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_MSR>()        { return CRYPTONIGHT_HALF_ITER; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_XAO>()        { return CRYPTONIGHT_XAO_ITER; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_RTO>()        { return CRYPTONIGHT_ITER; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_GPU>()        { return CRYPTONIGHT_GPU_ITER; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_RWZ>()        { return CRYPTONIGHT_WALTZ_ITER; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_ZLS>()        { return CRYPTONIGHT_ZLS_ITER; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT, VARIANT_DOUBLE>()     { return CRYPTONIGHT_DOUBLE_ITER; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_LITE, VARIANT_0>()     { return CRYPTONIGHT_LITE_ITER; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_LITE, VARIANT_1>()     { return CRYPTONIGHT_LITE_ITER; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_HEAVY, VARIANT_0>()    { return CRYPTONIGHT_HEAVY_ITER; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_HEAVY, VARIANT_XHV>()  { return CRYPTONIGHT_HEAVY_ITER; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_HEAVY, VARIANT_TUBE>() { return CRYPTONIGHT_HEAVY_ITER; }
-template<> inline constexpr uint32_t cn_select_iter<CRYPTONIGHT_PICO, VARIANT_TRTL>()  { return CRYPTONIGHT_TRTL_ITER; }
-
-
-inline uint32_t cn_select_iter(Algo algorithm, Variant variant)
-{
-    switch (variant) {
-    case VARIANT_MSR:
-    case VARIANT_HALF:
-        return CRYPTONIGHT_HALF_ITER;
-
-    case VARIANT_GPU:
-        return CRYPTONIGHT_GPU_ITER;
-
-    case VARIANT_RTO:
-    case VARIANT_DOUBLE:
-        return CRYPTONIGHT_XAO_ITER;
-
-    case VARIANT_TRTL:
-        return CRYPTONIGHT_TRTL_ITER;
-
-    case VARIANT_RWZ:
-    case VARIANT_ZLS:
-        return CRYPTONIGHT_WALTZ_ITER;
-
-    default:
-        break;
-    }
-
-    switch(algorithm)
-    {
-    case CRYPTONIGHT:
-        return CRYPTONIGHT_ITER;
-
-    case CRYPTONIGHT_LITE:
-        return CRYPTONIGHT_LITE_ITER;
-
-    case CRYPTONIGHT_HEAVY:
-        return CRYPTONIGHT_HEAVY_ITER;
-
-    case CRYPTONIGHT_PICO:
-        return CRYPTONIGHT_TRTL_ITER;
-
-    default:
-        break;
-    }
-
-    return 0;
-}
-
-
-template<Variant variant> inline constexpr Variant cn_base_variant()  { return VARIANT_0; }
-template<> inline constexpr Variant cn_base_variant<VARIANT_0>()      { return VARIANT_0; }
-template<> inline constexpr Variant cn_base_variant<VARIANT_1>()      { return VARIANT_1; }
-template<> inline constexpr Variant cn_base_variant<VARIANT_TUBE>()   { return VARIANT_1; }
-template<> inline constexpr Variant cn_base_variant<VARIANT_XTL>()    { return VARIANT_1; }
-template<> inline constexpr Variant cn_base_variant<VARIANT_MSR>()    { return VARIANT_1; }
-template<> inline constexpr Variant cn_base_variant<VARIANT_XHV>()    { return VARIANT_0; }
-template<> inline constexpr Variant cn_base_variant<VARIANT_XAO>()    { return VARIANT_0; }
-template<> inline constexpr Variant cn_base_variant<VARIANT_RTO>()    { return VARIANT_1; }
-template<> inline constexpr Variant cn_base_variant<VARIANT_2>()      { return VARIANT_2; }
-template<> inline constexpr Variant cn_base_variant<VARIANT_HALF>()   { return VARIANT_2; }
-template<> inline constexpr Variant cn_base_variant<VARIANT_TRTL>()   { return VARIANT_2; }
-template<> inline constexpr Variant cn_base_variant<VARIANT_GPU>()    { return VARIANT_GPU; }
-template<> inline constexpr Variant cn_base_variant<VARIANT_WOW>()    { return VARIANT_2; }
-template<> inline constexpr Variant cn_base_variant<VARIANT_4>()      { return VARIANT_2; }
-template<> inline constexpr Variant cn_base_variant<VARIANT_RWZ>()    { return VARIANT_2; }
-template<> inline constexpr Variant cn_base_variant<VARIANT_ZLS>()    { return VARIANT_2; }
-template<> inline constexpr Variant cn_base_variant<VARIANT_DOUBLE>() { return VARIANT_2; }
-
-
-template<Variant variant> inline constexpr bool cn_is_cryptonight_r() { return false; }
-template<> inline constexpr bool cn_is_cryptonight_r<VARIANT_WOW>()   { return true; }
-template<> inline constexpr bool cn_is_cryptonight_r<VARIANT_4>()     { return true; }
-
-} /* namespace xmrig */
-
-
-#endif /* XMRIG_CRYPTONIGHT_CONSTANTS_H */
diff --git a/src/crypto/CryptoNight_monero.h b/src/crypto/CryptoNight_monero.h
deleted file mode 100644
index 4e84ac5d..00000000
--- a/src/crypto/CryptoNight_monero.h
+++ /dev/null
@@ -1,206 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
- * Copyright 2018      SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef XMRIG_CRYPTONIGHT_MONERO_H
-#define XMRIG_CRYPTONIGHT_MONERO_H
-
-#include <fenv.h>
-#include <math.h>
-
-// VARIANT ALTERATIONS
-#ifndef XMRIG_ARM
-#   define VARIANT1_INIT(part) \
-    uint64_t tweak1_2_##part = 0; \
-    if (BASE == xmrig::VARIANT_1) { \
-        tweak1_2_##part = (*reinterpret_cast<const uint64_t*>(input + 35 + part * size) ^ \
-                          *(reinterpret_cast<const uint64_t*>(ctx[part]->state) + 24)); \
-    }
-#else
-#   define VARIANT1_INIT(part) \
-    uint64_t tweak1_2_##part = 0; \
-    if (BASE == xmrig::VARIANT_1) { \
-        memcpy(&tweak1_2_##part, input + 35 + part * size, sizeof tweak1_2_##part); \
-        tweak1_2_##part ^= *(reinterpret_cast<const uint64_t*>(ctx[part]->state) + 24); \
-    }
-#endif
-
-#define VARIANT1_1(p) \
-    if (BASE == xmrig::VARIANT_1) { \
-        const uint8_t tmp = reinterpret_cast<const uint8_t*>(p)[11]; \
-        static const uint32_t table = 0x75310; \
-        const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \
-        ((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \
-    }
-
-#define VARIANT1_2(p, part) \
-    if (BASE == xmrig::VARIANT_1) { \
-        (p) ^= tweak1_2_##part; \
-    }
-
-
-#ifndef XMRIG_ARM
-#   define VARIANT2_INIT(part) \
-    __m128i division_result_xmm_##part = _mm_cvtsi64_si128(h##part[12]); \
-    __m128i sqrt_result_xmm_##part = _mm_cvtsi64_si128(h##part[13]);
-
-#ifdef _MSC_VER
-#   define VARIANT2_SET_ROUNDING_MODE() if (BASE == xmrig::VARIANT_2) { _control87(RC_DOWN, MCW_RC); }
-#else
-#   define VARIANT2_SET_ROUNDING_MODE() if (BASE == xmrig::VARIANT_2) { fesetround(FE_DOWNWARD); }
-#endif
-
-#   define VARIANT2_INTEGER_MATH(part, cl, cx) \
-    do { \
-        const uint64_t sqrt_result = static_cast<uint64_t>(_mm_cvtsi128_si64(sqrt_result_xmm_##part)); \
-        const uint64_t cx_0 = _mm_cvtsi128_si64(cx); \
-        cl ^= static_cast<uint64_t>(_mm_cvtsi128_si64(division_result_xmm_##part)) ^ (sqrt_result << 32); \
-        const uint32_t d = static_cast<uint32_t>(cx_0 + (sqrt_result << 1)) | 0x80000001UL; \
-        const uint64_t cx_1 = _mm_cvtsi128_si64(_mm_srli_si128(cx, 8)); \
-        const uint64_t division_result = static_cast<uint32_t>(cx_1 / d) + ((cx_1 % d) << 32); \
-        division_result_xmm_##part = _mm_cvtsi64_si128(static_cast<int64_t>(division_result)); \
-        sqrt_result_xmm_##part = int_sqrt_v2(cx_0 + division_result); \
-    } while (0)
-
-#   define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c, reverse) \
-    do { \
-        const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ (reverse ? 0x30 : 0x10)))); \
-        const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
-        const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ (reverse ? 0x10 : 0x30)))); \
-        _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
-        _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
-        _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
-        if (VARIANT == xmrig::VARIANT_4) { \
-            _c = _mm_xor_si128(_mm_xor_si128(_c, chunk3), _mm_xor_si128(chunk1, chunk2)); \
-        } \
-    } while (0)
-
-#   define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo, reverse) \
-    do { \
-        const __m128i chunk1 = _mm_xor_si128(_mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))), _mm_set_epi64x(lo, hi)); \
-        const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
-        hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \
-        lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \
-        const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
-        if (reverse) { \
-            _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk1, _b1)); \
-            _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk3, _b)); \
-        } else { \
-            _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
-            _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
-        } \
-        _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
-    } while (0)
-
-#else
-#   define VARIANT2_INIT(part) \
-    uint64_t division_result_##part = h##part[12]; \
-    uint64_t sqrt_result_##part = h##part[13];
-
-#   define VARIANT2_INTEGER_MATH(part, cl, cx) \
-    do { \
-        const uint64_t cx_0 = _mm_cvtsi128_si64(cx); \
-        cl ^= division_result_##part ^ (sqrt_result_##part << 32); \
-        const uint32_t d = static_cast<uint32_t>(cx_0 + (sqrt_result_##part << 1)) | 0x80000001UL; \
-        const uint64_t cx_1 = _mm_cvtsi128_si64(_mm_srli_si128(cx, 8)); \
-        division_result_##part = static_cast<uint32_t>(cx_1 / d) + ((cx_1 % d) << 32); \
-        const uint64_t sqrt_input = cx_0 + division_result_##part; \
-        sqrt_result_##part = sqrt(sqrt_input + 18446744073709551616.0) * 2.0 - 8589934592.0; \
-        const uint64_t s = sqrt_result_##part >> 1; \
-        const uint64_t b = sqrt_result_##part & 1; \
-        const uint64_t r2 = (uint64_t)(s) * (s + b) + (sqrt_result_##part << 32); \
-        sqrt_result_##part += ((r2 + b > sqrt_input) ? -1 : 0) + ((r2 + (1ULL << 32) < sqrt_input - s) ? 1 : 0); \
-    } while (0)
-
-#   define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c, reverse) \
-    do { \
-        const uint64x2_t chunk1 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ (reverse ? 0x30 : 0x10)))); \
-        const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \
-        const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ (reverse ? 0x10 : 0x30)))); \
-        vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \
-        vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
-        vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
-        if (VARIANT == xmrig::VARIANT_4) { \
-            _c = veorq_u64(veorq_u64(_c, chunk3), veorq_u64(chunk1, chunk2)); \
-        } \
-    } while (0)
-
-#   define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo, reverse) \
-    do { \
-        const uint64x2_t chunk1 = veorq_u64(vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10))), vcombine_u64(vcreate_u64(hi), vcreate_u64(lo))); \
-        const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \
-        hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \
-        lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \
-        const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30))); \
-        if (reverse) { \
-            vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b1))); \
-            vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b))); \
-        } else { \
-            vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \
-            vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \
-        } \
-        vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \
-    } while (0)
-#endif
-
-#define SWAP32LE(x) x
-#define SWAP64LE(x) x
-#define hash_extra_blake(data, length, hash) blake256_hash((uint8_t*)(hash), (uint8_t*)(data), (length))
-
-#ifndef NOINLINE
-#ifdef __GNUC__
-#define NOINLINE __attribute__ ((noinline))
-#elif _MSC_VER
-#define NOINLINE __declspec(noinline)
-#else
-#define NOINLINE
-#endif
-#endif
-
-#include "common/xmrig.h"
-#include "variant4_random_math.h"
-
-#define VARIANT4_RANDOM_MATH_INIT(part) \
-  uint32_t r##part[9]; \
-  struct V4_Instruction code##part[256]; \
-  if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { \
-    r##part[0] = (uint32_t)(h##part[12]); \
-    r##part[1] = (uint32_t)(h##part[12] >> 32); \
-    r##part[2] = (uint32_t)(h##part[13]); \
-    r##part[3] = (uint32_t)(h##part[13] >> 32); \
-  } \
-  v4_random_math_init<VARIANT>(code##part, height);
-
-#define VARIANT4_RANDOM_MATH(part, al, ah, cl, bx0, bx1) \
-  if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { \
-    cl ^= (r##part[0] + r##part[1]) | ((uint64_t)(r##part[2] + r##part[3]) << 32); \
-    r##part[4] = static_cast<uint32_t>(al); \
-    r##part[5] = static_cast<uint32_t>(ah); \
-    r##part[6] = static_cast<uint32_t>(_mm_cvtsi128_si32(bx0)); \
-    r##part[7] = static_cast<uint32_t>(_mm_cvtsi128_si32(bx1)); \
-    r##part[8] = static_cast<uint32_t>(_mm_cvtsi128_si32(_mm_srli_si128(bx1, 8))); \
-    v4_random_math(code##part, r##part); \
-  }
-
-#endif /* XMRIG_CRYPTONIGHT_MONERO_H */
diff --git a/src/crypto/CryptoNight_test.h b/src/crypto/CryptoNight_test.h
deleted file mode 100644
index 6fa9dd28..00000000
--- a/src/crypto/CryptoNight_test.h
+++ /dev/null
@@ -1,388 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
- * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef XMRIG_CRYPTONIGHT_TEST_H
-#define XMRIG_CRYPTONIGHT_TEST_H
-
-
-#include <stdint.h>
-
-
-const static uint8_t test_input[380] = {
-    0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
-    0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
-    0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
-    0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
-    0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01,
-    0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
-    0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
-    0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
-    0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46,
-    0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02,
-    0x07, 0x07, 0xB4, 0x87, 0xD0, 0xD6, 0x05, 0x26, 0xE0, 0xC6, 0xDD, 0x9B, 0xC7, 0x18, 0xC3, 0xCF,
-    0x52, 0x04, 0xBD, 0x4F, 0x9B, 0x27, 0xF6, 0x73, 0xB9, 0x3F, 0xEF, 0x7B, 0xB2, 0xF7, 0x2B, 0xBB,
-    0x3F, 0x3E, 0x9C, 0x3E, 0x9D, 0x33, 0x1E, 0xDE, 0xAD, 0xBE, 0xEF, 0x4E, 0x00, 0x91, 0x81, 0x29,
-    0x74, 0xB2, 0x70, 0xE7, 0x6D, 0xD2, 0x2A, 0x5F, 0x52, 0x04, 0x93, 0xE6, 0x18, 0x89, 0x40, 0xD8,
-    0xC6, 0xE3, 0x90, 0x6E, 0xAA, 0x6A, 0xB7, 0xE2, 0x08, 0x7E, 0x78, 0x0E,
-    0x01, 0x00, 0xEE, 0xB2, 0xD1, 0xD6, 0x05, 0xFF, 0x27, 0x7F, 0x26, 0xDB, 0xAA, 0xB2, 0xC9, 0x26,
-    0x30, 0xC6, 0xCF, 0x11, 0x64, 0xEA, 0x6C, 0x8A, 0xE0, 0x98, 0x01, 0xF8, 0x75, 0x4B, 0x49, 0xAF,
-    0x79, 0x70, 0xAE, 0xEE, 0xA7, 0x62, 0x2C, 0x00, 0x00, 0x00, 0x00, 0x47, 0x8C, 0x63, 0xE7, 0xD8,
-    0x40, 0x02, 0x3C, 0xDA, 0xEA, 0x92, 0x52, 0x53, 0xAC, 0xFD, 0xC7, 0x8A, 0x4C, 0x31, 0xB2, 0xF2,
-    0xEC, 0x72, 0x7B, 0xFF, 0xCE, 0xC0, 0xE7, 0x12, 0xD4, 0xE9, 0x2A, 0x01,
-    0x07, 0x07, 0xA9, 0xB7, 0xD1, 0xD6, 0x05, 0x3F, 0x0D, 0x5E, 0xFD, 0xC7, 0x03, 0xFC, 0xFC, 0xD2,
-    0xCE, 0xBC, 0x44, 0xD8, 0xAB, 0x44, 0xA6, 0xA0, 0x3A, 0xE4, 0x4D, 0x8F, 0x15, 0xAF, 0x62, 0x17,
-    0xD1, 0xE0, 0x92, 0x85, 0xE4, 0x73, 0xF9, 0x00, 0x00, 0x00, 0xA0, 0xFC, 0x09, 0xDE, 0xAB, 0xF5,
-    0x8B, 0x6F, 0x1D, 0xCA, 0xA8, 0xBA, 0xAC, 0x74, 0xDD, 0x74, 0x19, 0xD5, 0xD6, 0x10, 0xEC, 0x38,
-    0xCF, 0x50, 0x29, 0x6A, 0x07, 0x0B, 0x93, 0x8F, 0x8F, 0xA8, 0x10, 0x04
-};
-
-
-struct cn_r_test_input_data
-{
-    uint64_t height;
-    size_t size;
-    uint8_t data[64];
-};
-
-
-const static cn_r_test_input_data cn_r_test_input[] = {
-    { 1806260, 44, { 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74 } },
-    { 1806261, 50, { 0x4c, 0x6f, 0x72, 0x65, 0x6d, 0x20, 0x69, 0x70, 0x73, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x73, 0x69, 0x74, 0x20, 0x61, 0x6d, 0x65, 0x74, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x63, 0x74, 0x65, 0x74, 0x75, 0x72, 0x20, 0x61, 0x64, 0x69, 0x70, 0x69, 0x73, 0x63, 0x69, 0x6e, 0x67 } },
-    { 1806262, 48, { 0x65, 0x6c, 0x69, 0x74, 0x2c, 0x20, 0x73, 0x65, 0x64, 0x20, 0x64, 0x6f, 0x20, 0x65, 0x69, 0x75, 0x73, 0x6d, 0x6f, 0x64, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x63, 0x69, 0x64, 0x69, 0x64, 0x75, 0x6e, 0x74, 0x20, 0x75, 0x74, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x65 } },
-    { 1806263, 48, { 0x65, 0x74, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x65, 0x20, 0x6d, 0x61, 0x67, 0x6e, 0x61, 0x20, 0x61, 0x6c, 0x69, 0x71, 0x75, 0x61, 0x2e, 0x20, 0x55, 0x74, 0x20, 0x65, 0x6e, 0x69, 0x6d, 0x20, 0x61, 0x64, 0x20, 0x6d, 0x69, 0x6e, 0x69, 0x6d, 0x20, 0x76, 0x65, 0x6e, 0x69, 0x61, 0x6d, 0x2c } },
-    { 1806264, 46, { 0x71, 0x75, 0x69, 0x73, 0x20, 0x6e, 0x6f, 0x73, 0x74, 0x72, 0x75, 0x64, 0x20, 0x65, 0x78, 0x65, 0x72, 0x63, 0x69, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x75, 0x6c, 0x6c, 0x61, 0x6d, 0x63, 0x6f, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x69, 0x73, 0x20, 0x6e, 0x69, 0x73, 0x69 } },
-    { 1806265, 45, { 0x75, 0x74, 0x20, 0x61, 0x6c, 0x69, 0x71, 0x75, 0x69, 0x70, 0x20, 0x65, 0x78, 0x20, 0x65, 0x61, 0x20, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x64, 0x6f, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x71, 0x75, 0x61, 0x74, 0x2e, 0x20, 0x44, 0x75, 0x69, 0x73, 0x20, 0x61, 0x75, 0x74, 0x65 } },
-    { 1806266, 47, { 0x69, 0x72, 0x75, 0x72, 0x65, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x20, 0x72, 0x65, 0x70, 0x72, 0x65, 0x68, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x69, 0x74, 0x20, 0x69, 0x6e, 0x20, 0x76, 0x6f, 0x6c, 0x75, 0x70, 0x74, 0x61, 0x74, 0x65, 0x20, 0x76, 0x65, 0x6c, 0x69, 0x74 } },
-    { 1806267, 44, { 0x65, 0x73, 0x73, 0x65, 0x20, 0x63, 0x69, 0x6c, 0x6c, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x65, 0x20, 0x65, 0x75, 0x20, 0x66, 0x75, 0x67, 0x69, 0x61, 0x74, 0x20, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x20, 0x70, 0x61, 0x72, 0x69, 0x61, 0x74, 0x75, 0x72, 0x2e } },
-    { 1806268, 47, { 0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x65, 0x75, 0x72, 0x20, 0x73, 0x69, 0x6e, 0x74, 0x20, 0x6f, 0x63, 0x63, 0x61, 0x65, 0x63, 0x61, 0x74, 0x20, 0x63, 0x75, 0x70, 0x69, 0x64, 0x61, 0x74, 0x61, 0x74, 0x20, 0x6e, 0x6f, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x2c } },
-    { 1806269, 62, { 0x73, 0x75, 0x6e, 0x74, 0x20, 0x69, 0x6e, 0x20, 0x63, 0x75, 0x6c, 0x70, 0x61, 0x20, 0x71, 0x75, 0x69, 0x20, 0x6f, 0x66, 0x66, 0x69, 0x63, 0x69, 0x61, 0x20, 0x64, 0x65, 0x73, 0x65, 0x72, 0x75, 0x6e, 0x74, 0x20, 0x6d, 0x6f, 0x6c, 0x6c, 0x69, 0x74, 0x20, 0x61, 0x6e, 0x69, 0x6d, 0x20, 0x69, 0x64, 0x20, 0x65, 0x73, 0x74, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x75, 0x6d, 0x2e } },
-};
-
-
-// "cn/wow"
-const static uint8_t test_output_wow[] = {
-    0x9d, 0x47, 0xbf, 0x4c, 0x41, 0xb7, 0xe8, 0xe7, 0x27, 0xe6, 0x81, 0x71, 0x5a, 0xcb, 0x47, 0xfa, 0x16, 0x77, 0xcd, 0xba, 0x9c, 0xa7, 0xbc, 0xb0, 0x5a, 0xd8, 0xcc, 0x8a, 0xbd, 0x5d, 0xaa, 0x66,
-    0x0d, 0x4a, 0x49, 0x5c, 0xb8, 0x44, 0xa3, 0xca, 0x8b, 0xa4, 0xed, 0xb8, 0xe6, 0xbc, 0xf8, 0x29, 0xef, 0x1c, 0x06, 0xd9, 0xcd, 0xea, 0x2b, 0x62, 0xca, 0x46, 0xc2, 0xa2, 0x1b, 0x8b, 0x0a, 0x79,
-    0xa1, 0xd6, 0xd8, 0x48, 0xb5, 0xc5, 0x91, 0x5f, 0xcc, 0xd2, 0xf6, 0x4c, 0xf2, 0x16, 0xc6, 0xb1, 0xa0, 0x2c, 0xf7, 0xc7, 0x7b, 0xc8, 0x0d, 0x8d, 0x4e, 0x51, 0xb4, 0x19, 0xe8, 0x8f, 0xf0, 0xdd,
-    0xaf, 0x3a, 0x85, 0x44, 0xa0, 0x22, 0x1a, 0x14, 0x8c, 0x2a, 0xc9, 0x04, 0x84, 0xb1, 0x98, 0x61, 0xe3, 0xaf, 0xca, 0x33, 0xfe, 0x17, 0x02, 0x1e, 0xfb, 0x8a, 0xd6, 0x49, 0x6b, 0x56, 0x79, 0x15,
-    0x31, 0x33, 0x99, 0xe0, 0x96, 0x3a, 0xe8, 0xa9, 0x9d, 0xab, 0x8a, 0xf6, 0x6d, 0x34, 0x3e, 0x09, 0x7d, 0xae, 0x0c, 0x0f, 0xeb, 0x08, 0xdb, 0xc4, 0x3c, 0xcd, 0xaf, 0xef, 0x55, 0x15, 0xf4, 0x13,
-    0x60, 0x21, 0xc6, 0xef, 0x90, 0xbf, 0xf9, 0xae, 0x94, 0xa7, 0x50, 0x6d, 0x62, 0x3d, 0x3a, 0x7a, 0x86, 0xc1, 0x75, 0x6d, 0x65, 0x5f, 0x50, 0xdd, 0x55, 0x8f, 0x71, 0x6d, 0x64, 0x62, 0x2a, 0x34,
-    0x2b, 0x13, 0x00, 0x05, 0x35, 0xf3, 0xdb, 0x5f, 0x9b, 0x9b, 0x84, 0xa6, 0x5c, 0x43, 0x51, 0xf3, 0x86, 0xcd, 0x2c, 0xde, 0xde, 0xbb, 0x8c, 0x3a, 0xd2, 0xea, 0xb0, 0x86, 0xe6, 0xa3, 0xfe, 0xe5,
-    0xfc, 0x0e, 0x1d, 0xad, 0x8e, 0x89, 0x57, 0x49, 0xdc, 0x90, 0xeb, 0x69, 0x0b, 0xc1, 0xba, 0x05, 0x9a, 0x1c, 0xd7, 0x72, 0xaf, 0xaa, 0xf6, 0x5a, 0x10, 0x6b, 0xf9, 0xe5, 0xe6, 0xb8, 0x05, 0x03,
-    0xb6, 0x0b, 0x0a, 0xfe, 0x14, 0x4d, 0xef, 0xf7, 0xd9, 0x03, 0xed, 0x2d, 0x55, 0x45, 0xe7, 0x7e, 0xbe, 0x66, 0xa3, 0xc5, 0x1f, 0xee, 0x70, 0x16, 0xee, 0xb8, 0xfe, 0xe9, 0xeb, 0x63, 0x0c, 0x0f,
-    0x64, 0x77, 0x4b, 0x27, 0xe7, 0xd5, 0xfe, 0xc8, 0x62, 0xfc, 0x4c, 0x0c, 0x13, 0xac, 0x6b, 0xf0, 0x91, 0x23, 0xb6, 0xf0, 0x5b, 0xb0, 0xe4, 0xb7, 0x5c, 0x97, 0xf3, 0x79, 0xa2, 0xb3, 0xa6, 0x79,
-};
-
-
-// "cn/r"
-const static uint8_t test_output_r[] = {
-    0xf7, 0x59, 0x58, 0x8a, 0xd5, 0x7e, 0x75, 0x84, 0x67, 0x29, 0x54, 0x43, 0xa9, 0xbd, 0x71, 0x49, 0x0a, 0xbf, 0xf8, 0xe9, 0xda, 0xd1, 0xb9, 0x5b, 0x6b, 0xf2, 0xf5, 0xd0, 0xd7, 0x83, 0x87, 0xbc,
-    0x5b, 0xb8, 0x33, 0xde, 0xca, 0x2b, 0xdd, 0x72, 0x52, 0xa9, 0xcc, 0xd7, 0xb4, 0xce, 0x0b, 0x6a, 0x48, 0x54, 0x51, 0x57, 0x94, 0xb5, 0x6c, 0x20, 0x72, 0x62, 0xf7, 0xa5, 0xb9, 0xbd, 0xb5, 0x66,
-    0x1e, 0xe6, 0x72, 0x8d, 0xa6, 0x0f, 0xbd, 0x8d, 0x7d, 0x55, 0xb2, 0xb1, 0xad, 0xe4, 0x87, 0xa3, 0xcf, 0x52, 0xa2, 0xc3, 0xac, 0x6f, 0x52, 0x0d, 0xb1, 0x2c, 0x27, 0xd8, 0x92, 0x1f, 0x6c, 0xab,
-    0x69, 0x69, 0xfe, 0x2d, 0xdf, 0xb7, 0x58, 0x43, 0x8d, 0x48, 0x04, 0x9f, 0x30, 0x2f, 0xc2, 0x10, 0x8a, 0x4f, 0xcc, 0x93, 0xe3, 0x76, 0x69, 0x17, 0x0e, 0x6d, 0xb4, 0xb0, 0xb9, 0xb4, 0xc4, 0xcb,
-    0x7f, 0x30, 0x48, 0xb4, 0xe9, 0x0d, 0x0c, 0xbe, 0x7a, 0x57, 0xc0, 0x39, 0x4f, 0x37, 0x33, 0x8a, 0x01, 0xfa, 0xe3, 0xad, 0xfd, 0xc0, 0xe5, 0x12, 0x6d, 0x86, 0x3a, 0x89, 0x5e, 0xb0, 0x4e, 0x02,
-    0x1d, 0x29, 0x04, 0x43, 0xa4, 0xb5, 0x42, 0xaf, 0x04, 0xa8, 0x2f, 0x6b, 0x24, 0x94, 0xa6, 0xee, 0x7f, 0x20, 0xf2, 0x75, 0x4c, 0x58, 0xe0, 0x84, 0x90, 0x32, 0x48, 0x3a, 0x56, 0xe8, 0xe2, 0xef,
-    0xc4, 0x3c, 0xc6, 0x56, 0x74, 0x36, 0xa8, 0x6a, 0xfb, 0xd6, 0xaa, 0x9e, 0xaa, 0x7c, 0x27, 0x6e, 0x98, 0x06, 0x83, 0x03, 0x34, 0xb6, 0x14, 0xb2, 0xbe, 0xe2, 0x3c, 0xc7, 0x66, 0x34, 0xf6, 0xfd,
-    0x87, 0xbe, 0x24, 0x79, 0xc0, 0xc4, 0xe8, 0xed, 0xfd, 0xfa, 0xa5, 0x60, 0x3e, 0x93, 0xf4, 0x26, 0x5b, 0x3f, 0x82, 0x24, 0xc1, 0xc5, 0x94, 0x6f, 0xeb, 0x42, 0x48, 0x19, 0xd1, 0x89, 0x90, 0xa4,
-    0xdd, 0x9d, 0x6a, 0x6d, 0x8e, 0x47, 0x46, 0x5c, 0xce, 0xac, 0x08, 0x77, 0xef, 0x88, 0x9b, 0x93, 0xe7, 0xeb, 0xa9, 0x79, 0x55, 0x7e, 0x39, 0x35, 0xd7, 0xf8, 0x6d, 0xce, 0x11, 0xb0, 0x70, 0xf3,
-    0x75, 0xc6, 0xf2, 0xae, 0x49, 0xa2, 0x05, 0x21, 0xde, 0x97, 0x28, 0x5b, 0x43, 0x1e, 0x71, 0x71, 0x25, 0x84, 0x7f, 0xb8, 0x93, 0x5e, 0xd8, 0x4a, 0x61, 0xe7, 0xf8, 0xd3, 0x6a, 0x2c, 0x3d, 0x8e,
-};
-
-
-// "cn/0"
-const static uint8_t test_output_v0[160] = {
-    0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
-    0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00,
-    0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
-    0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F,
-    0xA1, 0xB4, 0xFA, 0xE3, 0xE5, 0x76, 0xCE, 0xCF, 0xB7, 0x9C, 0xAF, 0x3E, 0x29, 0x92, 0xE4, 0xE0,
-    0x31, 0x24, 0x05, 0x48, 0xBF, 0x8D, 0x5F, 0x7B, 0x11, 0x03, 0x60, 0xAA, 0xD7, 0x50, 0x3F, 0x0C,
-    0x2D, 0x30, 0xF3, 0x87, 0x4F, 0x86, 0xA1, 0x4A, 0xB5, 0xA2, 0x1A, 0x08, 0xD0, 0x44, 0x2C, 0x9D,
-    0x16, 0xE9, 0x28, 0x49, 0xA1, 0xFF, 0x85, 0x6F, 0x12, 0xBB, 0x7D, 0xAB, 0x11, 0x1C, 0xE7, 0xF7,
-    0x2D, 0x9D, 0x19, 0xE4, 0xD2, 0x26, 0x44, 0x1E, 0xCD, 0x22, 0x08, 0x24, 0xA8, 0x97, 0x46, 0x62,
-    0x04, 0x84, 0x90, 0x4A, 0xEE, 0x99, 0x14, 0xED, 0xB8, 0xC6, 0x0D, 0x37, 0xA1, 0x66, 0x17, 0xB0
-};
-
-
-// "cn/1" Cryptonight variant 1 (Monero v7)
-const static uint8_t test_output_v1[160] = {
-    0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9,
-    0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9,
-    0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D,
-    0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22,
-    0xE7, 0x8C, 0x5A, 0x6E, 0x38, 0x30, 0x68, 0x4A, 0x73, 0xFC, 0x1B, 0xC6, 0x6D, 0xFC, 0x8D, 0x98,
-    0xB4, 0xC2, 0x23, 0x39, 0xAD, 0xE0, 0x9D, 0xF6, 0x6D, 0x8C, 0x6A, 0xAA, 0xF9, 0xB2, 0xE3, 0x4C,
-    0xB6, 0x90, 0x6C, 0xE6, 0x15, 0x5E, 0x46, 0x07, 0x9C, 0xB2, 0x6B, 0xAC, 0x3B, 0xAC, 0x1A, 0xDE,
-    0x92, 0x2C, 0xD6, 0x0C, 0x46, 0x9D, 0x9B, 0xC2, 0x84, 0x52, 0x65, 0xF6, 0xBD, 0xFA, 0x0D, 0x74,
-    0x00, 0x66, 0x10, 0x07, 0xF1, 0x19, 0x06, 0x3A, 0x6C, 0xFF, 0xEE, 0xB2, 0x40, 0xE5, 0x88, 0x2B,
-    0x6C, 0xAB, 0x6B, 0x1D, 0x88, 0xB8, 0x44, 0x25, 0xF4, 0xEA, 0xB7, 0xEC, 0xBA, 0x12, 0x8A, 0x24
-};
-
-
-// "cn/2" Cryptonight variant 2 (Monero v8)
-const static uint8_t test_output_v2[160] = {
-    0x97, 0x37, 0x82, 0x82, 0xCF, 0x10, 0xE7, 0xAD, 0x03, 0x3F, 0x7B, 0x80, 0x74, 0xC4, 0x0E, 0x14,
-    0xD0, 0x6E, 0x7F, 0x60, 0x9D, 0xDD, 0xDA, 0x78, 0x76, 0x80, 0xB5, 0x8C, 0x05, 0xF4, 0x3D, 0x21,
-    0x87, 0x1F, 0xCD, 0x68, 0x23, 0xF6, 0xA8, 0x79, 0xBB, 0x3F, 0x33, 0x95, 0x1C, 0x8E, 0x8E, 0x89,
-    0x1D, 0x40, 0x43, 0x88, 0x0B, 0x02, 0xDF, 0xA1, 0xBB, 0x3B, 0xE4, 0x98, 0xB5, 0x0E, 0x75, 0x78,
-    0xE6, 0x0D, 0x24, 0x0F, 0x65, 0x85, 0x60, 0x3A, 0x4A, 0xE5, 0x5F, 0x54, 0x9B, 0xC8, 0x79, 0x93,
-    0xEB, 0x3D, 0x98, 0x2C, 0xFE, 0x9B, 0xFB, 0x15, 0xB6, 0x88, 0x21, 0x94, 0xB0, 0x05, 0x86, 0x5C,
-    0x59, 0x8B, 0x93, 0x7A, 0xDA, 0xD2, 0xA2, 0x14, 0xED, 0xB7, 0xC4, 0x5D, 0xA1, 0xEF, 0x26, 0xF3,
-    0xC7, 0x73, 0x29, 0x4D, 0xF1, 0xC8, 0x2C, 0xE0, 0xD0, 0xE9, 0xED, 0x0C, 0x70, 0x75, 0x05, 0x3E,
-    0x5B, 0xF6, 0xA0, 0x6E, 0xEA, 0xDE, 0x87, 0x0B, 0x06, 0x29, 0x03, 0xBF, 0xB4, 0x85, 0x9D, 0x04,
-    0x75, 0x1A, 0xCD, 0x1E, 0xD6, 0xAA, 0x1B, 0x05, 0x24, 0x6A, 0x2C, 0x80, 0x69, 0x68, 0xDC, 0x97
-};
-
-
-// "cn/xtl" Stellite (XTL)
-const static uint8_t test_output_xtl[160] = {
-    0x8F, 0xE5, 0xF0, 0x5F, 0x02, 0x2A, 0x61, 0x7D, 0xE5, 0x3F, 0x79, 0x36, 0x4B, 0x25, 0xCB, 0xC3,
-    0xC0, 0x8E, 0x0E, 0x1F, 0xE3, 0xBE, 0x48, 0x57, 0x07, 0x03, 0xFE, 0xE1, 0xEC, 0x0E, 0xB0, 0xB1,
-    0x21, 0x26, 0xFF, 0x98, 0xE6, 0x86, 0x08, 0x5B, 0xC9, 0x96, 0x44, 0xA3, 0xB8, 0x4E, 0x28, 0x90,
-    0x76, 0xED, 0xAD, 0xB9, 0xAA, 0xAC, 0x01, 0x94, 0x1D, 0xBE, 0x3E, 0xEA, 0xAD, 0xEE, 0xB2, 0xCF,
-    0xB0, 0x43, 0x4B, 0x88, 0xFC, 0xB2, 0xF3, 0x82, 0x9D, 0xD7, 0xDF, 0x51, 0x97, 0x2C, 0x5A, 0xE3,
-    0xC7, 0x16, 0x0B, 0xC8, 0x7C, 0xB7, 0x2F, 0x1C, 0x55, 0x33, 0xCA, 0xE1, 0xEE, 0x08, 0xA4, 0x86,
-    0x60, 0xED, 0x6E, 0x9D, 0x2D, 0x05, 0x0D, 0x7D, 0x02, 0x49, 0x23, 0x39, 0x7C, 0xC3, 0x6D, 0x3D,
-    0x05, 0x51, 0x28, 0xF1, 0x9B, 0x3C, 0xDF, 0xC4, 0xEA, 0x8A, 0xA6, 0x6A, 0x3C, 0x8B, 0xE2, 0xAF,
-    0x47, 0x00, 0xFC, 0x36, 0xED, 0x50, 0xBB, 0xD2, 0x2E, 0x63, 0x4B, 0x93, 0x11, 0x0C, 0xA7, 0xBA,
-    0x32, 0x6E, 0x47, 0x4D, 0xCE, 0xCC, 0x82, 0x54, 0x1D, 0x06, 0xF8, 0x06, 0x86, 0xBD, 0x22, 0x48
-};
-
-
-// "cn/half"
-const static uint8_t test_output_half[160] = {
-    0x5D, 0x4F, 0xBC, 0x35, 0x60, 0x97, 0xEA, 0x64, 0x40, 0xB0, 0x88, 0x8E, 0xDE, 0xB6, 0x35, 0xDD,
-    0xC8, 0x4A, 0x0E, 0x39, 0x7C, 0x86, 0x84, 0x56, 0x89, 0x5C, 0x3F, 0x29, 0xBE, 0x73, 0x12, 0xA7,
-    0x02, 0xE6, 0x1D, 0x2B, 0xBC, 0x84, 0xB6, 0x71, 0x96, 0x71, 0xD5, 0x0C, 0xAC, 0x76, 0x0E, 0x6B,
-    0xF1, 0xF0, 0x55, 0x34, 0x15, 0x29, 0x93, 0x04, 0x2D, 0xED, 0xD2, 0x33, 0x50, 0x6E, 0xBE, 0x25,
-    0xD0, 0xFD, 0x8E, 0xC6, 0x15, 0xD5, 0x12, 0x53, 0x7B, 0x26, 0xF6, 0x01, 0xA5, 0xA8, 0xBE, 0x7C,
-    0xCF, 0x5E, 0x19, 0xB7, 0x63, 0x0D, 0x0F, 0x02, 0x2B, 0xD7, 0xC4, 0x8C, 0x12, 0x24, 0x80, 0x02,
-    0xE7, 0xB7, 0xA0, 0x4F, 0x94, 0xF9, 0x46, 0xB5, 0x18, 0x64, 0x7E, 0x4E, 0x9C, 0x81, 0x6C, 0x60,
-    0x7D, 0x2E, 0xEA, 0xCF, 0x90, 0xCB, 0x68, 0x09, 0xC9, 0x53, 0xF6, 0xA9, 0xCA, 0x0C, 0xAC, 0xDC,
-    0xFD, 0x07, 0xDA, 0x24, 0x1D, 0xD1, 0x35, 0x32, 0x3C, 0xE8, 0x64, 0x44, 0x5E, 0xCB, 0xB5, 0x00,
-    0x69, 0xF4, 0x6F, 0xBB, 0x62, 0x0D, 0x25, 0xD8, 0xAC, 0x20, 0x90, 0xC5, 0x1B, 0xD3, 0x5F, 0xCA
-};
-
-
-// "cn/msr" Masari (MSR)
-const static uint8_t test_output_msr[160] = {
-    0x3C, 0x7A, 0x61, 0x08, 0x4C, 0x5E, 0xB8, 0x65, 0xB4, 0x98, 0xAB, 0x2F, 0x5A, 0x1A, 0xC5, 0x2C,
-    0x49, 0xC1, 0x77, 0xC2, 0xD0, 0x13, 0x34, 0x42, 0xD6, 0x5E, 0xD5, 0x14, 0x33, 0x5C, 0x82, 0xC5,
-    0x69, 0xDF, 0x38, 0x51, 0x1B, 0xB3, 0xEB, 0x7D, 0xE7, 0x6B, 0x08, 0x8E, 0xB6, 0x7E, 0xB7, 0x1C,
-    0x5F, 0x3C, 0x81, 0xC9, 0xF7, 0xCE, 0xAE, 0x28, 0xC0, 0xFE, 0xEB, 0xBA, 0x0B, 0x40, 0x38, 0x1D,
-    0x44, 0xD0, 0xD5, 0xD3, 0x98, 0x1F, 0xA3, 0x0E, 0xE9, 0x89, 0x1A, 0xD7, 0x88, 0xCC, 0x25, 0x76,
-    0x9C, 0xFF, 0x4D, 0x7F, 0x9C, 0xCF, 0x48, 0x07, 0x91, 0xF9, 0x82, 0xF5, 0x4C, 0xE9, 0xBD, 0x82,
-    0x36, 0x36, 0x64, 0x14, 0xED, 0xB8, 0x54, 0xEE, 0x22, 0xA1, 0x66, 0xA3, 0x87, 0x10, 0x76, 0x1F,
-    0x5A, 0xCD, 0x4C, 0x31, 0x4C, 0xBA, 0x41, 0xD2, 0xDB, 0x6C, 0x31, 0x2E, 0x7A, 0x64, 0x15, 0xFF,
-    0xA6, 0xD9, 0xB9, 0x7D, 0x1C, 0x3C, 0x98, 0xDD, 0x16, 0xE6, 0xD3, 0xAA, 0xEF, 0xB6, 0xB3, 0x53,
-    0x74, 0xD1, 0xAC, 0x5C, 0x04, 0x26, 0x7D, 0x71, 0xDE, 0xAB, 0x66, 0x28, 0x91, 0x3A, 0x6F, 0x4F
-};
-
-
-// "cn/xao" Alloy (XAO)
-const static uint8_t test_output_xao[160] = {
-    0x9A, 0x29, 0xD0, 0xC4, 0xAF, 0xDC, 0x63, 0x9B, 0x65, 0x53, 0xB1, 0xC8, 0x37, 0x35, 0x11, 0x4C,
-    0x5D, 0x77, 0x16, 0x21, 0x42, 0x97, 0x5C, 0xB8, 0x50, 0xC0, 0xA5, 0x1F, 0x64, 0x07, 0xBD, 0x33,
-    0xF1, 0xC9, 0x98, 0x40, 0x42, 0xDE, 0x39, 0xD1, 0xBA, 0x2D, 0xAD, 0xEC, 0xFE, 0xEA, 0xD8, 0x46,
-    0x56, 0x1C, 0x32, 0x90, 0x42, 0x63, 0x10, 0x80, 0xD7, 0x01, 0xE4, 0xE6, 0x20, 0xB3, 0x60, 0x45,
-    0x05, 0xE5, 0xC2, 0x18, 0xCD, 0x07, 0xA4, 0x40, 0x42, 0x91, 0xE2, 0xA4, 0x52, 0x54, 0x79, 0xBA,
-    0xCD, 0x7E, 0x61, 0x2D, 0x7F, 0x7E, 0x69, 0x5E, 0xD7, 0xC0, 0x06, 0x65, 0xD7, 0xA1, 0xB8, 0xB8,
-    0x1E, 0x31, 0x1C, 0xD3, 0xB7, 0xBC, 0x78, 0x3C, 0x01, 0xAF, 0x77, 0xAA, 0xF3, 0x0F, 0x4C, 0xF2,
-    0xD1, 0x8B, 0x58, 0xC7, 0xEB, 0x99, 0x91, 0x53, 0x43, 0x71, 0x47, 0x99, 0x9E, 0x04, 0xA4, 0xEA,
-    0xB8, 0xA3, 0xB0, 0x9E, 0x09, 0xF5, 0x57, 0x5C, 0xCF, 0x8A, 0xC6, 0xCA, 0x88, 0x51, 0x9A, 0x01,
-    0x31, 0xCC, 0x0C, 0xA6, 0x53, 0xB5, 0x5F, 0xFD, 0x7D, 0x29, 0x3A, 0x35, 0xE9, 0x0E, 0x25, 0x6C
-};
-
-
-// "cn/rto" Arto (RTO)
-const static uint8_t test_output_rto[160] = {
-    0x82, 0x66, 0x1E, 0x1C, 0x6E, 0x64, 0x36, 0x66, 0x84, 0x06, 0x32, 0x7A, 0x9B, 0xB1, 0x13, 0x19,
-    0xA5, 0x56, 0x16, 0x15, 0xDF, 0xEC, 0x1C, 0x9E, 0xE3, 0x88, 0x4A, 0x6C, 0x1C, 0xEB, 0x76, 0xA5,
-    0xB3, 0xFB, 0xF4, 0x3F, 0x2B, 0x6A, 0x3A, 0x39, 0xA3, 0x6E, 0x08, 0x33, 0x67, 0x90, 0x31, 0xB9,
-    0x3F, 0x27, 0xE4, 0x79, 0x32, 0x61, 0x6B, 0x5C, 0x8A, 0xF8, 0xAF, 0xC0, 0x60, 0xFD, 0x83, 0xB7,
-    0x11, 0x11, 0x89, 0xB4, 0xDC, 0xAE, 0x40, 0xC8, 0x64, 0xAA, 0x4D, 0x19, 0x23, 0x7B, 0xD3, 0x27,
-    0xB2, 0x0F, 0xA7, 0x50, 0x7D, 0xCA, 0xF5, 0x03, 0x06, 0xB2, 0x26, 0x62, 0xF3, 0x68, 0x2D, 0x30,
-    0x6F, 0x93, 0x1E, 0xFF, 0xCD, 0x85, 0x40, 0x28, 0x5F, 0xC3, 0x8C, 0x76, 0x51, 0x9E, 0xD5, 0x06,
-    0x32, 0xD6, 0x35, 0x83, 0xF6, 0x3B, 0x54, 0x4F, 0xA1, 0x9C, 0x13, 0xD8, 0xC4, 0x0E, 0x01, 0x2F,
-    0x29, 0xDB, 0x8C, 0x1C, 0xB7, 0x06, 0x86, 0x79, 0x6D, 0xFF, 0x9F, 0x89, 0x3B, 0x3A, 0xA5, 0x79,
-    0xE7, 0x81, 0x4E, 0x2A, 0xBD, 0x62, 0xC1, 0x1B, 0x7C, 0xB9, 0x33, 0x7B, 0xEE, 0x95, 0x80, 0xB3
-};
-
-// "cn/rwz"
-const static uint8_t test_output_rwz[160] = {
-    0x5f, 0x56, 0xc6, 0xb0, 0x99, 0x6b, 0xa2, 0x3e, 0x0b, 0xba, 0x07, 0x29, 0xc9, 0x90, 0x74, 0x85,
-    0x5a, 0x10, 0xe3, 0x08, 0x7f, 0xdb, 0xfe, 0x94, 0x75, 0x33, 0x54, 0x73, 0x76, 0xf0, 0x75, 0xb8,
-    0x8b, 0x70, 0x43, 0x9a, 0xfc, 0xf5, 0xeb, 0x15, 0xbb, 0xf9, 0xad, 0x9d, 0x2a, 0xbd, 0x72, 0x52,
-    0x49, 0x54, 0x0b, 0x91, 0xea, 0x61, 0x7f, 0x98, 0x7d, 0x39, 0x17, 0xb7, 0xd7, 0x65, 0xff, 0x75,
-    0x13, 0x21, 0x1d, 0xce, 0x61, 0x5a, 0xdc, 0x5f, 0x8c, 0xcb, 0x1f, 0x6f, 0xbb, 0x92, 0x88, 0xc3,
-    0xe3, 0xe2, 0xfc, 0x4f, 0x62, 0xfb, 0xf0, 0x48, 0x02, 0x01, 0xd3, 0xbe, 0x77, 0x6a, 0x40, 0xca,
-    0x9a, 0xe9, 0xba, 0x0c, 0xc0, 0x2b, 0x11, 0xf6, 0x9b, 0xee, 0x24, 0x3a, 0xd8, 0x86, 0x18, 0xd0,
-    0xe8, 0xeb, 0xcb, 0x38, 0x2c, 0xf5, 0x99, 0x83, 0x14, 0x7b, 0x0c, 0x20, 0xbe, 0x50, 0xf4, 0x87,
-    0x83, 0x41, 0x75, 0xd8, 0xd1, 0xdd, 0x4b, 0x73, 0xb3, 0x92, 0x8f, 0xe6, 0x1c, 0x72, 0x70, 0xf5,
-    0x7c, 0xf6, 0x23, 0x3a, 0xb4, 0x5f, 0xdf, 0xde, 0xa6, 0x5a, 0x58, 0xec, 0x13, 0x5a, 0x23, 0x2f
-};
-
-// "cn/zls"
-const static uint8_t test_output_zls[160] = {
-    0x51, 0x6E, 0x33, 0xC6, 0xE4, 0x46, 0xAB, 0xBC, 0xCD, 0xAD, 0x18, 0xC0, 0x4C, 0xD9, 0xA2, 0x5E,
-    0x64, 0x10, 0x28, 0x53, 0xB2, 0x0A, 0x42, 0xDF, 0xDE, 0xAA, 0x8B, 0x59, 0x9E, 0xCF, 0x40, 0xE2,
-    0x0D, 0x62, 0x5B, 0x42, 0x18, 0xE2, 0x76, 0xAD, 0xD0, 0x74, 0x90, 0x60, 0x8D, 0xC4, 0xC7, 0x80,
-    0x17, 0xB5, 0x1B, 0x25, 0x31, 0x39, 0x87, 0xD2, 0x2D, 0x6A, 0x9D, 0x1C, 0x74, 0xF4, 0x43, 0x22,
-    0x4B, 0x97, 0x1F, 0x6A, 0xD0, 0xBE, 0x00, 0x74, 0xEC, 0xC5, 0xD8, 0x3B, 0xE6, 0xF4, 0x03, 0x8A,
-    0x7B, 0xBA, 0x80, 0xCC, 0x9F, 0x00, 0xCB, 0xC2, 0x14, 0x8F, 0xF3, 0xD8, 0x92, 0x73, 0xBF, 0x17,
-    0x3D, 0x9B, 0x22, 0xA3, 0x61, 0x94, 0x41, 0x9E, 0xF9, 0x68, 0x1D, 0x42, 0x48, 0x3B, 0x39, 0x45,
-    0xE2, 0xE6, 0x16, 0x84, 0xFC, 0x21, 0xE6, 0xDA, 0x38, 0x7F, 0x17, 0xAB, 0xD3, 0xF2, 0xCE, 0x1A,
-    0x2F, 0x35, 0xD5, 0x74, 0xFA, 0x45, 0x3B, 0x06, 0xD1, 0x4E, 0x84, 0x3A, 0x5D, 0xE3, 0x0E, 0xA5,
-    0x00, 0x08, 0x64, 0xF0, 0xA6, 0xC8, 0x94, 0x45, 0x08, 0xED, 0x03, 0x95, 0x52, 0xE9, 0xBC, 0x5F
-};
-
-// "cn/double"
-const static uint8_t test_output_double[160] = {
-    0xAE, 0xFB, 0xB3, 0xF0, 0xCC, 0x88, 0x04, 0x6D, 0x11, 0x9F, 0x6C, 0x54, 0xB9, 0x6D, 0x90, 0xC9,
-    0xE8, 0x84, 0xEA, 0x3B, 0x59, 0x83, 0xA6, 0x0D, 0x50, 0xA4, 0x2D, 0x7D, 0x3E, 0xBE, 0x48, 0x21,
-    0x49, 0xCE, 0x8E, 0xF3, 0xBC, 0x8A, 0x36, 0xBF, 0x86, 0x37, 0x89, 0x55, 0x09, 0xBA, 0x22, 0xF8,
-    0xEB, 0x3A, 0xE1, 0xDC, 0x91, 0xF7, 0x62, 0x4B, 0x9F, 0x48, 0xE6, 0x92, 0xBD, 0xE4, 0x5D, 0xC1,
-    0xF1, 0x3C, 0x63, 0x1D, 0xEB, 0x0B, 0x04, 0xA3, 0x30, 0xD5, 0x11, 0x15, 0x4C, 0xCE, 0xEF, 0x4F,
-    0xDF, 0x69, 0xE3, 0x9E, 0xD2, 0x68, 0xFC, 0x1B, 0x6F, 0xE8, 0x08, 0x9C, 0xBB, 0xA5, 0x2B, 0x60,
-    0x52, 0x0F, 0xE5, 0xD2, 0xF3, 0x8A, 0xB3, 0xE1, 0x76, 0x7F, 0x44, 0x25, 0x76, 0xEC, 0xFF, 0xA2,
-    0x0C, 0x64, 0xD0, 0x0E, 0x32, 0x33, 0x28, 0x20, 0x73, 0xE0, 0x31, 0x66, 0x4E, 0x54, 0x83, 0x49,
-    0x51, 0x55, 0x4D, 0x2E, 0x22, 0xB7, 0x51, 0x09, 0x73, 0x61, 0x7E, 0x6A, 0x57, 0x0B, 0x28, 0x3C,
-    0x5E, 0x2E, 0xC1, 0x80, 0x89, 0x39, 0xB3, 0x54, 0x39, 0x52, 0x0E, 0x69, 0x3D, 0xF6, 0xC5, 0x4A
-};
-
-#ifndef XMRIG_NO_AEON
-// "cn-lite/0"
-const static uint8_t test_output_v0_lite[160] = {
-    0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
-    0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
-    0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
-    0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD,
-    0x38, 0x08, 0xE1, 0x17, 0x0B, 0x99, 0x8D, 0x1A, 0x3C, 0xCE, 0x35, 0xC5, 0xC7, 0x3A, 0x00, 0x2E,
-    0xCB, 0x54, 0xF0, 0x78, 0x2E, 0x9E, 0xDB, 0xC7, 0xDF, 0x2E, 0x71, 0x9A, 0x16, 0x97, 0xC4, 0x18,
-    0x4B, 0x97, 0x07, 0xFE, 0x5D, 0x98, 0x9A, 0xD6, 0xD8, 0xE5, 0x92, 0x66, 0x87, 0x7F, 0x19, 0x37,
-    0xA2, 0x5E, 0xE6, 0x96, 0xB5, 0x97, 0x33, 0x89, 0xE0, 0xA7, 0xC9, 0xDD, 0x4A, 0x7E, 0x9E, 0x53,
-    0xBE, 0x91, 0x2B, 0xF5, 0xF5, 0xAF, 0xDD, 0x09, 0xA2, 0xF4, 0xA4, 0x56, 0xEB, 0x96, 0x22, 0xC9,
-    0x94, 0xFB, 0x7B, 0x28, 0xC9, 0x97, 0x65, 0x04, 0xAC, 0x4F, 0x84, 0x71, 0xDA, 0x6E, 0xD8, 0xC5
-};
-
-
-// "cn-lite/1" AEON v7
-const static uint8_t test_output_v1_lite[160] = {
-    0x6D, 0x8C, 0xDC, 0x44, 0x4E, 0x9B, 0xBB, 0xFD, 0x68, 0xFC, 0x43, 0xFC, 0xD4, 0x85, 0x5B, 0x22,
-    0x8C, 0x8A, 0x1B, 0xD9, 0x1D, 0x9D, 0x00, 0x28, 0x5B, 0xEC, 0x02, 0xB7, 0xCA, 0x2D, 0x67, 0x41,
-    0x87, 0xC4, 0xE5, 0x70, 0x65, 0x3E, 0xB4, 0xC2, 0xB4, 0x2B, 0x7A, 0x0D, 0x54, 0x65, 0x59, 0x45,
-    0x2D, 0xFA, 0xB5, 0x73, 0xB8, 0x2E, 0xC5, 0x2F, 0x15, 0x2B, 0x7F, 0xF9, 0x8E, 0x79, 0x44, 0x6F,
-    0x16, 0x08, 0x74, 0xC7, 0xA2, 0xD2, 0xA3, 0x97, 0x95, 0x76, 0xCA, 0x4D, 0x06, 0x39, 0x7A, 0xAB,
-    0x6C, 0x87, 0x58, 0x33, 0x4D, 0xC8, 0x5A, 0xAB, 0x04, 0x27, 0xFE, 0x8B, 0x1C, 0x23, 0x2F, 0x32,
-    0xC0, 0x44, 0xFF, 0x0D, 0xB5, 0x3B, 0x27, 0x96, 0x06, 0x89, 0x7B, 0xA3, 0x0B, 0xD0, 0xCE, 0x9E,
-    0x90, 0x22, 0x77, 0x5A, 0xAD, 0xA1, 0xE5, 0xB6, 0xFC, 0xCB, 0x39, 0x7E, 0x2B, 0x10, 0xEE, 0xB4,
-    0x8C, 0x2B, 0xA4, 0x1F, 0x60, 0x76, 0x39, 0xD7, 0xF6, 0x46, 0x77, 0x18, 0x20, 0xAD, 0xD4, 0xC9,
-    0x87, 0xF7, 0x37, 0xDA, 0xFD, 0xBA, 0xBA, 0xD2, 0xF2, 0x68, 0xDC, 0x26, 0x8D, 0x1B, 0x08, 0xC6
-};
-#endif
-
-
-#ifndef XMRIG_NO_SUMO
-// "cn-heavy/0"
-const static uint8_t test_output_v0_heavy[160] = {
-    0x99, 0x83, 0xF2, 0x1B, 0xDF, 0x20, 0x10, 0xA8, 0xD7, 0x07, 0xBB, 0x2F, 0x14, 0xD7, 0x86, 0x64,
-    0xBB, 0xE1, 0x18, 0x7F, 0x55, 0x01, 0x4B, 0x39, 0xE5, 0xF3, 0xD6, 0x93, 0x28, 0xE4, 0x8F, 0xC2,
-    0x4D, 0x94, 0x7D, 0xD6, 0xDB, 0x6E, 0x07, 0x48, 0x26, 0x4A, 0x51, 0x2E, 0xAC, 0xF3, 0x25, 0x4A,
-    0x1F, 0x1A, 0xA2, 0x5B, 0xFC, 0x0A, 0xAD, 0x82, 0xDE, 0xA8, 0x99, 0x96, 0x88, 0x52, 0xD2, 0x7D,
-    0x3E, 0xE1, 0x23, 0x03, 0x5A, 0x63, 0x7B, 0x66, 0xF6, 0xD7, 0xC2, 0x2A, 0x34, 0x5E, 0x88, 0xE7,
-    0xFA, 0xC4, 0x25, 0x36, 0x54, 0xCB, 0xD2, 0x5C, 0x2F, 0x80, 0x2A, 0xF9, 0xCC, 0x43, 0xF7, 0xCD,
-    0xE5, 0x18, 0xA8, 0x05, 0x60, 0x18, 0xA5, 0x73, 0x72, 0x9B, 0x32, 0xDC, 0x69, 0x83, 0xC1, 0xE1,
-    0x1F, 0xDB, 0xDA, 0x6B, 0xAC, 0xEC, 0x9F, 0x67, 0xF8, 0x27, 0x1D, 0xC7, 0xE6, 0x46, 0x42, 0xF9,
-    0x53, 0x62, 0x0A, 0x54, 0x7D, 0x43, 0xEA, 0x18, 0x94, 0xED, 0xD8, 0x92, 0x06, 0x6A, 0xA1, 0x51,
-    0xAD, 0xB1, 0xFD, 0x89, 0xFB, 0x5C, 0xB4, 0x25, 0x6A, 0xDD, 0xB0, 0x09, 0xC5, 0x72, 0x87, 0xEB
-};
-
-
-// "cn-heavy/xhv"
-const static uint8_t test_output_xhv_heavy[160] = {
-    0x5A, 0xC3, 0xF7, 0x85, 0xC4, 0x90, 0xC5, 0x85, 0x50, 0xEC, 0x95, 0xD2, 0x72, 0x65, 0x63, 0x57,
-    0x7E, 0x7C, 0x1C, 0x21, 0x2D, 0x0C, 0xDE, 0x59, 0x12, 0x73, 0x20, 0x1E, 0x44, 0xFD, 0xD5, 0xB6,
-    0x1F, 0x4E, 0xB2, 0x0A, 0x36, 0x51, 0x4B, 0xF5, 0x4D, 0xC9, 0xE0, 0x90, 0x2C, 0x16, 0x47, 0x3F,
-    0xDE, 0x18, 0x29, 0x8E, 0xBB, 0x34, 0x2B, 0xEF, 0x7A, 0x04, 0x22, 0xD1, 0xB1, 0xF2, 0x48, 0xDA,
-    0xE3, 0x7F, 0x4B, 0x4C, 0xB4, 0xDF, 0xE8, 0xD3, 0x70, 0xE2, 0xE7, 0x44, 0x25, 0x87, 0x12, 0xF9,
-    0x8F, 0x28, 0x0B, 0xCE, 0x2C, 0xEE, 0xDD, 0x88, 0x94, 0x35, 0x48, 0x51, 0xAE, 0xC8, 0x9C, 0x0B,
-    0xED, 0x2F, 0xE6, 0x0F, 0x39, 0x05, 0xB4, 0x4A, 0x8F, 0x38, 0x44, 0x2D, 0x4B, 0xE9, 0x7B, 0x81,
-    0xC6, 0xB0, 0xE0, 0x0A, 0x39, 0x8C, 0x38, 0xFE, 0x63, 0x31, 0x47, 0x65, 0x0D, 0x2B, 0xF4, 0x96,
-    0x13, 0x91, 0x89, 0xB4, 0x5B, 0xA9, 0x2A, 0x7A, 0x09, 0x65, 0x14, 0x20, 0x76, 0x24, 0x6C, 0x80,
-    0x1D, 0x3F, 0x9F, 0xCD, 0x68, 0x39, 0xA9, 0x42, 0x27, 0xC1, 0x0C, 0x53, 0x98, 0x35, 0x60, 0x7A
-};
-
-
-// "cn-heavy/tube"
-const static uint8_t test_output_tube_heavy[160] = {
-    0xFE, 0x53, 0x35, 0x20, 0x76, 0xEA, 0xE6, 0x89, 0xFA, 0x3B, 0x4F, 0xDA, 0x61, 0x46, 0x34, 0xCF,
-    0xC3, 0x12, 0xEE, 0x0C, 0x38, 0x7D, 0xF2, 0xB8, 0xB7, 0x4D, 0xA2, 0xA1, 0x59, 0x74, 0x12, 0x35,
-    0xCD, 0x3F, 0x29, 0xDF, 0x07, 0x4A, 0x14, 0xAD, 0x0B, 0x98, 0x99, 0x37, 0xCA, 0x14, 0x68, 0xA3,
-    0x8D, 0xAE, 0x86, 0xC1, 0xA3, 0x54, 0x05, 0xBE, 0xEA, 0x6D, 0x29, 0x24, 0x0C, 0x82, 0x97, 0x74,
-    0xA0, 0x64, 0x77, 0xCD, 0x8D, 0x8A, 0xC3, 0x10, 0xB4, 0x89, 0x0E, 0xBB, 0x7D, 0xE6, 0x32, 0x8F,
-    0xF4, 0x2D, 0xB6, 0x9E, 0x8A, 0xF9, 0xF8, 0xEE, 0x2C, 0xD0, 0x74, 0xED, 0xA9, 0xAA, 0xA1, 0xFB,
-    0xE2, 0xC9, 0x89, 0x66, 0xD6, 0x66, 0x52, 0xA2, 0x16, 0xDA, 0x36, 0xA0, 0x10, 0x62, 0xD2, 0xB1,
-    0x76, 0xD1, 0x31, 0xE9, 0x1C, 0x08, 0xB6, 0xCA, 0xAF, 0x89, 0xB9, 0x3D, 0x2C, 0xFA, 0x9A, 0x30,
-    0x74, 0x6A, 0x96, 0xA1, 0x95, 0x6C, 0xBB, 0x46, 0x4D, 0xE0, 0xEB, 0x28, 0xBE, 0x2A, 0x8C, 0x34,
-    0x57, 0x79, 0xBE, 0x52, 0xFB, 0xBC, 0x68, 0x43, 0x45, 0xF4, 0xDF, 0xA5, 0xA8, 0xFD, 0x55, 0xA6
-};
-#endif
-
-
-#ifndef XMRIG_NO_CN_PICO
-// "cn-pico/trtl"
-const static uint8_t test_output_pico_trtl[160] = {
-    0x08, 0xF4, 0x21, 0xD7, 0x83, 0x31, 0x17, 0x30, 0x0E, 0xDA, 0x66, 0xE9, 0x8F, 0x4A, 0x25, 0x69,
-    0x09, 0x3D, 0xF3, 0x00, 0x50, 0x01, 0x73, 0x94, 0x4E, 0xFC, 0x40, 0x1E, 0x9A, 0x4A, 0x17, 0xAF,
-    0xB2, 0x17, 0x2E, 0xC9, 0x46, 0x6E, 0x1A, 0xEE, 0x70, 0xEC, 0x85, 0x72, 0xA1, 0x4C, 0x23, 0x3E,
-    0xE3, 0x54, 0x58, 0x2B, 0xCB, 0x93, 0xF8, 0x69, 0xD4, 0x29, 0x74, 0x4D, 0xE5, 0x72, 0x6A, 0x26,
-    0x4E, 0xFD, 0x28, 0xFC, 0xD3, 0x74, 0x8A, 0x83, 0xF3, 0xCA, 0x92, 0x84, 0xE7, 0x4E, 0x10, 0xC2,
-    0x05, 0x62, 0xC7, 0xBE, 0x99, 0x73, 0xED, 0x90, 0xB5, 0x6F, 0xDA, 0x64, 0x71, 0x2D, 0x99, 0x39,
-    0x29, 0xDB, 0x22, 0x2B, 0x97, 0xB6, 0x37, 0x0E, 0x9A, 0x03, 0x65, 0xCC, 0xF7, 0xD0, 0x9A, 0xB7,
-    0x68, 0xCE, 0x07, 0x3E, 0x15, 0x40, 0x3C, 0xCE, 0x8C, 0x63, 0x16, 0x72, 0xB5, 0x74, 0x84, 0xF4,
-    0xA1, 0xE7, 0x53, 0x85, 0xFB, 0x72, 0xDD, 0x75, 0x90, 0x39, 0xB2, 0x3D, 0xC3, 0x08, 0x2C, 0xD5,
-    0x01, 0x08, 0x27, 0x75, 0x86, 0xB9, 0xBB, 0x9B, 0xDF, 0xEA, 0x49, 0xDE, 0x46, 0xCB, 0x83, 0x45
-};
-#endif
-
-
-#ifndef XMRIG_NO_CN_GPU
-// "cn/gpu"
-const static uint8_t test_output_gpu[160] = {
-    0xE5, 0x5C, 0xB2, 0x3E, 0x51, 0x64, 0x9A, 0x59, 0xB1, 0x27, 0xB9, 0x6B, 0x51, 0x5F, 0x2B, 0xF7,
-    0xBF, 0xEA, 0x19, 0x97, 0x41, 0xA0, 0x21, 0x6C, 0xF8, 0x38, 0xDE, 0xD0, 0x6E, 0xFF, 0x82, 0xDF,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-};
-#endif
-
-
-#endif /* XMRIG_CRYPTONIGHT_TEST_H */
diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h
deleted file mode 100644
index 202b662a..00000000
--- a/src/crypto/CryptoNight_x86.h
+++ /dev/null
@@ -1,1481 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2019 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
- * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef XMRIG_CRYPTONIGHT_X86_H
-#define XMRIG_CRYPTONIGHT_X86_H
-
-
-#ifdef __GNUC__
-#   include <x86intrin.h>
-#else
-#   include <intrin.h>
-#   define __restrict__ __restrict
-#endif
-
-
-#include "common/cpu/Cpu.h"
-#include "common/crypto/keccak.h"
-#include "crypto/CryptoNight.h"
-#include "crypto/CryptoNight_constants.h"
-#include "crypto/CryptoNight_monero.h"
-#include "crypto/soft_aes.h"
-
-
-extern "C"
-{
-#include "crypto/c_groestl.h"
-#include "crypto/c_blake256.h"
-#include "crypto/c_jh.h"
-#include "crypto/c_skein.h"
-}
-
-
-static inline void do_blake_hash(const uint8_t *input, size_t len, uint8_t *output) {
-    blake256_hash(output, input, len);
-}
-
-
-static inline void do_groestl_hash(const uint8_t *input, size_t len, uint8_t *output) {
-    groestl(input, len * 8, output);
-}
-
-
-static inline void do_jh_hash(const uint8_t *input, size_t len, uint8_t *output) {
-    jh_hash(32 * 8, input, 8 * len, output);
-}
-
-
-static inline void do_skein_hash(const uint8_t *input, size_t len, uint8_t *output) {
-    xmr_skein(input, output);
-}
-
-
-void (* const extra_hashes[4])(const uint8_t *, size_t, uint8_t *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};
-
-
-#if defined(__x86_64__) || defined(_M_AMD64)
-#   ifdef __GNUC__
-static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t* hi)
-{
-    unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
-    *hi = r >> 64;
-    return (uint64_t) r;
-}
-#   else
-    #define __umul128 _umul128
-#   endif
-#elif defined(__i386__) || defined(_M_IX86)
-static inline int64_t _mm_cvtsi128_si64(__m128i a)
-{
-    return ((uint64_t)(uint32_t)_mm_cvtsi128_si32(a) | ((uint64_t)(uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(a, 4)) << 32));
-}
-
-static inline __m128i _mm_cvtsi64_si128(int64_t a) {
-    return _mm_set_epi64x(0, a);
-}
-
-static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
-    // multiplier   = ab = a * 2^32 + b
-    // multiplicand = cd = c * 2^32 + d
-    // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
-    uint64_t a = multiplier >> 32;
-    uint64_t b = multiplier & 0xFFFFFFFF;
-    uint64_t c = multiplicand >> 32;
-    uint64_t d = multiplicand & 0xFFFFFFFF;
-
-    //uint64_t ac = a * c;
-    uint64_t ad = a * d;
-    //uint64_t bc = b * c;
-    uint64_t bd = b * d;
-
-    uint64_t adbc = ad + (b * c);
-    uint64_t adbc_carry = adbc < ad ? 1 : 0;
-
-    // multiplier * multiplicand = product_hi * 2^64 + product_lo
-    uint64_t product_lo = bd + (adbc << 32);
-    uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
-    *product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
-
-    return product_lo;
-}
-#endif
-
-
-// This will shift and xor tmp1 into itself as 4 32-bit vals such as
-// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
-static inline __m128i sl_xor(__m128i tmp1)
-{
-    __m128i tmp4;
-    tmp4 = _mm_slli_si128(tmp1, 0x04);
-    tmp1 = _mm_xor_si128(tmp1, tmp4);
-    tmp4 = _mm_slli_si128(tmp4, 0x04);
-    tmp1 = _mm_xor_si128(tmp1, tmp4);
-    tmp4 = _mm_slli_si128(tmp4, 0x04);
-    tmp1 = _mm_xor_si128(tmp1, tmp4);
-    return tmp1;
-}
-
-
-template<uint8_t rcon>
-static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2)
-{
-    __m128i xout1 = _mm_aeskeygenassist_si128(*xout2, rcon);
-    xout1  = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
-    *xout0 = sl_xor(*xout0);
-    *xout0 = _mm_xor_si128(*xout0, xout1);
-    xout1  = _mm_aeskeygenassist_si128(*xout0, 0x00);
-    xout1  = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
-    *xout2 = sl_xor(*xout2);
-    *xout2 = _mm_xor_si128(*xout2, xout1);
-}
-
-
-template<uint8_t rcon>
-static inline void soft_aes_genkey_sub(__m128i* xout0, __m128i* xout2)
-{
-    __m128i xout1 = soft_aeskeygenassist<rcon>(*xout2);
-    xout1  = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
-    *xout0 = sl_xor(*xout0);
-    *xout0 = _mm_xor_si128(*xout0, xout1);
-    xout1  = soft_aeskeygenassist<0x00>(*xout0);
-    xout1  = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
-    *xout2 = sl_xor(*xout2);
-    *xout2 = _mm_xor_si128(*xout2, xout1);
-}
-
-
-template<bool SOFT_AES>
-static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
-{
-    __m128i xout0 = _mm_load_si128(memory);
-    __m128i xout2 = _mm_load_si128(memory + 1);
-    *k0 = xout0;
-    *k1 = xout2;
-
-    SOFT_AES ? soft_aes_genkey_sub<0x01>(&xout0, &xout2) : aes_genkey_sub<0x01>(&xout0, &xout2);
-    *k2 = xout0;
-    *k3 = xout2;
-
-    SOFT_AES ? soft_aes_genkey_sub<0x02>(&xout0, &xout2) : aes_genkey_sub<0x02>(&xout0, &xout2);
-    *k4 = xout0;
-    *k5 = xout2;
-
-    SOFT_AES ? soft_aes_genkey_sub<0x04>(&xout0, &xout2) : aes_genkey_sub<0x04>(&xout0, &xout2);
-    *k6 = xout0;
-    *k7 = xout2;
-
-    SOFT_AES ? soft_aes_genkey_sub<0x08>(&xout0, &xout2) : aes_genkey_sub<0x08>(&xout0, &xout2);
-    *k8 = xout0;
-    *k9 = xout2;
-}
-
-
-static FORCEINLINE void soft_aesenc(void* __restrict ptr, const void* __restrict key, const uint32_t* __restrict t)
-{
-    uint32_t x0 = ((const uint32_t*)(ptr))[0];
-    uint32_t x1 = ((const uint32_t*)(ptr))[1];
-    uint32_t x2 = ((const uint32_t*)(ptr))[2];
-    uint32_t x3 = ((const uint32_t*)(ptr))[3];
-
-    uint32_t y0 = t[x0 & 0xff]; x0 >>= 8;
-    uint32_t y1 = t[x1 & 0xff]; x1 >>= 8;
-    uint32_t y2 = t[x2 & 0xff]; x2 >>= 8;
-    uint32_t y3 = t[x3 & 0xff]; x3 >>= 8;
-    t += 256;
-
-    y0 ^= t[x1 & 0xff]; x1 >>= 8;
-    y1 ^= t[x2 & 0xff]; x2 >>= 8;
-    y2 ^= t[x3 & 0xff]; x3 >>= 8;
-    y3 ^= t[x0 & 0xff]; x0 >>= 8;
-    t += 256;
-
-    y0 ^= t[x2 & 0xff]; x2 >>= 8;
-    y1 ^= t[x3 & 0xff]; x3 >>= 8;
-    y2 ^= t[x0 & 0xff]; x0 >>= 8;
-    y3 ^= t[x1 & 0xff]; x1 >>= 8;
-    t += 256;
-
-    y0 ^= t[x3];
-    y1 ^= t[x0];
-    y2 ^= t[x1];
-    y3 ^= t[x2];
-
-    ((uint32_t*)ptr)[0] = y0 ^ ((uint32_t*)key)[0];
-    ((uint32_t*)ptr)[1] = y1 ^ ((uint32_t*)key)[1];
-    ((uint32_t*)ptr)[2] = y2 ^ ((uint32_t*)key)[2];
-    ((uint32_t*)ptr)[3] = y3 ^ ((uint32_t*)key)[3];
-}
-
-static FORCEINLINE __m128i soft_aesenc(const void* __restrict ptr, const __m128i key, const uint32_t* __restrict t)
-{
-    uint32_t x0 = ((const uint32_t*)(ptr))[0];
-    uint32_t x1 = ((const uint32_t*)(ptr))[1];
-    uint32_t x2 = ((const uint32_t*)(ptr))[2];
-    uint32_t x3 = ((const uint32_t*)(ptr))[3];
-
-    uint32_t y0 = t[x0 & 0xff]; x0 >>= 8;
-    uint32_t y1 = t[x1 & 0xff]; x1 >>= 8;
-    uint32_t y2 = t[x2 & 0xff]; x2 >>= 8;
-    uint32_t y3 = t[x3 & 0xff]; x3 >>= 8;
-    t += 256;
-
-    y0 ^= t[x1 & 0xff]; x1 >>= 8;
-    y1 ^= t[x2 & 0xff]; x2 >>= 8;
-    y2 ^= t[x3 & 0xff]; x3 >>= 8;
-    y3 ^= t[x0 & 0xff]; x0 >>= 8;
-    t += 256;
-
-    y0 ^= t[x2 & 0xff]; x2 >>= 8;
-    y1 ^= t[x3 & 0xff]; x3 >>= 8;
-    y2 ^= t[x0 & 0xff]; x0 >>= 8;
-    y3 ^= t[x1 & 0xff]; x1 >>= 8;
-
-    y0 ^= t[x3 + 256];
-    y1 ^= t[x0 + 256];
-    y2 ^= t[x1 + 256];
-    y3 ^= t[x2 + 256];
-
-    return _mm_xor_si128(_mm_set_epi32(y3, y2, y1, y0), key);
-}
-
-template<bool SOFT_AES>
-void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7);
-
-template<>
-NOINLINE void aes_round<true>(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
-{
-    *x0 = soft_aesenc((uint32_t*)x0, key, (const uint32_t*)saes_table);
-    *x1 = soft_aesenc((uint32_t*)x1, key, (const uint32_t*)saes_table);
-    *x2 = soft_aesenc((uint32_t*)x2, key, (const uint32_t*)saes_table);
-    *x3 = soft_aesenc((uint32_t*)x3, key, (const uint32_t*)saes_table);
-    *x4 = soft_aesenc((uint32_t*)x4, key, (const uint32_t*)saes_table);
-    *x5 = soft_aesenc((uint32_t*)x5, key, (const uint32_t*)saes_table);
-    *x6 = soft_aesenc((uint32_t*)x6, key, (const uint32_t*)saes_table);
-    *x7 = soft_aesenc((uint32_t*)x7, key, (const uint32_t*)saes_table);
-}
-
-template<>
-FORCEINLINE void aes_round<false>(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
-{
-    *x0 = _mm_aesenc_si128(*x0, key);
-    *x1 = _mm_aesenc_si128(*x1, key);
-    *x2 = _mm_aesenc_si128(*x2, key);
-    *x3 = _mm_aesenc_si128(*x3, key);
-    *x4 = _mm_aesenc_si128(*x4, key);
-    *x5 = _mm_aesenc_si128(*x5, key);
-    *x6 = _mm_aesenc_si128(*x6, key);
-    *x7 = _mm_aesenc_si128(*x7, key);
-}
-
-inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3, __m128i& x4, __m128i& x5, __m128i& x6, __m128i& x7)
-{
-    __m128i tmp0 = x0;
-    x0 = _mm_xor_si128(x0, x1);
-    x1 = _mm_xor_si128(x1, x2);
-    x2 = _mm_xor_si128(x2, x3);
-    x3 = _mm_xor_si128(x3, x4);
-    x4 = _mm_xor_si128(x4, x5);
-    x5 = _mm_xor_si128(x5, x6);
-    x6 = _mm_xor_si128(x6, x7);
-    x7 = _mm_xor_si128(x7, tmp0);
-}
-
-
-template<xmrig::Algo ALGO, size_t MEM, bool SOFT_AES>
-static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
-{
-    __m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
-    __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
-
-    aes_genkey<SOFT_AES>(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
-
-    xin0 = _mm_load_si128(input + 4);
-    xin1 = _mm_load_si128(input + 5);
-    xin2 = _mm_load_si128(input + 6);
-    xin3 = _mm_load_si128(input + 7);
-    xin4 = _mm_load_si128(input + 8);
-    xin5 = _mm_load_si128(input + 9);
-    xin6 = _mm_load_si128(input + 10);
-    xin7 = _mm_load_si128(input + 11);
-
-    if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
-        for (size_t i = 0; i < 16; i++) {
-            aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            aes_round<SOFT_AES>(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            aes_round<SOFT_AES>(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            aes_round<SOFT_AES>(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-            aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-
-            mix_and_propagate(xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
-        }
-    }
-
-    for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) {
-        aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-
-        _mm_store_si128(output + i + 0, xin0);
-        _mm_store_si128(output + i + 1, xin1);
-        _mm_store_si128(output + i + 2, xin2);
-        _mm_store_si128(output + i + 3, xin3);
-        _mm_store_si128(output + i + 4, xin4);
-        _mm_store_si128(output + i + 5, xin5);
-        _mm_store_si128(output + i + 6, xin6);
-        _mm_store_si128(output + i + 7, xin7);
-    }
-}
-
-
-#ifndef XMRIG_NO_CN_GPU
-template<xmrig::Algo ALGO, size_t MEM>
-void cn_explode_scratchpad_gpu(const uint8_t *input, uint8_t *output)
-{
-    constexpr size_t hash_size = 200; // 25x8 bytes
-    alignas(16) uint64_t hash[25];
-
-    for (uint64_t i = 0; i < MEM / 512; i++)
-    {
-        memcpy(hash, input, hash_size);
-        hash[0] ^= i;
-
-        xmrig::keccakf(hash, 24);
-        memcpy(output, hash, 160);
-        output += 160;
-
-        xmrig::keccakf(hash, 24);
-        memcpy(output, hash, 176);
-        output += 176;
-
-        xmrig::keccakf(hash, 24);
-        memcpy(output, hash, 176);
-        output += 176;
-    }
-}
-#endif
-
-
-template<xmrig::Algo ALGO, size_t MEM, bool SOFT_AES>
-static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
-{
-    __m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
-    __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
-
-    aes_genkey<SOFT_AES>(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
-
-    xout0 = _mm_load_si128(output + 4);
-    xout1 = _mm_load_si128(output + 5);
-    xout2 = _mm_load_si128(output + 6);
-    xout3 = _mm_load_si128(output + 7);
-    xout4 = _mm_load_si128(output + 8);
-    xout5 = _mm_load_si128(output + 9);
-    xout6 = _mm_load_si128(output + 10);
-    xout7 = _mm_load_si128(output + 11);
-
-    for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
-    {
-        xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
-        xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
-        xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
-        xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
-        xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
-        xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
-        xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
-        xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
-
-        aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-
-        if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
-            mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
-        }
-    }
-
-    if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
-        for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) {
-            xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
-            xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
-            xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
-            xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
-            xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
-            xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
-            xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
-            xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
-
-            aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-
-            mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
-        }
-
-        for (size_t i = 0; i < 16; i++) {
-            aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-            aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-
-            mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
-        }
-    }
-
-    _mm_store_si128(output + 4, xout0);
-    _mm_store_si128(output + 5, xout1);
-    _mm_store_si128(output + 6, xout2);
-    _mm_store_si128(output + 7, xout3);
-    _mm_store_si128(output + 8, xout4);
-    _mm_store_si128(output + 9, xout5);
-    _mm_store_si128(output + 10, xout6);
-    _mm_store_si128(output + 11, xout7);
-}
-
-
-static inline __m128i aes_round_tweak_div(const __m128i &in, const __m128i &key)
-{
-    alignas(16) uint32_t k[4];
-    alignas(16) uint32_t x[4];
-
-    _mm_store_si128((__m128i*) k, key);
-    _mm_store_si128((__m128i*) x, _mm_xor_si128(in, _mm_set_epi64x(0xffffffffffffffff, 0xffffffffffffffff)));
-
-    #define BYTE(p, i) ((unsigned char*)&x[p])[i]
-    k[0] ^= saes_table[0][BYTE(0, 0)] ^ saes_table[1][BYTE(1, 1)] ^ saes_table[2][BYTE(2, 2)] ^ saes_table[3][BYTE(3, 3)];
-    x[0] ^= k[0];
-    k[1] ^= saes_table[0][BYTE(1, 0)] ^ saes_table[1][BYTE(2, 1)] ^ saes_table[2][BYTE(3, 2)] ^ saes_table[3][BYTE(0, 3)];
-    x[1] ^= k[1];
-    k[2] ^= saes_table[0][BYTE(2, 0)] ^ saes_table[1][BYTE(3, 1)] ^ saes_table[2][BYTE(0, 2)] ^ saes_table[3][BYTE(1, 3)];
-    x[2] ^= k[2];
-    k[3] ^= saes_table[0][BYTE(3, 0)] ^ saes_table[1][BYTE(0, 1)] ^ saes_table[2][BYTE(1, 2)] ^ saes_table[3][BYTE(2, 3)];
-    #undef BYTE
-
-    return _mm_load_si128((__m128i*)k);
-}
-
-
-static inline __m128i int_sqrt_v2(const uint64_t n0)
-{
-    __m128d x = _mm_castsi128_pd(_mm_add_epi64(_mm_cvtsi64_si128(n0 >> 12), _mm_set_epi64x(0, 1023ULL << 52)));
-    x = _mm_sqrt_sd(_mm_setzero_pd(), x);
-    uint64_t r = static_cast<uint64_t>(_mm_cvtsi128_si64(_mm_castpd_si128(x)));
-
-    const uint64_t s = r >> 20;
-    r >>= 19;
-
-    uint64_t x2 = (s - (1022ULL << 32)) * (r - s - (1022ULL << 32) + 1);
-#   if (defined(_MSC_VER) || __GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ > 1)) && (defined(__x86_64__) || defined(_M_AMD64))
-    _addcarry_u64(_subborrow_u64(0, x2, n0, (unsigned long long int*)&x2), r, 0, (unsigned long long int*)&r);
-#   else
-    if (x2 < n0) ++r;
-#   endif
-
-    return _mm_cvtsi64_si128(r);
-}
-
-
-template<xmrig::Variant VARIANT, xmrig::Variant BASE>
-static inline void cryptonight_monero_tweak(uint64_t* mem_out, const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i bx1, __m128i& cx)
-{
-    if (BASE == xmrig::VARIANT_2) {
-        VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
-        _mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx));
-    } else {
-        __m128i tmp = _mm_xor_si128(bx0, cx);
-        mem_out[0] = _mm_cvtsi128_si64(tmp);
-
-        tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
-        uint64_t vh = _mm_cvtsi128_si64(tmp);
-
-        uint8_t x = static_cast<uint8_t>(vh >> 24);
-        static const uint16_t table = 0x7531;
-        const uint8_t index = (((x >> (VARIANT == xmrig::VARIANT_XTL ? 4 : 3)) & 6) | (x & 1)) << 1;
-        vh ^= ((table >> index) & 0x3) << 28;
-
-        mem_out[1] = vh;
-    }
-}
-
-void wow_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
-void v4_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
-
-template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
-inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
-{
-    constexpr size_t MASK         = xmrig::cn_select_mask<ALGO>();
-    constexpr size_t ITERATIONS   = xmrig::cn_select_iter<ALGO, VARIANT>();
-    constexpr size_t MEM          = xmrig::cn_select_memory<ALGO>();
-    constexpr xmrig::Variant BASE = xmrig::cn_base_variant<VARIANT>();
-
-    static_assert(MASK > 0 && ITERATIONS > 0 && MEM > 0, "unsupported algorithm/variant");
-
-    if (BASE == xmrig::VARIANT_1 && size < 43) {
-        memset(output, 0, 32);
-        return;
-    }
-
-    xmrig::keccak(input, size, ctx[0]->state);
-
-    cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
-
-    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
-
-#ifndef XMRIG_NO_ASM
-    if (SOFT_AES && xmrig::cn_is_cryptonight_r<VARIANT>())
-    {
-        if (!ctx[0]->generated_code_data.match(VARIANT, height)) {
-            V4_Instruction code[256];
-            const int code_size = v4_random_math_init<VARIANT>(code, height);
-
-            if (VARIANT == xmrig::VARIANT_WOW)
-                wow_soft_aes_compile_code(code, code_size, reinterpret_cast<void*>(ctx[0]->generated_code), xmrig::ASM_NONE);
-            else if (VARIANT == xmrig::VARIANT_4)
-                v4_soft_aes_compile_code(code, code_size, reinterpret_cast<void*>(ctx[0]->generated_code), xmrig::ASM_NONE);
-
-            ctx[0]->generated_code_data.variant = VARIANT;
-            ctx[0]->generated_code_data.height = height;
-        }
-
-        ctx[0]->saes_table = (const uint32_t*)saes_table;
-        ctx[0]->generated_code(ctx);
-    } else {
-#endif
-
-    const uint8_t* l0 = ctx[0]->memory;
-
-    VARIANT1_INIT(0);
-    VARIANT2_INIT(0);
-    VARIANT2_SET_ROUNDING_MODE();
-    VARIANT4_RANDOM_MATH_INIT(0);
-
-    uint64_t al0 = h0[0] ^ h0[4];
-    uint64_t ah0 = h0[1] ^ h0[5];
-    __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
-    __m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
-
-    uint64_t idx0 = al0;
-
-    for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx;
-        if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) {
-            cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
-        }
-
-        const __m128i ax0 = _mm_set_epi64x(ah0, al0);
-        if (VARIANT == xmrig::VARIANT_TUBE) {
-            cx = aes_round_tweak_div(cx, ax0);
-        }
-        else if (SOFT_AES) {
-            cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0, (const uint32_t*)saes_table);
-        }
-        else {
-            cx = _mm_aesenc_si128(cx, ax0);
-        }
-
-        if (BASE == xmrig::VARIANT_1 || BASE == xmrig::VARIANT_2) {
-            cryptonight_monero_tweak<VARIANT, BASE>((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx0, bx1, cx);
-        } else {
-            _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
-        }
-
-        idx0 = _mm_cvtsi128_si64(cx);
-
-        uint64_t hi, lo, cl, ch;
-        cl = ((uint64_t*) &l0[idx0 & MASK])[0];
-        ch = ((uint64_t*) &l0[idx0 & MASK])[1];
-
-        if (BASE == xmrig::VARIANT_2) {
-            if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) {
-                VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1);
-                if (VARIANT == xmrig::VARIANT_4) {
-                    al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
-                    ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
-                }
-            } else {
-                VARIANT2_INTEGER_MATH(0, cl, cx);
-            }
-        }
-
-        lo = __umul128(idx0, cl, &hi);
-
-        if (BASE == xmrig::VARIANT_2) {
-            if (VARIANT == xmrig::VARIANT_4) {
-                VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0);
-            } else {
-                VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
-            }
-        }
-
-        al0 += hi;
-        ah0 += lo;
-
-        ((uint64_t*)&l0[idx0 & MASK])[0] = al0;
-
-        if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
-            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
-        } else if (BASE == xmrig::VARIANT_1) {
-            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
-        } else {
-            ((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
-        }
-
-        al0 ^= cl;
-        ah0 ^= ch;
-        idx0 = al0;
-
-        if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
-            int64_t n = ((int64_t*)&l0[idx0 & MASK])[0];
-            int32_t d = ((int32_t*)&l0[idx0 & MASK])[2];
-            int64_t q = n / (d | 0x5);
-
-            ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
-
-            if (VARIANT == xmrig::VARIANT_XHV) {
-                d = ~d;
-            }
-
-            idx0 = d ^ q;
-        }
-
-        if (BASE == xmrig::VARIANT_2) {
-            bx1 = bx0;
-        }
-
-        bx0 = cx;
-    }
-
-#ifndef XMRIG_NO_ASM
-    }
-#endif
-        
-    cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
-
-    xmrig::keccakf(h0, 24);
-    extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
-}
-
-
-#ifndef XMRIG_NO_CN_GPU
-template<size_t ITER, uint32_t MASK>
-void cn_gpu_inner_avx(const uint8_t *spad, uint8_t *lpad);
-
-
-template<size_t ITER, uint32_t MASK>
-void cn_gpu_inner_ssse3(const uint8_t *spad, uint8_t *lpad);
-
-
-template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
-inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
-{
-    constexpr size_t MASK         = xmrig::CRYPTONIGHT_GPU_MASK;
-    constexpr size_t ITERATIONS   = xmrig::cn_select_iter<ALGO, VARIANT>();
-    constexpr size_t MEM          = xmrig::cn_select_memory<ALGO>();
-
-    static_assert(MASK > 0 && ITERATIONS > 0 && MEM > 0, "unsupported algorithm/variant");
-
-    xmrig::keccak(input, size, ctx[0]->state);
-    cn_explode_scratchpad_gpu<ALGO, MEM>(ctx[0]->state, ctx[0]->memory);
-
-#   ifdef _MSC_VER
-    _control87(RC_NEAR, MCW_RC);
-#   else
-    fesetround(FE_TONEAREST);
-#   endif
-
-    if (xmrig::Cpu::info()->hasAVX2()) {
-        cn_gpu_inner_avx<ITERATIONS, MASK>(ctx[0]->state, ctx[0]->memory);
-    } else {
-        cn_gpu_inner_ssse3<ITERATIONS, MASK>(ctx[0]->state, ctx[0]->memory);
-    }
-
-    cn_implode_scratchpad<xmrig::CRYPTONIGHT_HEAVY, MEM, SOFT_AES>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
-
-    xmrig::keccakf((uint64_t*) ctx[0]->state, 24);
-    memcpy(output, ctx[0]->state, 32);
-}
-#endif
-
-
-#ifndef XMRIG_NO_ASM
-extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx);
-extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx);
-extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx);
-extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx);
-extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx **ctx);
-extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx **ctx);
-
-extern xmrig::CpuThread::cn_mainloop_fun        cn_half_mainloop_ivybridge_asm;
-extern xmrig::CpuThread::cn_mainloop_fun        cn_half_mainloop_ryzen_asm;
-extern xmrig::CpuThread::cn_mainloop_fun        cn_half_mainloop_bulldozer_asm;
-extern xmrig::CpuThread::cn_mainloop_fun        cn_half_double_mainloop_sandybridge_asm;
-
-extern xmrig::CpuThread::cn_mainloop_fun        cn_trtl_mainloop_ivybridge_asm;
-extern xmrig::CpuThread::cn_mainloop_fun        cn_trtl_mainloop_ryzen_asm;
-extern xmrig::CpuThread::cn_mainloop_fun        cn_trtl_mainloop_bulldozer_asm;
-extern xmrig::CpuThread::cn_mainloop_fun        cn_trtl_double_mainloop_sandybridge_asm;
-
-extern xmrig::CpuThread::cn_mainloop_fun        cn_zls_mainloop_ivybridge_asm;
-extern xmrig::CpuThread::cn_mainloop_fun        cn_zls_mainloop_ryzen_asm;
-extern xmrig::CpuThread::cn_mainloop_fun        cn_zls_mainloop_bulldozer_asm;
-extern xmrig::CpuThread::cn_mainloop_fun        cn_zls_double_mainloop_sandybridge_asm;
-
-extern xmrig::CpuThread::cn_mainloop_fun        cn_double_mainloop_ivybridge_asm;
-extern xmrig::CpuThread::cn_mainloop_fun        cn_double_mainloop_ryzen_asm;
-extern xmrig::CpuThread::cn_mainloop_fun        cn_double_mainloop_bulldozer_asm;
-extern xmrig::CpuThread::cn_mainloop_fun        cn_double_double_mainloop_sandybridge_asm;
-
-void wow_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
-void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
-void wow_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
-void v4_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM);
-
-template<xmrig::Variant VARIANT>
-void cn_r_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
-{
-    v4_compile_code(code, code_size, machine_code, ASM);
-}
-
-template<xmrig::Variant VARIANT>
-void cn_r_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
-{
-    v4_compile_code_double(code, code_size, machine_code, ASM);
-}
-
-template<>
-void cn_r_compile_code<xmrig::VARIANT_WOW>(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
-{
-    wow_compile_code(code, code_size, machine_code, ASM);
-}
-
-template<>
-void cn_r_compile_code_double<xmrig::VARIANT_WOW>(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
-{
-    wow_compile_code_double(code, code_size, machine_code, ASM);
-}
-
-template<xmrig::Algo ALGO, xmrig::Variant VARIANT, xmrig::Assembly ASM>
-inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
-{
-    constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
-
-    if (xmrig::cn_is_cryptonight_r<VARIANT>() && !ctx[0]->generated_code_data.match(VARIANT, height)) {
-        V4_Instruction code[256];
-        const int code_size = v4_random_math_init<VARIANT>(code, height);
-        cn_r_compile_code<VARIANT>(code, code_size, reinterpret_cast<void*>(ctx[0]->generated_code), ASM);
-        ctx[0]->generated_code_data.variant = VARIANT;
-        ctx[0]->generated_code_data.height = height;
-    }
-
-    xmrig::keccak(input, size, ctx[0]->state);
-    cn_explode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
-
-    if (VARIANT == xmrig::VARIANT_2) {
-        if (ASM == xmrig::ASM_INTEL) {
-            cnv2_mainloop_ivybridge_asm(ctx);
-        }
-        else if (ASM == xmrig::ASM_RYZEN) {
-            cnv2_mainloop_ryzen_asm(ctx);
-        }
-        else {
-            cnv2_mainloop_bulldozer_asm(ctx);
-        }
-    }
-    else if (VARIANT == xmrig::VARIANT_HALF) {
-        if (ASM == xmrig::ASM_INTEL) {
-            cn_half_mainloop_ivybridge_asm(ctx);
-        }
-        else if (ASM == xmrig::ASM_RYZEN) {
-            cn_half_mainloop_ryzen_asm(ctx);
-        }
-        else {
-            cn_half_mainloop_bulldozer_asm(ctx);
-        }
-    }
-    else if (VARIANT == xmrig::VARIANT_TRTL) {
-        if (ASM == xmrig::ASM_INTEL) {
-            cn_trtl_mainloop_ivybridge_asm(ctx);
-        }
-        else if (ASM == xmrig::ASM_RYZEN) {
-            cn_trtl_mainloop_ryzen_asm(ctx);
-        }
-        else {
-            cn_trtl_mainloop_bulldozer_asm(ctx);
-        }
-    }
-    else if (VARIANT == xmrig::VARIANT_RWZ) {
-        cnv2_rwz_mainloop_asm(ctx);
-    }
-    else if (VARIANT == xmrig::VARIANT_ZLS) {
-        if (ASM == xmrig::ASM_INTEL) {
-            cn_zls_mainloop_ivybridge_asm(ctx);
-        }
-        else if (ASM == xmrig::ASM_RYZEN) {
-            cn_zls_mainloop_ryzen_asm(ctx);
-        }
-        else {
-            cn_zls_mainloop_bulldozer_asm(ctx);
-        }
-    }
-    else if (VARIANT == xmrig::VARIANT_DOUBLE) {
-        if (ASM == xmrig::ASM_INTEL) {
-            cn_double_mainloop_ivybridge_asm(ctx);
-        }
-        else if (ASM == xmrig::ASM_RYZEN) {
-            cn_double_mainloop_ryzen_asm(ctx);
-        }
-        else {
-            cn_double_mainloop_bulldozer_asm(ctx);
-        }
-    }
-    else if (xmrig::cn_is_cryptonight_r<VARIANT>()) {
-        ctx[0]->generated_code(ctx);
-    }
-
-    cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
-    xmrig::keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
-    extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
-}
-
-
-template<xmrig::Algo ALGO, xmrig::Variant VARIANT, xmrig::Assembly ASM>
-inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
-{
-    constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
-
-    if (xmrig::cn_is_cryptonight_r<VARIANT>() && !ctx[0]->generated_code_double_data.match(VARIANT, height)) {
-        V4_Instruction code[256];
-        const int code_size = v4_random_math_init<VARIANT>(code, height);
-        cn_r_compile_code_double<VARIANT>(code, code_size, reinterpret_cast<void*>(ctx[0]->generated_code_double), ASM);
-        ctx[0]->generated_code_double_data.variant = VARIANT;
-        ctx[0]->generated_code_double_data.height = height;
-    }
-
-    xmrig::keccak(input,        size, ctx[0]->state);
-    xmrig::keccak(input + size, size, ctx[1]->state);
-
-    cn_explode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
-    cn_explode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory));
-
-    if (VARIANT == xmrig::VARIANT_2) {
-        cnv2_double_mainloop_sandybridge_asm(ctx);
-    }
-    else if (VARIANT == xmrig::VARIANT_HALF) {
-        cn_half_double_mainloop_sandybridge_asm(ctx);
-    }
-    else if (VARIANT == xmrig::VARIANT_TRTL) {
-        cn_trtl_double_mainloop_sandybridge_asm(ctx);
-    }
-    else if (VARIANT == xmrig::VARIANT_RWZ) {
-        cnv2_rwz_double_mainloop_asm(ctx);
-    }
-    else if (VARIANT == xmrig::VARIANT_ZLS) {
-        cn_zls_double_mainloop_sandybridge_asm(ctx);
-    }
-    else if (VARIANT == xmrig::VARIANT_DOUBLE) {
-        cn_double_double_mainloop_sandybridge_asm(ctx);
-    }
-    else if (xmrig::cn_is_cryptonight_r<VARIANT>()) {
-        ctx[0]->generated_code_double(ctx);
-    }
-
-    cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
-    cn_implode_scratchpad<ALGO, MEM, false>(reinterpret_cast<__m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state));
-
-    xmrig::keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
-    xmrig::keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
-
-    extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
-    extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
-}
-#endif
-
-
-template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
-inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
-{
-    constexpr size_t MASK         = xmrig::cn_select_mask<ALGO>();
-    constexpr size_t ITERATIONS   = xmrig::cn_select_iter<ALGO, VARIANT>();
-    constexpr size_t MEM          = xmrig::cn_select_memory<ALGO>();
-    constexpr xmrig::Variant BASE = xmrig::cn_base_variant<VARIANT>();
-
-    if (BASE == xmrig::VARIANT_1 && size < 43) {
-        memset(output, 0, 64);
-        return;
-    }
-
-    xmrig::keccak(input,        size, ctx[0]->state);
-    xmrig::keccak(input + size, size, ctx[1]->state);
-
-    const uint8_t* l0 = ctx[0]->memory;
-    const uint8_t* l1 = ctx[1]->memory;
-    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
-    uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
-
-    VARIANT1_INIT(0);
-    VARIANT1_INIT(1);
-    VARIANT2_INIT(0);
-    VARIANT2_INIT(1);
-    VARIANT2_SET_ROUNDING_MODE();
-    VARIANT4_RANDOM_MATH_INIT(0);
-    VARIANT4_RANDOM_MATH_INIT(1);
-
-    cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
-    cn_explode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
-
-    uint64_t al0 = h0[0] ^ h0[4];
-    uint64_t al1 = h1[0] ^ h1[4];
-    uint64_t ah0 = h0[1] ^ h0[5];
-    uint64_t ah1 = h1[1] ^ h1[5];
-
-    __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
-    __m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
-    __m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
-    __m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
-
-    uint64_t idx0 = al0;
-    uint64_t idx1 = al1;
-
-    for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0, cx1;
-        if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) {
-            cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
-            cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
-        }
-
-        const __m128i ax0 = _mm_set_epi64x(ah0, al0);
-        const __m128i ax1 = _mm_set_epi64x(ah1, al1);
-        if (VARIANT == xmrig::VARIANT_TUBE) {
-            cx0 = aes_round_tweak_div(cx0, ax0);
-            cx1 = aes_round_tweak_div(cx1, ax1);
-        }
-        else if (SOFT_AES) {
-            cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0, (const uint32_t*)saes_table);
-            cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], ax1, (const uint32_t*)saes_table);
-        }
-        else {
-            cx0 = _mm_aesenc_si128(cx0, ax0);
-            cx1 = _mm_aesenc_si128(cx1, ax1);
-        }
-
-        if (BASE == xmrig::VARIANT_1 || (BASE == xmrig::VARIANT_2)) {
-            cryptonight_monero_tweak<VARIANT, BASE>((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx00, bx01, cx0);
-            cryptonight_monero_tweak<VARIANT, BASE>((uint64_t*)&l1[idx1 & MASK], l1, idx1 & MASK, ax1, bx10, bx11, cx1);
-        } else {
-            _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
-            _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx10, cx1));
-        }
-
-        idx0 = _mm_cvtsi128_si64(cx0);
-        idx1 = _mm_cvtsi128_si64(cx1);
-
-        uint64_t hi, lo, cl, ch;
-        cl = ((uint64_t*) &l0[idx0 & MASK])[0];
-        ch = ((uint64_t*) &l0[idx0 & MASK])[1];
-
-        if (BASE == xmrig::VARIANT_2) {
-            if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) {
-                VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01);
-                if (VARIANT == xmrig::VARIANT_4) {
-                    al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
-                    ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
-                }
-            } else {
-                VARIANT2_INTEGER_MATH(0, cl, cx0);
-            }
-        }
-
-        lo = __umul128(idx0, cl, &hi);
-
-        if (BASE == xmrig::VARIANT_2) {
-            if (VARIANT == xmrig::VARIANT_4) {
-                VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0);
-            } else {
-                VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
-            }
-        }
-
-        al0 += hi;
-        ah0 += lo;
-
-        ((uint64_t*)&l0[idx0 & MASK])[0] = al0;
-
-        if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
-            ((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
-        } else if (BASE == xmrig::VARIANT_1) {
-            ((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
-        } else {
-            ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
-        }
-
-        al0 ^= cl;
-        ah0 ^= ch;
-        idx0 = al0;
-
-        if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
-            int64_t n = ((int64_t*)&l0[idx0 & MASK])[0];
-            int32_t d = ((int32_t*)&l0[idx0 & MASK])[2];
-            int64_t q = n / (d | 0x5);
-
-            ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
-
-            if (VARIANT == xmrig::VARIANT_XHV) {
-                d = ~d;
-            }
-
-            idx0 = d ^ q;
-        }
-
-        cl = ((uint64_t*) &l1[idx1 & MASK])[0];
-        ch = ((uint64_t*) &l1[idx1 & MASK])[1];
-
-        if (BASE == xmrig::VARIANT_2) {
-            if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) {
-                VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11);
-                if (VARIANT == xmrig::VARIANT_4) {
-                    al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
-                    ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
-                }
-            } else {
-                VARIANT2_INTEGER_MATH(1, cl, cx1);
-            }
-        }
-
-        lo = __umul128(idx1, cl, &hi);
-
-        if (BASE == xmrig::VARIANT_2) {
-            if (VARIANT == xmrig::VARIANT_4) {
-                VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0);
-            } else {
-                VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0));
-            }
-        }
-
-        al1 += hi;
-        ah1 += lo;
-
-        ((uint64_t*)&l1[idx1 & MASK])[0] = al1;
-
-        if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) {
-            ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1;
-        } else if (BASE == xmrig::VARIANT_1) {
-            ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
-        } else {
-            ((uint64_t*)&l1[idx1 & MASK])[1] = ah1;
-        }
-
-        al1 ^= cl;
-        ah1 ^= ch;
-        idx1 = al1;
-
-        if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {
-            int64_t n = ((int64_t*)&l1[idx1 & MASK])[0];
-            int32_t d = ((int32_t*)&l1[idx1 & MASK])[2];
-            int64_t q = n / (d | 0x5);
-
-            ((int64_t*)&l1[idx1 & MASK])[0] = n ^ q;
-
-            if (VARIANT == xmrig::VARIANT_XHV) {
-                d = ~d;
-            }
-
-            idx1 = d ^ q;
-        }
-
-        if (BASE == xmrig::VARIANT_2) {
-            bx01 = bx00;
-            bx11 = bx10;
-        }
-
-        bx00 = cx0;
-        bx10 = cx1;
-    }
-
-    cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
-    cn_implode_scratchpad<ALGO, MEM, SOFT_AES>((__m128i*) l1, (__m128i*) h1);
-
-    xmrig::keccakf(h0, 24);
-    xmrig::keccakf(h1, 24);
-
-    extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
-    extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
-}
-
-
-#define CN_STEP1(a, b0, b1, c, l, ptr, idx)           \
-    ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
-    c = _mm_load_si128(ptr);
-
-
-#define CN_STEP2(a, b0, b1, c, l, ptr, idx)                            \
-    if (VARIANT == xmrig::VARIANT_TUBE) {                              \
-        c = aes_round_tweak_div(c, a);                                 \
-    }                                                                  \
-    else if (SOFT_AES) {                                               \
-        c = soft_aesenc(&c, a, (const uint32_t*)saes_table);           \
-    } else {                                                           \
-        c = _mm_aesenc_si128(c, a);                                    \
-    }                                                                  \
-                                                                       \
-    if (BASE == xmrig::VARIANT_1 || BASE == xmrig::VARIANT_2) {                               \
-        cryptonight_monero_tweak<VARIANT, BASE>((uint64_t*)ptr, l, idx & MASK, a, b0, b1, c); \
-    } else {                                                           \
-        _mm_store_si128(ptr, _mm_xor_si128(b0, c));                    \
-    }
-
-
-#define CN_STEP3(part, a, b0, b1, c, l, ptr, idx)     \
-    idx = _mm_cvtsi128_si64(c);                       \
-    ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
-    uint64_t cl##part = ((uint64_t*)ptr)[0];          \
-    uint64_t ch##part = ((uint64_t*)ptr)[1];
-
-
-#define CN_STEP4(part, a, b0, b1, c, l, mc, ptr, idx)   \
-    uint64_t al##part, ah##part;                        \
-    if (BASE == xmrig::VARIANT_2) {                     \
-        if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { \
-            al##part = _mm_cvtsi128_si64(a);            \
-            ah##part = _mm_cvtsi128_si64(_mm_srli_si128(a, 8)); \
-            VARIANT4_RANDOM_MATH(part, al##part, ah##part, cl##part, b0, b1); \
-            if (VARIANT == xmrig::VARIANT_4) { \
-                al##part ^= r##part[2] | ((uint64_t)(r##part[3]) << 32); \
-                ah##part ^= r##part[0] | ((uint64_t)(r##part[1]) << 32); \
-            }                                           \
-        } else {                                        \
-            VARIANT2_INTEGER_MATH(part, cl##part, c);   \
-        }                                               \
-    }                                                   \
-    lo = __umul128(idx, cl##part, &hi);                 \
-    if (BASE == xmrig::VARIANT_2) {                     \
-        if (VARIANT == xmrig::VARIANT_4) { \
-            VARIANT2_SHUFFLE(l, idx & MASK, a, b0, b1, c, 0); \
-        } else {                                        \
-            VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0)); \
-        } \
-    }                                                   \
-    if (VARIANT == xmrig::VARIANT_4) { \
-        a = _mm_set_epi64x(ah##part, al##part);         \
-    }                                                   \
-    a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi));       \
-                                                        \
-    if (BASE == xmrig::VARIANT_1) {                     \
-        _mm_store_si128(ptr, _mm_xor_si128(a, mc));     \
-                                                        \
-        if (VARIANT == xmrig::VARIANT_TUBE ||           \
-            VARIANT == xmrig::VARIANT_RTO) {            \
-            ((uint64_t*)ptr)[1] ^= ((uint64_t*)ptr)[0]; \
-        }                                               \
-    } else {                                            \
-        _mm_store_si128(ptr, a);                        \
-    }                                                   \
-                                                        \
-    a = _mm_xor_si128(a, _mm_set_epi64x(ch##part, cl##part)); \
-    idx = _mm_cvtsi128_si64(a);                         \
-                                                        \
-    if (ALGO == xmrig::CRYPTONIGHT_HEAVY) {             \
-        int64_t n = ((int64_t*)&l[idx & MASK])[0];      \
-        int32_t d = ((int32_t*)&l[idx & MASK])[2];      \
-        int64_t q = n / (d | 0x5);                      \
-        ((int64_t*)&l[idx & MASK])[0] = n ^ q;          \
-        if (VARIANT == xmrig::VARIANT_XHV) {            \
-            d = ~d;                                     \
-        }                                               \
-                                                        \
-        idx = d ^ q;                                    \
-    }                                                   \
-    if (BASE == xmrig::VARIANT_2) {                     \
-        b1 = b0;                                        \
-    }                                                   \
-    b0 = c;
-
-
-#define CONST_INIT(ctx, n)                                                                       \
-    __m128i mc##n;                                                                               \
-    __m128i division_result_xmm_##n;                                                             \
-    __m128i sqrt_result_xmm_##n;                                                                 \
-    if (BASE == xmrig::VARIANT_1) {                                                              \
-        mc##n = _mm_set_epi64x(*reinterpret_cast<const uint64_t*>(input + n * size + 35) ^       \
-                               *(reinterpret_cast<const uint64_t*>((ctx)->state) + 24), 0);      \
-    }                                                                                            \
-    if (BASE == xmrig::VARIANT_2) {                                                              \
-        division_result_xmm_##n = _mm_cvtsi64_si128(h##n[12]);                                   \
-        sqrt_result_xmm_##n = _mm_cvtsi64_si128(h##n[13]);                                       \
-    }                                                                                            \
-    __m128i ax##n = _mm_set_epi64x(h##n[1] ^ h##n[5], h##n[0] ^ h##n[4]);                        \
-    __m128i bx##n##0 = _mm_set_epi64x(h##n[3] ^ h##n[7], h##n[2] ^ h##n[6]);                     \
-    __m128i bx##n##1 = _mm_set_epi64x(h##n[9] ^ h##n[11], h##n[8] ^ h##n[10]);                   \
-    __m128i cx##n = _mm_setzero_si128();                                                         \
-    VARIANT4_RANDOM_MATH_INIT(n);
-
-
-template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
-inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
-{
-    constexpr size_t MASK         = xmrig::cn_select_mask<ALGO>();
-    constexpr size_t ITERATIONS   = xmrig::cn_select_iter<ALGO, VARIANT>();
-    constexpr size_t MEM          = xmrig::cn_select_memory<ALGO>();
-    constexpr xmrig::Variant BASE = xmrig::cn_base_variant<VARIANT>();
-
-    if (BASE == xmrig::VARIANT_1 && size < 43) {
-        memset(output, 0, 32 * 3);
-        return;
-    }
-
-    for (size_t i = 0; i < 3; i++) {
-        xmrig::keccak(input + size * i, size, ctx[i]->state);
-        cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
-    }
-
-    uint8_t* l0  = ctx[0]->memory;
-    uint8_t* l1  = ctx[1]->memory;
-    uint8_t* l2  = ctx[2]->memory;
-    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
-    uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
-    uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);
-
-    CONST_INIT(ctx[0], 0);
-    CONST_INIT(ctx[1], 1);
-    CONST_INIT(ctx[2], 2);
-    VARIANT2_SET_ROUNDING_MODE();
-
-    uint64_t idx0, idx1, idx2;
-    idx0 = _mm_cvtsi128_si64(ax0);
-    idx1 = _mm_cvtsi128_si64(ax1);
-    idx2 = _mm_cvtsi128_si64(ax2);
-
-    for (size_t i = 0; i < ITERATIONS; i++) {
-        uint64_t hi, lo;
-        __m128i *ptr0, *ptr1, *ptr2;
-
-        CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
-        CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
-        CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2);
-
-        CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
-        CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
-        CN_STEP2(ax2, bx20, bx21, cx2, l2, ptr2, idx2);
-
-        CN_STEP3(0, ax0, bx00, bx01, cx0, l0, ptr0, idx0);
-        CN_STEP3(1, ax1, bx10, bx11, cx1, l1, ptr1, idx1);
-        CN_STEP3(2, ax2, bx20, bx21, cx2, l2, ptr2, idx2);
-
-        CN_STEP4(0, ax0, bx00, bx01, cx0, l0, mc0, ptr0, idx0);
-        CN_STEP4(1, ax1, bx10, bx11, cx1, l1, mc1, ptr1, idx1);
-        CN_STEP4(2, ax2, bx20, bx21, cx2, l2, mc2, ptr2, idx2);
-    }
-
-    for (size_t i = 0; i < 3; i++) {
-        cn_implode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
-        xmrig::keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
-        extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
-    }
-}
-
-
-template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
-inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
-{
-    constexpr size_t MASK         = xmrig::cn_select_mask<ALGO>();
-    constexpr size_t ITERATIONS   = xmrig::cn_select_iter<ALGO, VARIANT>();
-    constexpr size_t MEM          = xmrig::cn_select_memory<ALGO>();
-    constexpr xmrig::Variant BASE = xmrig::cn_base_variant<VARIANT>();
-
-    if (BASE == xmrig::VARIANT_1 && size < 43) {
-        memset(output, 0, 32 * 4);
-        return;
-    }
-
-    for (size_t i = 0; i < 4; i++) {
-        xmrig::keccak(input + size * i, size, ctx[i]->state);
-        cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
-    }
-
-    uint8_t* l0  = ctx[0]->memory;
-    uint8_t* l1  = ctx[1]->memory;
-    uint8_t* l2  = ctx[2]->memory;
-    uint8_t* l3  = ctx[3]->memory;
-    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
-    uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
-    uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);
-    uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx[3]->state);
-
-    CONST_INIT(ctx[0], 0);
-    CONST_INIT(ctx[1], 1);
-    CONST_INIT(ctx[2], 2);
-    CONST_INIT(ctx[3], 3);
-    VARIANT2_SET_ROUNDING_MODE();
-
-    uint64_t idx0, idx1, idx2, idx3;
-    idx0 = _mm_cvtsi128_si64(ax0);
-    idx1 = _mm_cvtsi128_si64(ax1);
-    idx2 = _mm_cvtsi128_si64(ax2);
-    idx3 = _mm_cvtsi128_si64(ax3);
-
-    for (size_t i = 0; i < ITERATIONS; i++)
-    {
-        uint64_t hi, lo;
-        __m128i *ptr0, *ptr1, *ptr2, *ptr3;
-
-        CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
-        CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
-        CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2);
-        CN_STEP1(ax3, bx30, bx31, cx3, l3, ptr3, idx3);
-
-        CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
-        CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
-        CN_STEP2(ax2, bx20, bx21, cx2, l2, ptr2, idx2);
-        CN_STEP2(ax3, bx30, bx31, cx3, l3, ptr3, idx3);
-
-        CN_STEP3(0, ax0, bx00, bx01, cx0, l0, ptr0, idx0);
-        CN_STEP3(1, ax1, bx10, bx11, cx1, l1, ptr1, idx1);
-        CN_STEP3(2, ax2, bx20, bx21, cx2, l2, ptr2, idx2);
-        CN_STEP3(3, ax3, bx30, bx31, cx3, l3, ptr3, idx3);
-
-        CN_STEP4(0, ax0, bx00, bx01, cx0, l0, mc0, ptr0, idx0);
-        CN_STEP4(1, ax1, bx10, bx11, cx1, l1, mc1, ptr1, idx1);
-        CN_STEP4(2, ax2, bx20, bx21, cx2, l2, mc2, ptr2, idx2);
-        CN_STEP4(3, ax3, bx30, bx31, cx3, l3, mc3, ptr3, idx3);
-    }
-
-    for (size_t i = 0; i < 4; i++) {
-        cn_implode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
-        xmrig::keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
-        extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
-    }
-}
-
-
-template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
-inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
-{
-    constexpr size_t MASK         = xmrig::cn_select_mask<ALGO>();
-    constexpr size_t ITERATIONS   = xmrig::cn_select_iter<ALGO, VARIANT>();
-    constexpr size_t MEM          = xmrig::cn_select_memory<ALGO>();
-    constexpr xmrig::Variant BASE = xmrig::cn_base_variant<VARIANT>();
-
-    if (BASE == xmrig::VARIANT_1 && size < 43) {
-        memset(output, 0, 32 * 5);
-        return;
-    }
-
-    for (size_t i = 0; i < 5; i++) {
-        xmrig::keccak(input + size * i, size, ctx[i]->state);
-        cn_explode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
-    }
-
-    uint8_t* l0  = ctx[0]->memory;
-    uint8_t* l1  = ctx[1]->memory;
-    uint8_t* l2  = ctx[2]->memory;
-    uint8_t* l3  = ctx[3]->memory;
-    uint8_t* l4  = ctx[4]->memory;
-    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
-    uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
-    uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);
-    uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx[3]->state);
-    uint64_t* h4 = reinterpret_cast<uint64_t*>(ctx[4]->state);
-
-    CONST_INIT(ctx[0], 0);
-    CONST_INIT(ctx[1], 1);
-    CONST_INIT(ctx[2], 2);
-    CONST_INIT(ctx[3], 3);
-    CONST_INIT(ctx[4], 4);
-    VARIANT2_SET_ROUNDING_MODE();
-
-    uint64_t idx0, idx1, idx2, idx3, idx4;
-    idx0 = _mm_cvtsi128_si64(ax0);
-    idx1 = _mm_cvtsi128_si64(ax1);
-    idx2 = _mm_cvtsi128_si64(ax2);
-    idx3 = _mm_cvtsi128_si64(ax3);
-    idx4 = _mm_cvtsi128_si64(ax4);
-
-    for (size_t i = 0; i < ITERATIONS; i++)
-    {
-        uint64_t hi, lo;
-        __m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4;
-
-        CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
-        CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
-        CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2);
-        CN_STEP1(ax3, bx30, bx31, cx3, l3, ptr3, idx3);
-        CN_STEP1(ax4, bx40, bx41, cx4, l4, ptr4, idx4);
-
-        CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0);
-        CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1);
-        CN_STEP2(ax2, bx20, bx21, cx2, l2, ptr2, idx2);
-        CN_STEP2(ax3, bx30, bx31, cx3, l3, ptr3, idx3);
-        CN_STEP2(ax4, bx40, bx41, cx4, l4, ptr4, idx4);
-
-        CN_STEP3(0, ax0, bx00, bx01, cx0, l0, ptr0, idx0);
-        CN_STEP3(1, ax1, bx10, bx11, cx1, l1, ptr1, idx1);
-        CN_STEP3(2, ax2, bx20, bx21, cx2, l2, ptr2, idx2);
-        CN_STEP3(3, ax3, bx30, bx31, cx3, l3, ptr3, idx3);
-        CN_STEP3(4, ax4, bx40, bx41, cx4, l4, ptr4, idx4);
-
-        CN_STEP4(0, ax0, bx00, bx01, cx0, l0, mc0, ptr0, idx0);
-        CN_STEP4(1, ax1, bx10, bx11, cx1, l1, mc1, ptr1, idx1);
-        CN_STEP4(2, ax2, bx20, bx21, cx2, l2, mc2, ptr2, idx2);
-        CN_STEP4(3, ax3, bx30, bx31, cx3, l3, mc3, ptr3, idx3);
-        CN_STEP4(4, ax4, bx40, bx41, cx4, l4, mc4, ptr4, idx4);
-    }
-
-    for (size_t i = 0; i < 5; i++) {
-        cn_implode_scratchpad<ALGO, MEM, SOFT_AES>(reinterpret_cast<__m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
-        xmrig::keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
-        extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
-    }
-}
-
-#endif /* XMRIG_CRYPTONIGHT_X86_H */
diff --git a/src/crypto/CryptonightR_gen.cpp b/src/crypto/CryptonightR_gen.cpp
deleted file mode 100644
index 3fba49cd..00000000
--- a/src/crypto/CryptonightR_gen.cpp
+++ /dev/null
@@ -1,187 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
- * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <cstring>
-#include "crypto/CryptoNight_monero.h"
-
-typedef void(*void_func)();
-
-#include "crypto/asm/CryptonightR_template.h"
-#include "Mem.h"
-
-
-static inline void add_code(uint8_t* &p, void (*p1)(), void (*p2)())
-{
-    const ptrdiff_t size = reinterpret_cast<const uint8_t*>(p2) - reinterpret_cast<const uint8_t*>(p1);
-    if (size > 0) {
-        memcpy(p, reinterpret_cast<void*>(p1), size);
-        p += size;
-    }
-}
-
-static inline void add_random_math(uint8_t* &p, const V4_Instruction* code, int code_size, const void_func* instructions, const void_func* instructions_mov, bool is_64_bit, xmrig::Assembly ASM)
-{
-    uint32_t prev_rot_src = (uint32_t)(-1);
-
-    for (int i = 0;; ++i) {
-        const V4_Instruction inst = code[i];
-        if (inst.opcode == RET) {
-            break;
-        }
-
-        uint8_t opcode = (inst.opcode == MUL) ? inst.opcode : (inst.opcode + 2);
-        uint8_t dst_index = inst.dst_index;
-        uint8_t src_index = inst.src_index;
-
-        const uint32_t a = inst.dst_index;
-        const uint32_t b = inst.src_index;
-        const uint8_t c = opcode | (dst_index << V4_OPCODE_BITS) | (((src_index == 8) ? dst_index : src_index) << (V4_OPCODE_BITS + V4_DST_INDEX_BITS));
-
-        switch (inst.opcode) {
-        case ROR:
-        case ROL:
-            if (b != prev_rot_src) {
-                prev_rot_src = b;
-                add_code(p, instructions_mov[c], instructions_mov[c + 1]);
-            }
-            break;
-        }
-
-        if (a == prev_rot_src) {
-            prev_rot_src = (uint32_t)(-1);
-        }
-
-        void_func begin = instructions[c];
-
-        if ((ASM = xmrig::ASM_BULLDOZER) && (inst.opcode == MUL) && !is_64_bit) {
-            // AMD Bulldozer has latency 4 for 32-bit IMUL and 6 for 64-bit IMUL
-            // Always use 32-bit IMUL for AMD Bulldozer in 32-bit mode - skip prefix 0x48 and change 0x49 to 0x41
-            uint8_t* prefix = reinterpret_cast<uint8_t*>(begin);
-
-            if (*prefix == 0x49) {
-                *(p++) = 0x41;
-            }
-
-            begin = reinterpret_cast<void_func>(prefix + 1);
-        }
-
-        add_code(p, begin, instructions[c + 1]);
-
-        if (inst.opcode == ADD) {
-            *(uint32_t*)(p - sizeof(uint32_t) - (is_64_bit ? 3 : 0)) = inst.C;
-            if (is_64_bit) {
-                prev_rot_src = (uint32_t)(-1);
-            }
-        }
-    }
-}
-
-void wow_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
-{
-    uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
-    uint8_t* p = p0;
-
-    add_code(p, CryptonightWOW_template_part1, CryptonightWOW_template_part2);
-    add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
-    add_code(p, CryptonightWOW_template_part2, CryptonightWOW_template_part3);
-    *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightWOW_template_mainloop) - ((const uint8_t*)CryptonightWOW_template_part1)) - (p - p0));
-    add_code(p, CryptonightWOW_template_part3, CryptonightWOW_template_end);
-
-    Mem::flushInstructionCache(machine_code, p - p0);
-}
-
-void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
-{
-    uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
-    uint8_t* p = p0;
-
-    add_code(p, CryptonightR_template_part1, CryptonightR_template_part2);
-    add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
-    add_code(p, CryptonightR_template_part2, CryptonightR_template_part3);
-    *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_template_mainloop) - ((const uint8_t*)CryptonightR_template_part1)) - (p - p0));
-    add_code(p, CryptonightR_template_part3, CryptonightR_template_end);
-
-    Mem::flushInstructionCache(machine_code, p - p0);
-}
-
-void wow_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
-{
-    uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
-    uint8_t* p = p0;
-
-    add_code(p, CryptonightWOW_template_double_part1, CryptonightWOW_template_double_part2);
-    add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
-    add_code(p, CryptonightWOW_template_double_part2, CryptonightWOW_template_double_part3);
-    add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
-    add_code(p, CryptonightWOW_template_double_part3, CryptonightWOW_template_double_part4);
-    *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightWOW_template_double_mainloop) - ((const uint8_t*)CryptonightWOW_template_double_part1)) - (p - p0));
-    add_code(p, CryptonightWOW_template_double_part4, CryptonightWOW_template_double_end);
-
-    Mem::flushInstructionCache(machine_code, p - p0);
-}
-
-void v4_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
-{
-    uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
-    uint8_t* p = p0;
-
-    add_code(p, CryptonightR_template_double_part1, CryptonightR_template_double_part2);
-    add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
-    add_code(p, CryptonightR_template_double_part2, CryptonightR_template_double_part3);
-    add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
-    add_code(p, CryptonightR_template_double_part3, CryptonightR_template_double_part4);
-    *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_template_double_mainloop) - ((const uint8_t*)CryptonightR_template_double_part1)) - (p - p0));
-    add_code(p, CryptonightR_template_double_part4, CryptonightR_template_double_end);
-
-    Mem::flushInstructionCache(machine_code, p - p0);
-}
-
-void wow_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
-{
-    uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
-    uint8_t* p = p0;
-
-    add_code(p, CryptonightWOW_soft_aes_template_part1, CryptonightWOW_soft_aes_template_part2);
-    add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
-    add_code(p, CryptonightWOW_soft_aes_template_part2, CryptonightWOW_soft_aes_template_part3);
-    *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightWOW_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightWOW_soft_aes_template_part1)) - (p - p0));
-    add_code(p, CryptonightWOW_soft_aes_template_part3, CryptonightWOW_soft_aes_template_end);
-
-    Mem::flushInstructionCache(machine_code, p - p0);
-}
-
-void v4_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM)
-{
-    uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
-    uint8_t* p = p0;
-
-    add_code(p, CryptonightR_soft_aes_template_part1, CryptonightR_soft_aes_template_part2);
-    add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
-    add_code(p, CryptonightR_soft_aes_template_part2, CryptonightR_soft_aes_template_part3);
-    *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightR_soft_aes_template_part1)) - (p - p0));
-    add_code(p, CryptonightR_soft_aes_template_part3, CryptonightR_soft_aes_template_end);
-
-    Mem::flushInstructionCache(machine_code, p - p0);
-}
diff --git a/src/crypto/SSE2NEON.h b/src/crypto/SSE2NEON.h
deleted file mode 100644
index 6a00448d..00000000
--- a/src/crypto/SSE2NEON.h
+++ /dev/null
@@ -1,1497 +0,0 @@
-#ifndef SSE2NEON_H
-#define SSE2NEON_H
-
-// This header file provides a simple API translation layer
-// between SSE intrinsics to their corresponding ARM NEON versions
-//
-// This header file does not (yet) translate *all* of the SSE intrinsics.
-// Since this is in support of a specific porting effort, I have only
-// included the intrinsics I needed to get my port to work.
-//
-// Questions/Comments/Feedback send to: jratcliffscarab@gmail.com
-//
-// If you want to improve or add to this project, send me an
-// email and I will probably approve your access to the depot.
-//
-// Project is located here:
-//
-//	https://github.com/jratcliff63367/sse2neon
-//
-// Show your appreciation for open source by sending me a bitcoin tip to the following
-// address.
-//
-// TipJar: 1PzgWDSyq4pmdAXRH8SPUtta4SWGrt4B1p :
-// https://blockchain.info/address/1PzgWDSyq4pmdAXRH8SPUtta4SWGrt4B1p
-//
-//
-// Contributors to this project are:
-//
-// John W. Ratcliff     : jratcliffscarab@gmail.com
-// Brandon Rowlett      : browlett@nvidia.com
-// Ken Fast             : kfast@gdeb.com
-// Eric van Beurden     : evanbeurden@nvidia.com
-// Alexander Potylitsin : apotylitsin@nvidia.com
-//
-//
-// *********************************************************************************************************************
-// apoty: March 17, 2017
-// Current version was changed in most to fix issues and potential issues.
-// All unit tests were rewritten as a part of forge lib project to cover all implemented functions.
-// *********************************************************************************************************************
-// Release notes for January 20, 2017 version:
-//
-// The unit tests have been refactored.  They no longer assert on an error, instead they return a pass/fail condition
-// The unit-tests now test 10,000 random float and int values against each intrinsic.
-//
-// SSE2NEON now supports 95 SSE intrinsics.  39 of them have formal unit tests which have been implemented and
-// fully tested on NEON/ARM.  The remaining 56 still need unit tests implemented.
-//
-// A struct is now defined in this header file called 'SIMDVec' which can be used by applications which
-// attempt to access the contents of an _m128 struct directly.  It is important to note that accessing the __m128
-// struct directly is bad coding practice by Microsoft: @see: https://msdn.microsoft.com/en-us/library/ayeb3ayc.aspx
-// 
-// However, some legacy source code may try to access the contents of an __m128 struct directly so the developer
-// can use the SIMDVec as an alias for it.  Any casting must be done manually by the developer, as you cannot
-// cast or otherwise alias the base NEON data type for intrinsic operations.
-//
-// A bug was found with the _mm_shuffle_ps intrinsic.  If the shuffle permutation was not one of the ones with
-// a custom/unique implementation causing it to fall through to the default shuffle implementation it was failing
-// to return the correct value.  This is now fixed.
-//
-// A bug was found with the _mm_cvtps_epi32 intrinsic.  This converts floating point values to integers.
-// It was not honoring the correct rounding mode.  In SSE the default rounding mode when converting from float to int
-// is to use 'round to even' otherwise known as 'bankers rounding'.  ARMv7 did not support this feature but ARMv8 does.
-// As it stands today, this header file assumes ARMv8.  If you are trying to target really old ARM devices, you may get
-// a build error.
-//
-// Support for a number of new intrinsics was added, however, none of them yet have unit-tests to 100% confirm they are
-// producing the correct results on NEON.  These unit tests will be added as soon as possible.
-// 
-// Here is the list of new instrinsics which have been added:
-//
-// _mm_cvtss_f32     :  extracts the lower order floating point value from the parameter
-// _mm_add_ss        : adds the scalar single - precision floating point values of a and b
-// _mm_div_ps        : Divides the four single - precision, floating - point values of a and b.
-// _mm_div_ss        : Divides the scalar single - precision floating point value of a by b.
-// _mm_sqrt_ss       : Computes the approximation of the square root of the scalar single - precision floating point value of in.
-// _mm_rsqrt_ps      : Computes the approximations of the reciprocal square roots of the four single - precision floating point values of in.
-// _mm_comilt_ss     : Compares the lower single - precision floating point scalar values of a and b using a less than operation
-// _mm_comigt_ss     : Compares the lower single - precision floating point scalar values of a and b using a greater than operation.
-// _mm_comile_ss     :  Compares the lower single - precision floating point scalar values of a and b using a less than or equal operation.
-// _mm_comige_ss     : Compares the lower single - precision floating point scalar values of a and b using a greater than or equal operation.
-// _mm_comieq_ss     :  Compares the lower single - precision floating point scalar values of a and b using an equality operation.
-// _mm_comineq_s     :  Compares the lower single - precision floating point scalar values of a and b using an inequality operation
-// _mm_unpackhi_epi8 : Interleaves the upper 8 signed or unsigned 8 - bit integers in a with the upper 8 signed or unsigned 8 - bit integers in b.
-// _mm_unpackhi_epi16:  Interleaves the upper 4 signed or unsigned 16 - bit integers in a with the upper 4 signed or unsigned 16 - bit integers in b.
-//
-// *********************************************************************************************************************
-/*
-** The MIT license:
-**
-** Permission is hereby granted, free of charge, to any person obtaining a copy
-** of this software and associated documentation files (the "Software"), to deal
-** in the Software without restriction, including without limitation the rights
-** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-** copies of the Software, and to permit persons to whom the Software is furnished
-** to do so, subject to the following conditions:
-**
-** The above copyright notice and this permission notice shall be included in all
-** copies or substantial portions of the Software.
-
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-#define ENABLE_CPP_VERSION 0
-
-#if defined(__GNUC__) || defined(__clang__)
-#	pragma push_macro("FORCE_INLINE")
-#	pragma push_macro("ALIGN_STRUCT")
-#	define FORCE_INLINE       static inline __attribute__((always_inline))
-#	define ALIGN_STRUCT(x)    __attribute__((aligned(x)))
-#else
-#	error "Macro name collisions may happens with unknown compiler"
-#	define FORCE_INLINE       static inline
-#	define ALIGN_STRUCT(x)    __declspec(align(x))
-#endif
-
-#include <stdint.h>
-#include "arm_neon.h"
-
-
-/*******************************************************/
-/* MACRO for shuffle parameter for _mm_shuffle_ps().   */
-/* Argument fp3 is a digit[0123] that represents the fp*/
-/* from argument "b" of mm_shuffle_ps that will be     */
-/* placed in fp3 of result. fp2 is the same for fp2 in */
-/* result. fp1 is a digit[0123] that represents the fp */
-/* from argument "a" of mm_shuffle_ps that will be     */
-/* places in fp1 of result. fp0 is the same for fp0 of */
-/* result                                              */
-/*******************************************************/
-#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
-	(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0)))
-
-/* indicate immediate constant argument in a given range */
-#define __constrange(a,b) \
-	const
-
-typedef float32x4_t __m128;
-typedef int32x4_t __m128i;
-
-
-// ******************************************
-// type-safe casting between types
-// ******************************************
-
-#define vreinterpretq_m128_f16(x) \
-	vreinterpretq_f32_f16(x)
-
-#define vreinterpretq_m128_f32(x) \
-	(x)
-
-#define vreinterpretq_m128_f64(x) \
-	vreinterpretq_f32_f64(x)
-
-
-#define vreinterpretq_m128_u8(x) \
-	vreinterpretq_f32_u8(x)
-
-#define vreinterpretq_m128_u16(x) \
-	vreinterpretq_f32_u16(x)
-
-#define vreinterpretq_m128_u32(x) \
-	vreinterpretq_f32_u32(x)
-
-#define vreinterpretq_m128_u64(x) \
-	vreinterpretq_f32_u64(x)
-
-
-#define vreinterpretq_m128_s8(x) \
-	vreinterpretq_f32_s8(x)
-
-#define vreinterpretq_m128_s16(x) \
-	vreinterpretq_f32_s16(x)
-
-#define vreinterpretq_m128_s32(x) \
-	vreinterpretq_f32_s32(x)
-
-#define vreinterpretq_m128_s64(x) \
-	vreinterpretq_f32_s64(x)
-
-
-#define vreinterpretq_f16_m128(x) \
-	vreinterpretq_f16_f32(x)
-
-#define vreinterpretq_f32_m128(x) \
-	(x)
-
-#define vreinterpretq_f64_m128(x) \
-	vreinterpretq_f64_f32(x)
-
-
-#define vreinterpretq_u8_m128(x) \
-	vreinterpretq_u8_f32(x)
-
-#define vreinterpretq_u16_m128(x) \
-	vreinterpretq_u16_f32(x)
-
-#define vreinterpretq_u32_m128(x) \
-	vreinterpretq_u32_f32(x)
-
-#define vreinterpretq_u64_m128(x) \
-	vreinterpretq_u64_f32(x)
-
-
-#define vreinterpretq_s8_m128(x) \
-	vreinterpretq_s8_f32(x)
-
-#define vreinterpretq_s16_m128(x) \
-	vreinterpretq_s16_f32(x)
-
-#define vreinterpretq_s32_m128(x) \
-	vreinterpretq_s32_f32(x)
-
-#define vreinterpretq_s64_m128(x) \
-	vreinterpretq_s64_f32(x)
-
-
-#define vreinterpretq_m128i_s8(x) \
-	vreinterpretq_s32_s8(x)
-
-#define vreinterpretq_m128i_s16(x) \
-	vreinterpretq_s32_s16(x)
-
-#define vreinterpretq_m128i_s32(x) \
-	(x)
-
-#define vreinterpretq_m128i_s64(x) \
-	vreinterpretq_s32_s64(x)
-
-
-#define vreinterpretq_m128i_u8(x) \
-	vreinterpretq_s32_u8(x)
-
-#define vreinterpretq_m128i_u16(x) \
-	vreinterpretq_s32_u16(x)
-
-#define vreinterpretq_m128i_u32(x) \
-	vreinterpretq_s32_u32(x)
-
-#define vreinterpretq_m128i_u64(x) \
-	vreinterpretq_s32_u64(x)
-
-
-#define vreinterpretq_s8_m128i(x) \
-	vreinterpretq_s8_s32(x)
-
-#define vreinterpretq_s16_m128i(x) \
-	vreinterpretq_s16_s32(x)
-
-#define vreinterpretq_s32_m128i(x) \
-	(x)
-
-#define vreinterpretq_s64_m128i(x) \
-	vreinterpretq_s64_s32(x)
-
-
-#define vreinterpretq_u8_m128i(x) \
-	vreinterpretq_u8_s32(x)
-
-#define vreinterpretq_u16_m128i(x) \
-	vreinterpretq_u16_s32(x)
-
-#define vreinterpretq_u32_m128i(x) \
-	vreinterpretq_u32_s32(x)
-
-#define vreinterpretq_u64_m128i(x) \
-	vreinterpretq_u64_s32(x)
-
-
-// union intended to allow direct access to an __m128 variable using the names that the MSVC
-// compiler provides.  This union should really only be used when trying to access the members
-// of the vector as integer values.  GCC/clang allow native access to the float members through
-// a simple array access operator (in C since 4.6, in C++ since 4.8).
-//
-// Ideally direct accesses to SIMD vectors should not be used since it can cause a performance
-// hit.  If it really is needed however, the original __m128 variable can be aliased with a
-// pointer to this union and used to access individual components.  The use of this union should
-// be hidden behind a macro that is used throughout the codebase to access the members instead
-// of always declaring this type of variable.
-typedef union ALIGN_STRUCT(16) SIMDVec
-{
-	float       m128_f32[4];    // as floats - do not to use this.  Added for convenience.
-	int8_t      m128_i8[16];    // as signed 8-bit integers.
-	int16_t     m128_i16[8];    // as signed 16-bit integers.
-	int32_t     m128_i32[4];    // as signed 32-bit integers.
-	int64_t     m128_i64[2];    // as signed 64-bit integers.
-	uint8_t     m128_u8[16];    // as unsigned 8-bit integers.
-	uint16_t    m128_u16[8];    // as unsigned 16-bit integers.
-	uint32_t    m128_u32[4];    // as unsigned 32-bit integers.
-	uint64_t    m128_u64[2];    // as unsigned 64-bit integers.
-} SIMDVec;
-
-
-// ******************************************
-// Set/get methods
-// ******************************************
-
-// extracts the lower order floating point value from the parameter : https://msdn.microsoft.com/en-us/library/bb514059%28v=vs.120%29.aspx?f=255&MSPPError=-2147217396
-FORCE_INLINE float _mm_cvtss_f32(__m128 a)
-{
-	return vgetq_lane_f32(vreinterpretq_f32_m128(a), 0);
-}
-
-// Sets the 128-bit value to zero https://msdn.microsoft.com/en-us/library/vstudio/ys7dw0kh(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_setzero_si128()
-{
-	return vreinterpretq_m128i_s32(vdupq_n_s32(0));
-}
-
-// Clears the four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/tk1t2tbz(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_setzero_ps(void)
-{
-	return vreinterpretq_m128_f32(vdupq_n_f32(0));
-}
-
-// Sets the four single-precision, floating-point values to w. https://msdn.microsoft.com/en-us/library/vstudio/2x1se8ha(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_set1_ps(float _w)
-{
-	return vreinterpretq_m128_f32(vdupq_n_f32(_w));
-}
-
-// Sets the four single-precision, floating-point values to w. https://msdn.microsoft.com/en-us/library/vstudio/2x1se8ha(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_set_ps1(float _w)
-{
-	return vreinterpretq_m128_f32(vdupq_n_f32(_w));
-}
-
-// Sets the four single-precision, floating-point values to the four inputs. https://msdn.microsoft.com/en-us/library/vstudio/afh0zf75(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_set_ps(float w, float z, float y, float x)
-{
-	float __attribute__((aligned(16))) data[4] = { x, y, z, w };
-	return vreinterpretq_m128_f32(vld1q_f32(data));
-}
-
-// Sets the four single-precision, floating-point values to the four inputs in reverse order. https://msdn.microsoft.com/en-us/library/vstudio/d2172ct3(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_setr_ps(float w, float z , float y , float x ) 
-{
-	float __attribute__ ((aligned (16))) data[4] = { w, z, y, x };
-	return vreinterpretq_m128_f32(vld1q_f32(data));
-}
-
-// Sets the 4 signed 32-bit integer values to i. https://msdn.microsoft.com/en-us/library/vstudio/h4xscxat(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_set1_epi32(int _i)
-{
-	return vreinterpretq_m128i_s32(vdupq_n_s32(_i));
-}
-
-// Sets the 4 signed 32-bit integer values. https://msdn.microsoft.com/en-us/library/vstudio/019beekt(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_set_epi32(int i3, int i2, int i1, int i0)
-{
-	int32_t __attribute__((aligned(16))) data[4] = { i0, i1, i2, i3 };
-	return vreinterpretq_m128i_s32(vld1q_s32(data));
-}
-
-// Stores four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/s3h4ay6y(v=vs.100).aspx
-FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
-{
-	vst1q_f32(p, vreinterpretq_f32_m128(a));
-}
-
-// Stores four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/44e30x22(v=vs.100).aspx
-FORCE_INLINE void _mm_storeu_ps(float *p, __m128 a)
-{
-	vst1q_f32(p, vreinterpretq_f32_m128(a));
-}
-
-// Stores four 32-bit integer values as (as a __m128i value) at the address p. https://msdn.microsoft.com/en-us/library/vstudio/edk11s13(v=vs.100).aspx
-FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a)
-{
-	vst1q_s32((int32_t*) p, vreinterpretq_s32_m128i(a));
-}
-
-// Stores the lower single - precision, floating - point value. https://msdn.microsoft.com/en-us/library/tzz10fbx(v=vs.100).aspx
-FORCE_INLINE void _mm_store_ss(float *p, __m128 a)
-{
-	vst1q_lane_f32(p, vreinterpretq_f32_m128(a), 0);
-}
-
-// Reads the lower 64 bits of b and stores them into the lower 64 bits of a.  https://msdn.microsoft.com/en-us/library/hhwf428f%28v=vs.90%29.aspx
-FORCE_INLINE void _mm_storel_epi64(__m128i* a, __m128i b)
-{
-	uint64x1_t hi = vget_high_u64(vreinterpretq_u64_m128i(*a));
-	uint64x1_t lo = vget_low_u64(vreinterpretq_u64_m128i(b));
-	*a = vreinterpretq_m128i_u64(vcombine_u64(lo, hi));
-}
-
-// Loads a single single-precision, floating-point value, copying it into all four words https://msdn.microsoft.com/en-us/library/vstudio/5cdkf716(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_load1_ps(const float * p)
-{
-	return vreinterpretq_m128_f32(vld1q_dup_f32(p));
-}
-
-// Loads four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/zzd50xxt(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_load_ps(const float * p)
-{
-	return vreinterpretq_m128_f32(vld1q_f32(p));
-}
-
-// Loads four single-precision, floating-point values.  https://msdn.microsoft.com/en-us/library/x1b16s7z%28v=vs.90%29.aspx
-FORCE_INLINE __m128 _mm_loadu_ps(const float * p)
-{
-	// for neon, alignment doesn't matter, so _mm_load_ps and _mm_loadu_ps are equivalent for neon
-	return vreinterpretq_m128_f32(vld1q_f32(p));
-}
-
-// Loads an single - precision, floating - point value into the low word and clears the upper three words.  https://msdn.microsoft.com/en-us/library/548bb9h4%28v=vs.90%29.aspx
-FORCE_INLINE __m128 _mm_load_ss(const float * p)
-{
-	return vreinterpretq_m128_f32(vsetq_lane_f32(*p, vdupq_n_f32(0), 0));
-}
-
-
-// ******************************************
-// Logic/Binary operations
-// ******************************************
-
-// Compares for inequality.  https://msdn.microsoft.com/en-us/library/sf44thbx(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_cmpneq_ps(__m128 a, __m128 b)
-{
-	return vreinterpretq_m128_u32( vmvnq_u32( vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)) ) );
-}
-
-// Computes the bitwise AND-NOT of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/68h7wd02(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_andnot_ps(__m128 a, __m128 b)
-{
-	return vreinterpretq_m128_s32( vbicq_s32(vreinterpretq_s32_m128(b), vreinterpretq_s32_m128(a)) ); // *NOTE* argument swap
-}
-
-// Computes the bitwise AND of the 128-bit value in b and the bitwise NOT of the 128-bit value in a. https://msdn.microsoft.com/en-us/library/vstudio/1beaceh8(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_andnot_si128(__m128i a, __m128i b)
-{
-	return vreinterpretq_m128i_s32( vbicq_s32(vreinterpretq_s32_m128i(b), vreinterpretq_s32_m128i(a)) ); // *NOTE* argument swap
-}
-
-// Computes the bitwise AND of the 128-bit value in a and the 128-bit value in b. https://msdn.microsoft.com/en-us/library/vstudio/6d1txsa8(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_and_si128(__m128i a, __m128i b)
-{
-	return vreinterpretq_m128i_s32( vandq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)) );
-}
-
-// Computes the bitwise AND of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/73ck1xc5(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_and_ps(__m128 a, __m128 b)
-{
-	return vreinterpretq_m128_s32( vandq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b)) );
-}
-
-// Computes the bitwise OR of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/7ctdsyy0(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_or_ps(__m128 a, __m128 b)
-{
-	return vreinterpretq_m128_s32( vorrq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b)) );
-}
-
-// Computes bitwise EXOR (exclusive-or) of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/ss6k3wk8(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_xor_ps(__m128 a, __m128 b)
-{
-	return vreinterpretq_m128_s32( veorq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b)) );
-}
-
-// Computes the bitwise OR of the 128-bit value in a and the 128-bit value in b. https://msdn.microsoft.com/en-us/library/vstudio/ew8ty0db(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_or_si128(__m128i a, __m128i b)
-{
-	return vreinterpretq_m128i_s32( vorrq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)) );
-}
-
-// Computes the bitwise XOR of the 128-bit value in a and the 128-bit value in b.  https://msdn.microsoft.com/en-us/library/fzt08www(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_xor_si128(__m128i a, __m128i b)
-{
-	return vreinterpretq_m128i_s32( veorq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)) );
-}
-
-// NEON does not provide this method
-// Creates a 4-bit mask from the most significant bits of the four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/4490ys29(v=vs.100).aspx
-FORCE_INLINE int _mm_movemask_ps(__m128 a)
-{
-#if ENABLE_CPP_VERSION // I am not yet convinced that the NEON version is faster than the C version of this
-	uint32x4_t &ia = *(uint32x4_t *)&a;
-	return (ia[0] >> 31) | ((ia[1] >> 30) & 2) | ((ia[2] >> 29) & 4) | ((ia[3] >> 28) & 8);
-#else
-	static const uint32x4_t movemask = { 1, 2, 4, 8 };
-	static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
-	uint32x4_t t0 = vreinterpretq_u32_m128(a);
-	uint32x4_t t1 = vtstq_u32(t0, highbit);
-	uint32x4_t t2 = vandq_u32(t1, movemask);
-	uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2));
-	return vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1);
-#endif
-}
-
-// Takes the upper 64 bits of a and places it in the low end of the result
-// Takes the lower 64 bits of b and places it into the high end of the result.
-FORCE_INLINE __m128 _mm_shuffle_ps_1032(__m128 a, __m128 b)
-{
-	float32x2_t a32 = vget_high_f32(vreinterpretq_f32_m128(a));
-	float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b));
-	return vreinterpretq_m128_f32(vcombine_f32(a32, b10));
-}
-
-// takes the lower two 32-bit values from a and swaps them and places in high end of result
-// takes the higher two 32 bit values from b and swaps them and places in low end of result.
-FORCE_INLINE __m128 _mm_shuffle_ps_2301(__m128 a, __m128 b)
-{
-	float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a)));
-	float32x2_t b23 = vrev64_f32(vget_high_f32(vreinterpretq_f32_m128(b)));
-	return vreinterpretq_m128_f32(vcombine_f32(a01, b23));
-}
-
-FORCE_INLINE __m128 _mm_shuffle_ps_0321(__m128 a, __m128 b)
-{
-	float32x2_t a21 = vget_high_f32(vextq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a), 3));
-	float32x2_t b03 = vget_low_f32(vextq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b), 3));
-	return vreinterpretq_m128_f32(vcombine_f32(a21, b03));
-}
-
-FORCE_INLINE __m128 _mm_shuffle_ps_2103(__m128 a, __m128 b)
-{
-	float32x2_t a03 = vget_low_f32(vextq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a), 3));
-	float32x2_t b21 = vget_high_f32(vextq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b), 3));
-	return vreinterpretq_m128_f32(vcombine_f32(a03, b21));
-}
-
-FORCE_INLINE __m128 _mm_shuffle_ps_1010(__m128 a, __m128 b)
-{
-	float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a));
-	float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b));
-	return vreinterpretq_m128_f32(vcombine_f32(a10, b10));
-}
-
-FORCE_INLINE __m128 _mm_shuffle_ps_1001(__m128 a, __m128 b)
-{
-	float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a)));
-	float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b));
-	return vreinterpretq_m128_f32(vcombine_f32(a01, b10));
-}
-
-FORCE_INLINE __m128 _mm_shuffle_ps_0101(__m128 a, __m128 b)
-{
-	float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a)));
-	float32x2_t b01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(b)));
-	return vreinterpretq_m128_f32(vcombine_f32(a01, b01));
-}
-
-// keeps the low 64 bits of b in the low and puts the high 64 bits of a in the high
-FORCE_INLINE __m128 _mm_shuffle_ps_3210(__m128 a, __m128 b)
-{
-	float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a));
-	float32x2_t b32 = vget_high_f32(vreinterpretq_f32_m128(b));
-	return vreinterpretq_m128_f32(vcombine_f32(a10, b32));
-}
-
-FORCE_INLINE __m128 _mm_shuffle_ps_0011(__m128 a, __m128 b)
-{
-	float32x2_t a11 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(a)), 1);
-	float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0);
-	return vreinterpretq_m128_f32(vcombine_f32(a11, b00));
-}
-
-FORCE_INLINE __m128 _mm_shuffle_ps_0022(__m128 a, __m128 b)
-{
-	float32x2_t a22 = vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(a)), 0);
-	float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0);
-	return vreinterpretq_m128_f32(vcombine_f32(a22, b00));
-}
-
-FORCE_INLINE __m128 _mm_shuffle_ps_2200(__m128 a, __m128 b)
-{
-	float32x2_t a00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(a)), 0);
-	float32x2_t b22 = vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(b)), 0);
-	return vreinterpretq_m128_f32(vcombine_f32(a00, b22));
-}
-
-FORCE_INLINE __m128 _mm_shuffle_ps_3202(__m128 a, __m128 b)
-{
-	float32_t a0 = vgetq_lane_f32(vreinterpretq_f32_m128(a), 0);
-	float32x2_t a22 = vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(a)), 0);
-	float32x2_t a02 = vset_lane_f32(a0, a22, 1); /* apoty: TODO: use vzip ?*/
-	float32x2_t b32 = vget_high_f32(vreinterpretq_f32_m128(b));
-	return vreinterpretq_m128_f32(vcombine_f32(a02, b32));
-}
-
-FORCE_INLINE __m128 _mm_shuffle_ps_1133(__m128 a, __m128 b)
-{
-	float32x2_t a33 = vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(a)), 1);
-	float32x2_t b11 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 1);
-	return vreinterpretq_m128_f32(vcombine_f32(a33, b11));
-}
-
-FORCE_INLINE __m128 _mm_shuffle_ps_2010(__m128 a, __m128 b)
-{
-	float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a));
-	float32_t b2 = vgetq_lane_f32(vreinterpretq_f32_m128(b), 2);
-	float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0);
-	float32x2_t b20 = vset_lane_f32(b2, b00, 1);
-	return vreinterpretq_m128_f32(vcombine_f32(a10, b20));
-}
-
-FORCE_INLINE __m128 _mm_shuffle_ps_2001(__m128 a, __m128 b)
-{
-	float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a)));
-	float32_t b2 = vgetq_lane_f32(b, 2);
-	float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0);
-	float32x2_t b20 = vset_lane_f32(b2, b00, 1);
-	return vreinterpretq_m128_f32(vcombine_f32(a01, b20));
-}
-
-FORCE_INLINE __m128 _mm_shuffle_ps_2032(__m128 a, __m128 b)
-{
-	float32x2_t a32 = vget_high_f32(vreinterpretq_f32_m128(a));
-	float32_t b2 = vgetq_lane_f32(b, 2);
-	float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0);
-	float32x2_t b20 = vset_lane_f32(b2, b00, 1);
-	return vreinterpretq_m128_f32(vcombine_f32(a32, b20));
-}
-
-// NEON does not support a general purpose permute intrinsic
-// Currently I am not sure whether the C implementation is faster or slower than the NEON version.
-// Note, this has to be expanded as a template because the shuffle value must be an immediate value.
-// The same is true on SSE as well.
-// Selects four specific single-precision, floating-point values from a and b, based on the mask i. https://msdn.microsoft.com/en-us/library/vstudio/5f0858x0(v=vs.100).aspx
-#if ENABLE_CPP_VERSION // I am not convinced that the NEON version is faster than the C version yet.
-FORCE_INLINE __m128 _mm_shuffle_ps_default(__m128 a, __m128 b, __constrange(0,255) int imm)
-{
-	__m128 ret;
-	ret[0] = a[imm & 0x3];
-	ret[1] = a[(imm >> 2) & 0x3];
-	ret[2] = b[(imm >> 4) & 0x03];
-	ret[3] = b[(imm >> 6) & 0x03];
-	return ret;
-}
-#else
-#define _mm_shuffle_ps_default(a, b, imm) \
-({ \
-	float32x4_t ret; \
-	ret = vmovq_n_f32(vgetq_lane_f32(vreinterpretq_f32_m128(a), (imm) & 0x3)); \
-	ret = vsetq_lane_f32(vgetq_lane_f32(vreinterpretq_f32_m128(a), ((imm) >> 2) & 0x3), ret, 1); \
-	ret = vsetq_lane_f32(vgetq_lane_f32(vreinterpretq_f32_m128(b), ((imm) >> 4) & 0x3), ret, 2); \
-	ret = vsetq_lane_f32(vgetq_lane_f32(vreinterpretq_f32_m128(b), ((imm) >> 6) & 0x3), ret, 3); \
-	vreinterpretq_m128_f32(ret); \
-})
-#endif
-
-//FORCE_INLINE __m128 _mm_shuffle_ps(__m128 a, __m128 b, __constrange(0,255) int imm)
-#define _mm_shuffle_ps(a, b, imm) \
-({ \
-	__m128 ret; \
-	switch (imm) \
-	{ \
-		case _MM_SHUFFLE(1, 0, 3, 2): ret = _mm_shuffle_ps_1032((a), (b)); break; \
-		case _MM_SHUFFLE(2, 3, 0, 1): ret = _mm_shuffle_ps_2301((a), (b)); break; \
-		case _MM_SHUFFLE(0, 3, 2, 1): ret = _mm_shuffle_ps_0321((a), (b)); break; \
-		case _MM_SHUFFLE(2, 1, 0, 3): ret = _mm_shuffle_ps_2103((a), (b)); break; \
-		case _MM_SHUFFLE(1, 0, 1, 0): ret = _mm_shuffle_ps_1010((a), (b)); break; \
-		case _MM_SHUFFLE(1, 0, 0, 1): ret = _mm_shuffle_ps_1001((a), (b)); break; \
-		case _MM_SHUFFLE(0, 1, 0, 1): ret = _mm_shuffle_ps_0101((a), (b)); break; \
-		case _MM_SHUFFLE(3, 2, 1, 0): ret = _mm_shuffle_ps_3210((a), (b)); break; \
-		case _MM_SHUFFLE(0, 0, 1, 1): ret = _mm_shuffle_ps_0011((a), (b)); break; \
-		case _MM_SHUFFLE(0, 0, 2, 2): ret = _mm_shuffle_ps_0022((a), (b)); break; \
-		case _MM_SHUFFLE(2, 2, 0, 0): ret = _mm_shuffle_ps_2200((a), (b)); break; \
-		case _MM_SHUFFLE(3, 2, 0, 2): ret = _mm_shuffle_ps_3202((a), (b)); break; \
-		case _MM_SHUFFLE(1, 1, 3, 3): ret = _mm_shuffle_ps_1133((a), (b)); break; \
-		case _MM_SHUFFLE(2, 0, 1, 0): ret = _mm_shuffle_ps_2010((a), (b)); break; \
-		case _MM_SHUFFLE(2, 0, 0, 1): ret = _mm_shuffle_ps_2001((a), (b)); break; \
-		case _MM_SHUFFLE(2, 0, 3, 2): ret = _mm_shuffle_ps_2032((a), (b)); break; \
-		default: ret = _mm_shuffle_ps_default((a), (b), (imm)); break; \
-	} \
-	ret; \
-})
-
-// Takes the upper 64 bits of a and places it in the low end of the result
-// Takes the lower 64 bits of a and places it into the high end of the result.
-FORCE_INLINE __m128i _mm_shuffle_epi_1032(__m128i a)
-{
-	int32x2_t a32 = vget_high_s32(vreinterpretq_s32_m128i(a));
-	int32x2_t a10 = vget_low_s32(vreinterpretq_s32_m128i(a));
-	return vreinterpretq_m128i_s32(vcombine_s32(a32, a10));
-}
-
-// takes the lower two 32-bit values from a and swaps them and places in low end of result
-// takes the higher two 32 bit values from a and swaps them and places in high end of result.
-FORCE_INLINE __m128i _mm_shuffle_epi_2301(__m128i a)
-{
-	int32x2_t a01 = vrev64_s32(vget_low_s32(vreinterpretq_s32_m128i(a)));
-	int32x2_t a23 = vrev64_s32(vget_high_s32(vreinterpretq_s32_m128i(a)));
-	return vreinterpretq_m128i_s32(vcombine_s32(a01, a23));
-}
-
-// rotates the least significant 32 bits into the most signficant 32 bits, and shifts the rest down
-FORCE_INLINE __m128i _mm_shuffle_epi_0321(__m128i a)
-{
-	return vreinterpretq_m128i_s32(vextq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(a), 1));
-}
-
-// rotates the most significant 32 bits into the least signficant 32 bits, and shifts the rest up
-FORCE_INLINE __m128i _mm_shuffle_epi_2103(__m128i a)
-{
-	return vreinterpretq_m128i_s32(vextq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(a), 3));
-}
-
-// gets the lower 64 bits of a, and places it in the upper 64 bits
-// gets the lower 64 bits of a and places it in the lower 64 bits
-FORCE_INLINE __m128i _mm_shuffle_epi_1010(__m128i a)
-{
-	int32x2_t a10 = vget_low_s32(vreinterpretq_s32_m128i(a));
-	return vreinterpretq_m128i_s32(vcombine_s32(a10, a10));
-}
-
-// gets the lower 64 bits of a, swaps the 0 and 1 elements, and places it in the lower 64 bits
-// gets the lower 64 bits of a, and places it in the upper 64 bits
-FORCE_INLINE __m128i _mm_shuffle_epi_1001(__m128i a)
-{
-	int32x2_t a01 = vrev64_s32(vget_low_s32(vreinterpretq_s32_m128i(a)));
-	int32x2_t a10 = vget_low_s32(vreinterpretq_s32_m128i(a));
-	return vreinterpretq_m128i_s32(vcombine_s32(a01, a10));
-}
-
-// gets the lower 64 bits of a, swaps the 0 and 1 elements and places it in the upper 64 bits
-// gets the lower 64 bits of a, swaps the 0 and 1 elements, and places it in the lower 64 bits
-FORCE_INLINE __m128i _mm_shuffle_epi_0101(__m128i a)
-{
-	int32x2_t a01 = vrev64_s32(vget_low_s32(vreinterpretq_s32_m128i(a)));
-	return vreinterpretq_m128i_s32(vcombine_s32(a01, a01));
-}
-
-FORCE_INLINE __m128i _mm_shuffle_epi_2211(__m128i a)
-{
-	int32x2_t a11 = vdup_lane_s32(vget_low_s32(vreinterpretq_s32_m128i(a)), 1);
-	int32x2_t a22 = vdup_lane_s32(vget_high_s32(vreinterpretq_s32_m128i(a)), 0);
-	return vreinterpretq_m128i_s32(vcombine_s32(a11, a22));
-}
-
-FORCE_INLINE __m128i _mm_shuffle_epi_0122(__m128i a)
-{
-	int32x2_t a22 = vdup_lane_s32(vget_high_s32(vreinterpretq_s32_m128i(a)), 0);
-	int32x2_t a01 = vrev64_s32(vget_low_s32(vreinterpretq_s32_m128i(a)));
-	return vreinterpretq_m128i_s32(vcombine_s32(a22, a01));
-}
-
-FORCE_INLINE __m128i _mm_shuffle_epi_3332(__m128i a)
-{
-	int32x2_t a32 = vget_high_s32(vreinterpretq_s32_m128i(a));
-	int32x2_t a33 = vdup_lane_s32(vget_high_s32(vreinterpretq_s32_m128i(a)), 1);
-	return vreinterpretq_m128i_s32(vcombine_s32(a32, a33));
-}
-
-//FORCE_INLINE __m128i _mm_shuffle_epi32_default(__m128i a, __constrange(0,255) int imm)
-#if ENABLE_CPP_VERSION
-FORCE_INLINE __m128i _mm_shuffle_epi32_default(__m128i a, __constrange(0,255) int imm)
-{
-	__m128i ret;
-	ret[0] = a[imm & 0x3];
-	ret[1] = a[(imm >> 2) & 0x3];
-	ret[2] = a[(imm >> 4) & 0x03];
-	ret[3] = a[(imm >> 6) & 0x03];
-	return ret;
-}
-#else
-#define _mm_shuffle_epi32_default(a, imm) \
-({ \
-	int32x4_t ret; \
-	ret = vmovq_n_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm) & 0x3)); \
-	ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 2) & 0x3), ret, 1); \
-	ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 4) & 0x3), ret, 2); \
-	ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 6) & 0x3), ret, 3); \
-	vreinterpretq_m128i_s32(ret); \
-})
-#endif
-
-//FORCE_INLINE __m128i _mm_shuffle_epi32_splat(__m128i a, __constrange(0,255) int imm)
-#if defined(__aarch64__)
-#define _mm_shuffle_epi32_splat(a, imm) \
-({ \
-	vreinterpretq_m128i_s32(vdupq_laneq_s32(vreinterpretq_s32_m128i(a), (imm))); \
-})
-#else
-#define _mm_shuffle_epi32_splat(a, imm) \
-({ \
-	vreinterpretq_m128i_s32(vdupq_n_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm)))); \
-})
-#endif
-
-// Shuffles the 4 signed or unsigned 32-bit integers in a as specified by imm.	https://msdn.microsoft.com/en-us/library/56f67xbk%28v=vs.90%29.aspx
-//FORCE_INLINE __m128i _mm_shuffle_epi32(__m128i a, __constrange(0,255) int imm)
-#define _mm_shuffle_epi32(a, imm) \
-({ \
-	__m128i ret; \
-	switch (imm) \
-	{ \
-		case _MM_SHUFFLE(1, 0, 3, 2): ret = _mm_shuffle_epi_1032((a)); break; \
-		case _MM_SHUFFLE(2, 3, 0, 1): ret = _mm_shuffle_epi_2301((a)); break; \
-		case _MM_SHUFFLE(0, 3, 2, 1): ret = _mm_shuffle_epi_0321((a)); break; \
-		case _MM_SHUFFLE(2, 1, 0, 3): ret = _mm_shuffle_epi_2103((a)); break; \
-		case _MM_SHUFFLE(1, 0, 1, 0): ret = _mm_shuffle_epi_1010((a)); break; \
-		case _MM_SHUFFLE(1, 0, 0, 1): ret = _mm_shuffle_epi_1001((a)); break; \
-		case _MM_SHUFFLE(0, 1, 0, 1): ret = _mm_shuffle_epi_0101((a)); break; \
-		case _MM_SHUFFLE(2, 2, 1, 1): ret = _mm_shuffle_epi_2211((a)); break; \
-		case _MM_SHUFFLE(0, 1, 2, 2): ret = _mm_shuffle_epi_0122((a)); break; \
-		case _MM_SHUFFLE(3, 3, 3, 2): ret = _mm_shuffle_epi_3332((a)); break; \
-		case _MM_SHUFFLE(0, 0, 0, 0): ret = _mm_shuffle_epi32_splat((a),0); break; \
-		case _MM_SHUFFLE(1, 1, 1, 1): ret = _mm_shuffle_epi32_splat((a),1); break; \
-		case _MM_SHUFFLE(2, 2, 2, 2): ret = _mm_shuffle_epi32_splat((a),2); break; \
-		case _MM_SHUFFLE(3, 3, 3, 3): ret = _mm_shuffle_epi32_splat((a),3); break; \
-		default: ret = _mm_shuffle_epi32_default((a), (imm)); break; \
-	} \
-	ret; \
-})
-
-// Shuffles the upper 4 signed or unsigned 16 - bit integers in a as specified by imm.  https://msdn.microsoft.com/en-us/library/13ywktbs(v=vs.100).aspx
-//FORCE_INLINE __m128i _mm_shufflehi_epi16_function(__m128i a, __constrange(0,255) int imm)
-#define _mm_shufflehi_epi16_function(a, imm) \
-({ \
-	int16x8_t ret = vreinterpretq_s16_s32(a); \
-	int16x4_t highBits = vget_high_s16(ret); \
-	ret = vsetq_lane_s16(vget_lane_s16(highBits, (imm) & 0x3), ret, 4); \
-	ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 2) & 0x3), ret, 5); \
-	ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 4) & 0x3), ret, 6); \
-	ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 6) & 0x3), ret, 7); \
-	vreinterpretq_s32_s16(ret); \
-})
-
-//FORCE_INLINE __m128i _mm_shufflehi_epi16(__m128i a, __constrange(0,255) int imm)
-#define _mm_shufflehi_epi16(a, imm) \
-	_mm_shufflehi_epi16_function((a), (imm))
-
-
-// Shifts the 4 signed or unsigned 32-bit integers in a left by count bits while shifting in zeros. : https://msdn.microsoft.com/en-us/library/z2k3bbtb%28v=vs.90%29.aspx
-//FORCE_INLINE __m128i _mm_slli_epi32(__m128i a, __constrange(0,255) int imm)
-#define _mm_slli_epi32(a, imm) \
-({ \
-	__m128i ret; \
-	if ((imm) <= 0) {\
-		ret = a; \
-	} \
-	else if ((imm) > 31) { \
-		ret = _mm_setzero_si128(); \
-	} \
-	else { \
-		ret = vreinterpretq_m128i_s32(vshlq_n_s32(vreinterpretq_s32_m128i(a), (imm))); \
-	} \
-	ret; \
-})
-
-//Shifts the 4 signed or unsigned 32-bit integers in a right by count bits while shifting in zeros.  https://msdn.microsoft.com/en-us/library/w486zcfa(v=vs.100).aspx
-//FORCE_INLINE __m128i _mm_srli_epi32(__m128i a, __constrange(0,255) int imm)
-#define _mm_srli_epi32(a, imm) \
-({ \
-	__m128i ret; \
-	if ((imm) <= 0) { \
-		ret = a; \
-	} \
-	else if ((imm)> 31) { \
-		ret = _mm_setzero_si128(); \
-	} \
-	else { \
-		ret = vreinterpretq_m128i_u32(vshrq_n_u32(vreinterpretq_u32_m128i(a), (imm))); \
-	} \
-	ret; \
-})
-
-// Shifts the 4 signed 32 - bit integers in a right by count bits while shifting in the sign bit.  https://msdn.microsoft.com/en-us/library/z1939387(v=vs.100).aspx
-//FORCE_INLINE __m128i _mm_srai_epi32(__m128i a, __constrange(0,255) int imm)
-#define _mm_srai_epi32(a, imm) \
-({ \
-	__m128i ret; \
-	if ((imm) <= 0) { \
-		ret = a; \
-	} \
-	else if ((imm) > 31) { \
-		ret = vreinterpretq_m128i_s32(vshrq_n_s32(vreinterpretq_s32_m128i(a), 16)); \
-		ret = vreinterpretq_m128i_s32(vshrq_n_s32(vreinterpretq_s32_m128i(ret), 16)); \
-	} \
-	else { \
-		ret = vreinterpretq_m128i_s32(vshrq_n_s32(vreinterpretq_s32_m128i(a), (imm))); \
-	} \
-	ret; \
-})
-
-// Shifts the 128 - bit value in a right by imm bytes while shifting in zeros.imm must be an immediate. https://msdn.microsoft.com/en-us/library/305w28yz(v=vs.100).aspx
-//FORCE_INLINE _mm_srli_si128(__m128i a, __constrange(0,255) int imm)
-#define _mm_srli_si128(a, imm) \
-({ \
-	__m128i ret; \
-	if ((imm) <= 0) { \
-		ret = a; \
-	} \
-	else if ((imm) > 15) { \
-		ret = _mm_setzero_si128(); \
-	} \
-	else { \
-		ret = vreinterpretq_m128i_s8(vextq_s8(vreinterpretq_s8_m128i(a), vdupq_n_s8(0), (imm))); \
-	} \
-	ret; \
-})
-
-// Shifts the 128-bit value in a left by imm bytes while shifting in zeros. imm must be an immediate.  https://msdn.microsoft.com/en-us/library/34d3k2kt(v=vs.100).aspx
-//FORCE_INLINE __m128i _mm_slli_si128(__m128i a, __constrange(0,255) int imm)
-#define _mm_slli_si128(a, imm) \
-({ \
-	__m128i ret; \
-	if ((imm) <= 0) { \
-		ret = a; \
-	} \
-	else if ((imm) > 15) { \
-		ret = _mm_setzero_si128(); \
-	} \
-	else { \
-		ret = vreinterpretq_m128i_s8(vextq_s8(vdupq_n_s8(0), vreinterpretq_s8_m128i(a), 16 - (imm))); \
-	} \
-	ret; \
-})
-
-// NEON does not provide a version of this function, here is an article about some ways to repro the results.
-// http://stackoverflow.com/questions/11870910/sse-mm-movemask-epi8-equivalent-method-for-arm-neon
-// Creates a 16-bit mask from the most significant bits of the 16 signed or unsigned 8-bit integers in a and zero extends the upper bits. https://msdn.microsoft.com/en-us/library/vstudio/s090c8fk(v=vs.100).aspx
-FORCE_INLINE int _mm_movemask_epi8(__m128i _a)
-{
-	uint8x16_t input = vreinterpretq_u8_m128i(_a);
-	static const int8_t __attribute__((aligned(16))) xr[8] = { -7, -6, -5, -4, -3, -2, -1, 0 };
-	uint8x8_t mask_and = vdup_n_u8(0x80);
-	int8x8_t mask_shift = vld1_s8(xr);
-
-	uint8x8_t lo = vget_low_u8(input);
-	uint8x8_t hi = vget_high_u8(input);
-
-	lo = vand_u8(lo, mask_and);
-	lo = vshl_u8(lo, mask_shift);
-
-	hi = vand_u8(hi, mask_and);
-	hi = vshl_u8(hi, mask_shift);
-
-	lo = vpadd_u8(lo, lo);
-	lo = vpadd_u8(lo, lo);
-	lo = vpadd_u8(lo, lo);
-
-	hi = vpadd_u8(hi, hi);
-	hi = vpadd_u8(hi, hi);
-	hi = vpadd_u8(hi, hi);
-
-	return ((hi[0] << 8) | (lo[0] & 0xFF));
-}
-
-
-// ******************************************
-// Math operations
-// ******************************************
-
-// Subtracts the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/1zad2k61(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_sub_ps(__m128 a, __m128 b)
-{
-	return vreinterpretq_m128_f32(vsubq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)));
-}
-
-// Subtracts the 4 signed or unsigned 32-bit integers of b from the 4 signed or unsigned 32-bit integers of a. https://msdn.microsoft.com/en-us/library/vstudio/fhh866h0(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_sub_epi32(__m128i a, __m128i b)
-{
-	return vreinterpretq_m128_f32(vsubq_s32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)));
-}
-
-FORCE_INLINE __m128i _mm_sub_epi16(__m128i a, __m128i b)
-{
-	return vreinterpretq_m128i_s16(vsubq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b)));
-}
-
-// Adds the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/c9848chc(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_add_ps(__m128 a, __m128 b)
-{
-	return vreinterpretq_m128_f32(vaddq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)));
-}
-
-// adds the scalar single-precision floating point values of a and b.  https://msdn.microsoft.com/en-us/library/be94x2y6(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_add_ss(__m128 a, __m128 b)
-{
-	float32_t b0 = vgetq_lane_f32(vreinterpretq_f32_m128(b), 0);
-	float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0);
-	//the upper values in the result must be the remnants of <a>.
-	return vreinterpretq_m128_f32(vaddq_f32(a, value));
-}
-
-// Adds the 4 signed or unsigned 32-bit integers in a to the 4 signed or unsigned 32-bit integers in b. https://msdn.microsoft.com/en-us/library/vstudio/09xs4fkk(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_add_epi32(__m128i a, __m128i b)
-{
-	return vreinterpretq_m128i_s32(vaddq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)));
-}
-
-// Adds the 8 signed or unsigned 16-bit integers in a to the 8 signed or unsigned 16-bit integers in b. https://msdn.microsoft.com/en-us/library/fceha5k4(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_add_epi16(__m128i a, __m128i b)
-{
-	return vreinterpretq_m128i_s16(vaddq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b)));
-}
-
-// Multiplies the 8 signed or unsigned 16-bit integers from a by the 8 signed or unsigned 16-bit integers from b. https://msdn.microsoft.com/en-us/library/vstudio/9ks1472s(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_mullo_epi16(__m128i a, __m128i b)
-{
-	return vreinterpretq_m128i_s16(vmulq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b)));
-}
-
-// Multiplies the 4 signed or unsigned 32-bit integers from a by the 4 signed or unsigned 32-bit integers from b. https://msdn.microsoft.com/en-us/library/vstudio/bb531409(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_mullo_epi32(__m128i a, __m128i b)
-{
-	return vreinterpretq_m128i_s32(vmulq_s32(vreinterpretq_s32_m128i(a),vreinterpretq_s32_m128i(b)));
-}
-
-// Multiplies the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/22kbk6t9(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
-{
-	return vreinterpretq_m128_f32(vmulq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)));
-}
-
-// Divides the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/edaw8147(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_div_ps(__m128 a, __m128 b)
-{
-	float32x4_t recip0 = vrecpeq_f32(vreinterpretq_f32_m128(b));
-	float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, vreinterpretq_f32_m128(b)));
-	return vreinterpretq_m128_f32(vmulq_f32(vreinterpretq_f32_m128(a), recip1));
-}
-
-// Divides the scalar single-precision floating point value of a by b.  https://msdn.microsoft.com/en-us/library/4y73xa49(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_div_ss(__m128 a, __m128 b)
-{
-	float32_t value = vgetq_lane_f32(vreinterpretq_f32_m128(_mm_div_ps(a, b)), 0);
-	return vreinterpretq_m128_f32(vsetq_lane_f32(value, vreinterpretq_f32_m128(a), 0));
-}
-
-// This version does additional iterations to improve accuracy.  Between 1 and 4 recommended.
-// Computes the approximations of reciprocals of the four single-precision, floating-point values of a. https://msdn.microsoft.com/en-us/library/vstudio/796k1tty(v=vs.100).aspx
-FORCE_INLINE __m128 recipq_newton(__m128 in, int n)
-{
-	int i;
-	float32x4_t recip = vrecpeq_f32(vreinterpretq_f32_m128(in));
-	for (i = 0; i < n; ++i)
-	{
-		recip = vmulq_f32(recip, vrecpsq_f32(recip, vreinterpretq_f32_m128(in)));
-	}
-	return vreinterpretq_m128_f32(recip);
-}
-
-// Computes the approximations of reciprocals of the four single-precision, floating-point values of a. https://msdn.microsoft.com/en-us/library/vstudio/796k1tty(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_rcp_ps(__m128 in)
-{
-	float32x4_t recip = vrecpeq_f32(vreinterpretq_f32_m128(in));
-	recip = vmulq_f32(recip, vrecpsq_f32(recip, vreinterpretq_f32_m128(in)));
-	return vreinterpretq_m128_f32(recip);
-}
-
-// Computes the approximations of square roots of the four single-precision, floating-point values of a. First computes reciprocal square roots and then reciprocals of the four values. https://msdn.microsoft.com/en-us/library/vstudio/8z67bwwk(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_sqrt_ps(__m128 in)
-{
-	float32x4_t recipsq = vrsqrteq_f32(vreinterpretq_f32_m128(in));
-	float32x4_t sq = vrecpeq_f32(recipsq);
-	// ??? use step versions of both sqrt and recip for better accuracy?
-	return vreinterpretq_m128_f32(sq);
-}
-
-// Computes the approximation of the square root of the scalar single-precision floating point value of in.  https://msdn.microsoft.com/en-us/library/ahfsc22d(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_sqrt_ss(__m128 in)
-{
-	float32_t value = vgetq_lane_f32(vreinterpretq_f32_m128(_mm_sqrt_ps(in)), 0);
-	return vreinterpretq_m128_f32(vsetq_lane_f32(value, vreinterpretq_f32_m128(in), 0));
-}
-
-// Computes the approximations of the reciprocal square roots of the four single-precision floating point values of in.  https://msdn.microsoft.com/en-us/library/22hfsh53(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_rsqrt_ps(__m128 in)
-{
-	return vreinterpretq_m128_f32(vrsqrteq_f32(vreinterpretq_f32_m128(in)));
-}
-
-// Computes the maximums of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/ff5d607a(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_max_ps(__m128 a, __m128 b)
-{
-	return vreinterpretq_m128_f32(vmaxq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)));
-}
-
-// Computes the minima of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/wh13kadz(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_min_ps(__m128 a, __m128 b)
-{
-	return vreinterpretq_m128_f32(vminq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)));
-}
-
-// Computes the maximum of the two lower scalar single-precision floating point values of a and b.  https://msdn.microsoft.com/en-us/library/s6db5esz(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_max_ss(__m128 a, __m128 b)
-{
-	float32_t value = vgetq_lane_f32(vmaxq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0);
-	return vreinterpretq_m128_f32(vsetq_lane_f32(value, vreinterpretq_f32_m128(a), 0));
-}
-
-// Computes the minimum of the two lower scalar single-precision floating point values of a and b.  https://msdn.microsoft.com/en-us/library/0a9y7xaa(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_min_ss(__m128 a, __m128 b)
-{
-	float32_t value = vgetq_lane_f32(vminq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0);
-	return vreinterpretq_m128_f32(vsetq_lane_f32(value, vreinterpretq_f32_m128(a), 0));
-}
-
-// Computes the pairwise minima of the 8 signed 16-bit integers from a and the 8 signed 16-bit integers from b. https://msdn.microsoft.com/en-us/library/vstudio/6te997ew(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_min_epi16(__m128i a, __m128i b)
-{
-	return vreinterpretq_m128i_s16(vminq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b)));
-}
-
-// epi versions of min/max
-// Computes the pariwise maximums of the four signed 32-bit integer values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/bb514055(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_max_epi32(__m128i a, __m128i b)
-{
-	return vreinterpretq_m128i_s32(vmaxq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)));
-}
-
-// Computes the pariwise minima of the four signed 32-bit integer values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/bb531476(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_min_epi32(__m128i a, __m128i b)
-{
-	return vreinterpretq_m128i_s32(vminq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)));
-}
-
-// Multiplies the 8 signed 16-bit integers from a by the 8 signed 16-bit integers from b. https://msdn.microsoft.com/en-us/library/vstudio/59hddw1d(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_mulhi_epi16(__m128i a, __m128i b)
-{
-	/* apoty: issue with large values because of result saturation */
-	//int16x8_t ret = vqdmulhq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b)); /* =2*a*b */
-	//return vreinterpretq_m128i_s16(vshrq_n_s16(ret, 1));
-	int16x4_t a3210 = vget_low_s16(vreinterpretq_s16_m128i(a));
-	int16x4_t b3210 = vget_low_s16(vreinterpretq_s16_m128i(b));
-	int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */
-	int16x4_t a7654 = vget_high_s16(vreinterpretq_s16_m128i(a));
-	int16x4_t b7654 = vget_high_s16(vreinterpretq_s16_m128i(b));
-	int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */
-	uint16x8x2_t r = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654));
-	return vreinterpretq_m128i_u16(r.val[1]);
-}
-
-// Computes pairwise add of each argument as single-precision, floating-point values a and b. 
-//https://msdn.microsoft.com/en-us/library/yd9wecaa.aspx
-FORCE_INLINE __m128 _mm_hadd_ps(__m128 a, __m128 b ) 
-{
-#if defined(__aarch64__)
-	return vreinterpretq_m128_f32(vpaddq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); //AArch64
-#else
-	float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a));
-	float32x2_t a32 = vget_high_f32(vreinterpretq_f32_m128(a));
-	float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b));
-	float32x2_t b32 = vget_high_f32(vreinterpretq_f32_m128(b));
-	return vreinterpretq_m128_f32(vcombine_f32(vpadd_f32(a10, a32), vpadd_f32(b10, b32)));
-#endif
-}
-
-// ******************************************
-// Compare operations
-// ******************************************
-
-// Compares for less than https://msdn.microsoft.com/en-us/library/vstudio/f330yhc8(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_cmplt_ps(__m128 a, __m128 b)
-{
-	return vreinterpretq_m128_u32(vcltq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)));
-}
-
-// Compares for greater than. https://msdn.microsoft.com/en-us/library/vstudio/11dy102s(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_cmpgt_ps(__m128 a, __m128 b)
-{
-	return vreinterpretq_m128_u32(vcgtq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)));
-}
-
-// Compares for greater than or equal. https://msdn.microsoft.com/en-us/library/vstudio/fs813y2t(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_cmpge_ps(__m128 a, __m128 b)
-{
-	return vreinterpretq_m128_u32(vcgeq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)));
-}
-
-// Compares for less than or equal. https://msdn.microsoft.com/en-us/library/vstudio/1s75w83z(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_cmple_ps(__m128 a, __m128 b)
-{
-	return vreinterpretq_m128_u32(vcleq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)));
-}
-
-// Compares for equality. https://msdn.microsoft.com/en-us/library/vstudio/36aectz5(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_cmpeq_ps(__m128 a, __m128 b)
-{
-	return vreinterpretq_m128_u32(vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)));
-}
-
-// Compares the 4 signed 32-bit integers in a and the 4 signed 32-bit integers in b for less than. https://msdn.microsoft.com/en-us/library/vstudio/4ak0bf5d(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_cmplt_epi32(__m128i a, __m128i b)
-{
-	return vreinterpretq_m128i_u32(vcltq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)));
-}
-
-// Compares the 4 signed 32-bit integers in a and the 4 signed 32-bit integers in b for greater than. https://msdn.microsoft.com/en-us/library/vstudio/1s9f2z0y(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_cmpgt_epi32(__m128i a, __m128i b)
-{
-	return vreinterpretq_m128i_u32(vcgtq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)));
-}
-
-// Compares the four 32-bit floats in a and b to check if any values are NaN. Ordered compare between each value returns true for "orderable" and false for "not orderable" (NaN). https://msdn.microsoft.com/en-us/library/vstudio/0h9w00fx(v=vs.100).aspx
-// see also:
-// http://stackoverflow.com/questions/8627331/what-does-ordered-unordered-comparison-mean
-// http://stackoverflow.com/questions/29349621/neon-isnanval-intrinsics
-FORCE_INLINE __m128 _mm_cmpord_ps(__m128 a, __m128 b ) 
-{
-	// Note: NEON does not have ordered compare builtin
-	// Need to compare a eq a and b eq b to check for NaN
-	// Do AND of results to get final
-	uint32x4_t ceqaa = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a));
-	uint32x4_t ceqbb = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b));
-	return vreinterpretq_m128_u32(vandq_u32(ceqaa, ceqbb));
-}
-
-// Compares the lower single-precision floating point scalar values of a and b using a less than operation. : https://msdn.microsoft.com/en-us/library/2kwe606b(v=vs.90).aspx
-// Important note!! The documentation on MSDN is incorrect!  If either of the values is a NAN the docs say you will get a one, but in fact, it will return a zero!!
-FORCE_INLINE int _mm_comilt_ss(__m128 a, __m128 b)
-{
-	uint32x4_t a_not_nan = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a));
-	uint32x4_t b_not_nan = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b));
-	uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
-	uint32x4_t a_lt_b = vcltq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b));
-	return (vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0) ? 1 : 0;
-}
-
-// Compares the lower single-precision floating point scalar values of a and b using a greater than operation. : https://msdn.microsoft.com/en-us/library/b0738e0t(v=vs.100).aspx
-FORCE_INLINE int _mm_comigt_ss(__m128 a, __m128 b)
-{
-	//return vgetq_lane_u32(vcgtq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0);
-	uint32x4_t a_not_nan = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a));
-	uint32x4_t b_not_nan = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b));
-	uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
-	uint32x4_t a_gt_b = vcgtq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b));
-	return (vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0) ? 1 : 0;
-}
-
-// Compares the lower single-precision floating point scalar values of a and b using a less than or equal operation. : https://msdn.microsoft.com/en-us/library/1w4t7c57(v=vs.90).aspx
-FORCE_INLINE int _mm_comile_ss(__m128 a, __m128 b)
-{
-	//return vgetq_lane_u32(vcleq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0);
-	uint32x4_t a_not_nan = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a));
-	uint32x4_t b_not_nan = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b));
-	uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
-	uint32x4_t a_le_b = vcleq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b));
-	return (vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0) ? 1 : 0;
-}
-
-// Compares the lower single-precision floating point scalar values of a and b using a greater than or equal operation. : https://msdn.microsoft.com/en-us/library/8t80des6(v=vs.100).aspx
-FORCE_INLINE int _mm_comige_ss(__m128 a, __m128 b)
-{
-	//return vgetq_lane_u32(vcgeq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0);
-	uint32x4_t a_not_nan = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a));
-	uint32x4_t b_not_nan = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b));
-	uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
-	uint32x4_t a_ge_b = vcgeq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b));
-	return (vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0) ? 1 : 0;
-}
-
-// Compares the lower single-precision floating point scalar values of a and b using an equality operation. : https://msdn.microsoft.com/en-us/library/93yx2h2b(v=vs.100).aspx
-FORCE_INLINE int _mm_comieq_ss(__m128 a, __m128 b)
-{
-	//return vgetq_lane_u32(vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0);
-	uint32x4_t a_not_nan = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a));
-	uint32x4_t b_not_nan = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b));
-	uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan));
-	uint32x4_t a_eq_b = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b));
-	return (vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0) ? 1 : 0;
-}
-
-// Compares the lower single-precision floating point scalar values of a and b using an inequality operation. : https://msdn.microsoft.com/en-us/library/bafh5e0a(v=vs.90).aspx
-FORCE_INLINE int _mm_comineq_ss(__m128 a, __m128 b)
-{
-	//return !vgetq_lane_u32(vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0);
-	uint32x4_t a_not_nan = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a));
-	uint32x4_t b_not_nan = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b));
-	uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan);
-	uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)));
-	return (vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0) ? 1 : 0;
-}
-
-// according to the documentation, these intrinsics behave the same as the non-'u' versions.  We'll just alias them here.
-#define _mm_ucomilt_ss      _mm_comilt_ss
-#define _mm_ucomile_ss      _mm_comile_ss
-#define _mm_ucomigt_ss      _mm_comigt_ss
-#define _mm_ucomige_ss      _mm_comige_ss
-#define _mm_ucomieq_ss      _mm_comieq_ss
-#define _mm_ucomineq_ss     _mm_comineq_ss
-
-// ******************************************
-// Conversions
-// ******************************************
-
-// Converts the four single-precision, floating-point values of a to signed 32-bit integer values using truncate. https://msdn.microsoft.com/en-us/library/vstudio/1h005y6x(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_cvttps_epi32(__m128 a)
-{
-	return vreinterpretq_m128i_s32(vcvtq_s32_f32(vreinterpretq_f32_m128(a)));
-}
-
-// Converts the four signed 32-bit integer values of a to single-precision, floating-point values https://msdn.microsoft.com/en-us/library/vstudio/36bwxcx5(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_cvtepi32_ps(__m128i a)
-{
-	return vreinterpretq_m128_f32(vcvtq_f32_s32(vreinterpretq_s32_m128i(a)));
-}
-
-// Converts the four unsigned 8-bit integers in the lower 32 bits to four unsigned 32-bit integers. https://msdn.microsoft.com/en-us/library/bb531467%28v=vs.100%29.aspx
-FORCE_INLINE __m128i _mm_cvtepu8_epi32(__m128i a)
-{
-	uint8x16_t u8x16 = vreinterpretq_u8_s32(a);        /* xxxx xxxx xxxx DCBA */
-	uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16));   /* 0x0x 0x0x 0D0C 0B0A */
-	uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */
-	return vreinterpretq_s32_u32(u32x4);
-}
-
-// Converts the four signed 16-bit integers in the lower 64 bits to four signed 32-bit integers. https://msdn.microsoft.com/en-us/library/bb514079%28v=vs.100%29.aspx
-FORCE_INLINE __m128i _mm_cvtepi16_epi32(__m128i a)
-{
-	return vreinterpretq_m128i_s32(vmovl_s16(vget_low_s16(vreinterpretq_s16_m128i(a))));
-}
-
-// Converts the four single-precision, floating-point values of a to signed 32-bit integer values. https://msdn.microsoft.com/en-us/library/vstudio/xdc42k5e(v=vs.100).aspx
-// *NOTE*. The default rounding mode on SSE is 'round to even', which ArmV7 does not support!  
-// It is supported on ARMv8 however.
-FORCE_INLINE __m128i _mm_cvtps_epi32(__m128 a)
-{
-#if defined(__aarch64__)
-	return vcvtnq_s32_f32(a);
-#else
-    uint32x4_t signmask = vdupq_n_u32(0x80000000);
-    float32x4_t half = vbslq_f32(signmask, vreinterpretq_f32_m128(a), vdupq_n_f32(0.5f)); /* +/- 0.5 */
-    int32x4_t r_normal = vcvtq_s32_f32(vaddq_f32(vreinterpretq_f32_m128(a), half)); /* round to integer: [a + 0.5]*/
-    int32x4_t r_trunc = vcvtq_s32_f32(vreinterpretq_f32_m128(a)); /* truncate to integer: [a] */
-    int32x4_t plusone = vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(vnegq_s32(r_trunc)), 31)); /* 1 or 0 */
-    int32x4_t r_even = vbicq_s32(vaddq_s32(r_trunc, plusone), vdupq_n_s32(1)); /* ([a] + {0,1}) & ~1 */
-    float32x4_t delta = vsubq_f32(vreinterpretq_f32_m128(a), vcvtq_f32_s32(r_trunc)); /* compute delta: delta = (a - [a]) */
-    uint32x4_t is_delta_half = vceqq_f32(delta, half); /* delta == +/- 0.5 */
-    return vreinterpretq_m128i_s32(vbslq_s32(is_delta_half, r_even, r_normal));
-#endif
-}
-
-// Moves the least significant 32 bits of a to a 32-bit integer. https://msdn.microsoft.com/en-us/library/5z7a9642%28v=vs.90%29.aspx
-FORCE_INLINE int _mm_cvtsi128_si32(__m128i a)
-{
-	return vgetq_lane_s32(vreinterpretq_s32_m128i(a), 0);
-}
-
-// Moves 32-bit integer a to the least significant 32 bits of an __m128 object, zero extending the upper bits. https://msdn.microsoft.com/en-us/library/ct3539ha%28v=vs.90%29.aspx
-FORCE_INLINE __m128i _mm_cvtsi32_si128(int a)
-{
-	return vreinterpretq_m128i_s32(vsetq_lane_s32(a, vdupq_n_s32(0), 0));
-}
-
-
-// Applies a type cast to reinterpret four 32-bit floating point values passed in as a 128-bit parameter as packed 32-bit integers. https://msdn.microsoft.com/en-us/library/bb514099.aspx
-FORCE_INLINE __m128i _mm_castps_si128(__m128 a)
-{
-	return vreinterpretq_m128i_s32(vreinterpretq_s32_m128(a));
-}
-
-// Applies a type cast to reinterpret four 32-bit integers passed in as a 128-bit parameter as packed 32-bit floating point values. https://msdn.microsoft.com/en-us/library/bb514029.aspx
-FORCE_INLINE __m128 _mm_castsi128_ps(__m128i a)
-{
-	return vreinterpretq_m128_s32(vreinterpretq_s32_m128i(a));
-}
-
-// Loads 128-bit value. : https://msdn.microsoft.com/en-us/library/atzzad1h(v=vs.80).aspx
-FORCE_INLINE __m128i _mm_load_si128(const __m128i *p)
-{
-	return vreinterpretq_m128i_s32(vld1q_s32((int32_t *)p));
-}
-
-// ******************************************
-// Miscellaneous Operations
-// ******************************************
-
-// Packs the 16 signed 16-bit integers from a and b into 8-bit integers and saturates. https://msdn.microsoft.com/en-us/library/k4y4f7w5%28v=vs.90%29.aspx
-FORCE_INLINE __m128i _mm_packs_epi16(__m128i a, __m128i b)
-{
-	return vreinterpretq_m128i_s8(vcombine_s8(vqmovn_s16(vreinterpretq_s16_m128i(a)), vqmovn_s16(vreinterpretq_s16_m128i(b))));
-}
-
-// Packs the 16 signed 16 - bit integers from a and b into 8 - bit unsigned integers and saturates. https://msdn.microsoft.com/en-us/library/07ad1wx4(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_packus_epi16(const __m128i a, const __m128i b)
-{
-	return vreinterpretq_m128i_u8(vcombine_u8(vqmovun_s16(vreinterpretq_s16_m128i(a)), vqmovun_s16(vreinterpretq_s16_m128i(b))));
-}
-
-// Packs the 8 signed 32-bit integers from a and b into signed 16-bit integers and saturates. https://msdn.microsoft.com/en-us/library/393t56f9%28v=vs.90%29.aspx
-FORCE_INLINE __m128i _mm_packs_epi32(__m128i a, __m128i b)
-{
-	return vreinterpretq_m128i_s16(vcombine_s16(vqmovn_s32(vreinterpretq_s32_m128i(a)), vqmovn_s32(vreinterpretq_s32_m128i(b))));
-}
-
-// Interleaves the lower 8 signed or unsigned 8-bit integers in a with the lower 8 signed or unsigned 8-bit integers in b.  https://msdn.microsoft.com/en-us/library/xf7k860c%28v=vs.90%29.aspx
-FORCE_INLINE __m128i _mm_unpacklo_epi8(__m128i a, __m128i b)
-{
-	int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(vreinterpretq_s16_m128i(a)));
-	int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(vreinterpretq_s16_m128i(b)));
-	int8x8x2_t result = vzip_s8(a1, b1);
-	return vreinterpretq_m128i_s8(vcombine_s8(result.val[0], result.val[1]));
-}
-
-// Interleaves the lower 4 signed or unsigned 16-bit integers in a with the lower 4 signed or unsigned 16-bit integers in b.  https://msdn.microsoft.com/en-us/library/btxb17bw%28v=vs.90%29.aspx
-FORCE_INLINE __m128i _mm_unpacklo_epi16(__m128i a, __m128i b)
-{
-	int16x4_t a1 = vget_low_s16(vreinterpretq_s16_m128i(a));
-	int16x4_t b1 = vget_low_s16(vreinterpretq_s16_m128i(b));
-	int16x4x2_t result = vzip_s16(a1, b1);
-	return vreinterpretq_m128i_s16(vcombine_s16(result.val[0], result.val[1]));
-}
-
-// Interleaves the lower 2 signed or unsigned 32 - bit integers in a with the lower 2 signed or unsigned 32 - bit integers in b.  https://msdn.microsoft.com/en-us/library/x8atst9d(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_unpacklo_epi32(__m128i a, __m128i b)
-{
-	int32x2_t a1 = vget_low_s32(vreinterpretq_s32_m128i(a));
-	int32x2_t b1 = vget_low_s32(vreinterpretq_s32_m128i(b));
-	int32x2x2_t result = vzip_s32(a1, b1);
-	return vreinterpretq_m128i_s32(vcombine_s32(result.val[0], result.val[1]));
-}
-
-// Selects and interleaves the lower two single-precision, floating-point values from a and b. https://msdn.microsoft.com/en-us/library/25st103b%28v=vs.90%29.aspx
-FORCE_INLINE __m128 _mm_unpacklo_ps(__m128 a, __m128 b)
-{
-	float32x2_t a1 = vget_low_f32(vreinterpretq_f32_m128(a));
-	float32x2_t b1 = vget_low_f32(vreinterpretq_f32_m128(b));
-	float32x2x2_t result = vzip_f32(a1, b1);
-	return vreinterpretq_m128_f32(vcombine_f32(result.val[0], result.val[1]));
-}
-
-// Selects and interleaves the upper two single-precision, floating-point values from a and b. https://msdn.microsoft.com/en-us/library/skccxx7d%28v=vs.90%29.aspx
-FORCE_INLINE __m128 _mm_unpackhi_ps(__m128 a, __m128 b)
-{
-	float32x2_t a1 = vget_high_f32(vreinterpretq_f32_m128(a));
-	float32x2_t b1 = vget_high_f32(vreinterpretq_f32_m128(b));
-	float32x2x2_t result = vzip_f32(a1, b1);
-	return vreinterpretq_m128_f32(vcombine_f32(result.val[0], result.val[1]));
-}
-
-// Interleaves the upper 8 signed or unsigned 8-bit integers in a with the upper 8 signed or unsigned 8-bit integers in b.  https://msdn.microsoft.com/en-us/library/t5h7783k(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_unpackhi_epi8(__m128i a, __m128i b)
-{
-	int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(vreinterpretq_s16_m128i(a)));
-	int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(vreinterpretq_s16_m128i(b)));
-	int8x8x2_t result = vzip_s8(a1, b1);
-	return vreinterpretq_m128i_s8(vcombine_s8(result.val[0], result.val[1]));
-}
-
-// Interleaves the upper 4 signed or unsigned 16-bit integers in a with the upper 4 signed or unsigned 16-bit integers in b.  https://msdn.microsoft.com/en-us/library/03196cz7(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_unpackhi_epi16(__m128i a, __m128i b)
-{
-	int16x4_t a1 = vget_high_s16(vreinterpretq_s16_m128i(a));
-	int16x4_t b1 = vget_high_s16(vreinterpretq_s16_m128i(b));
-	int16x4x2_t result = vzip_s16(a1, b1);
-	return vreinterpretq_m128i_s16(vcombine_s16(result.val[0], result.val[1]));
-}
-
-// Interleaves the upper 2 signed or unsigned 32-bit integers in a with the upper 2 signed or unsigned 32-bit integers in b.  https://msdn.microsoft.com/en-us/library/65sa7cbs(v=vs.100).aspx
-FORCE_INLINE __m128i _mm_unpackhi_epi32(__m128i a, __m128i b)
-{
-	int32x2_t a1 = vget_high_s32(vreinterpretq_s32_m128i(a));
-	int32x2_t b1 = vget_high_s32(vreinterpretq_s32_m128i(b));
-	int32x2x2_t result = vzip_s32(a1, b1);
-	return vreinterpretq_m128i_s32(vcombine_s32(result.val[0], result.val[1]));
-}
-
-// Extracts the selected signed or unsigned 16-bit integer from a and zero extends.  https://msdn.microsoft.com/en-us/library/6dceta0c(v=vs.100).aspx
-//FORCE_INLINE int _mm_extract_epi16(__m128i a, __constrange(0,8) int imm)
-#define _mm_extract_epi16(a, imm) \
-({ \
-	(vgetq_lane_s16(vreinterpretq_s16_m128i(a), (imm)) & 0x0000ffffUL); \
-})
-
-// Inserts the least significant 16 bits of b into the selected 16-bit integer of a. https://msdn.microsoft.com/en-us/library/kaze8hz1%28v=vs.100%29.aspx
-//FORCE_INLINE __m128i _mm_insert_epi16(__m128i a, const int b, __constrange(0,8) int imm)
-#define _mm_insert_epi16(a, b, imm) \
-({ \
-	vreinterpretq_m128i_s16(vsetq_lane_s16((b), vreinterpretq_s16_m128i(a), (imm))); \
-})
-
-// ******************************************
-// Streaming Extensions
-// ******************************************
-
-// Guarantees that every preceding store is globally visible before any subsequent store.  https://msdn.microsoft.com/en-us/library/5h2w73d1%28v=vs.90%29.aspx
-FORCE_INLINE void _mm_sfence(void)
-{
-	__sync_synchronize();
-}
-
-// Stores the data in a to the address p without polluting the caches.  If the cache line containing address p is already in the cache, the cache will be updated.Address p must be 16 - byte aligned.  https://msdn.microsoft.com/en-us/library/ba08y07y%28v=vs.90%29.aspx
-FORCE_INLINE void _mm_stream_si128(__m128i *p, __m128i a)
-{
-	*p = a;
-}
-
-// Cache line containing p is flushed and invalidated from all caches in the coherency domain. : https://msdn.microsoft.com/en-us/library/ba08y07y(v=vs.100).aspx
-FORCE_INLINE void _mm_clflush(void const*p) 
-{
-	// no corollary for Neon?
-}
-
-#if defined(__GNUC__) || defined(__clang__)
-#	pragma pop_macro("ALIGN_STRUCT")
-#	pragma pop_macro("FORCE_INLINE")
-#endif
-
-#endif
diff --git a/src/crypto/argon2_hasher/common/DLLExport.h b/src/crypto/argon2_hasher/common/DLLExport.h
new file mode 100644
index 00000000..3019914f
--- /dev/null
+++ b/src/crypto/argon2_hasher/common/DLLExport.h
@@ -0,0 +1,20 @@
+//
+// Created by Haifa Bogdan Adnan on 04.11.2018.
+//
+
+#ifndef ARGON2_DLLEXPORT_H
+#define ARGON2_DLLEXPORT_H
+
+// Unconditionally (re)defines DLLEXPORT for the exporting side: whatever
+// definition was in effect before this header is discarded first.
+#undef DLLEXPORT
+
+#if defined(_WIN64)
+	// 64-bit Windows build: DLLEXPORT marks a symbol as exported.
+	#define DLLEXPORT __declspec(dllexport)
+#else
+	// Any other target: DLLEXPORT expands to nothing.
+	#define DLLEXPORT
+#endif
+
+#endif //ARGON2_DLLEXPORT_H
diff --git a/src/crypto/argon2_hasher/common/DLLImport.h b/src/crypto/argon2_hasher/common/DLLImport.h
new file mode 100644
index 00000000..1946a4a2
--- /dev/null
+++ b/src/crypto/argon2_hasher/common/DLLImport.h
@@ -0,0 +1,20 @@
+//
+// Created by Haifa Bogdan Adnan on 04.11.2018.
+//
+
+#ifndef ARGON2_DLLIMPORT_H
+#define ARGON2_DLLIMPORT_H
+
+// Supplies a default for DLLEXPORT only when nothing defined the macro before
+// this header was reached; an existing definition is left untouched.
+#if !defined(DLLEXPORT)
+    #if defined(_WIN64)
+        // 64-bit Windows build: DLLEXPORT marks a symbol as imported.
+        #define DLLEXPORT __declspec(dllimport)
+    #else
+        // Any other target: DLLEXPORT expands to nothing.
+        #define DLLEXPORT
+    #endif
+#endif
+
+#endif //ARGON2_DLLIMPORT_H
diff --git a/src/crypto/argon2_hasher/common/common.cpp b/src/crypto/argon2_hasher/common/common.cpp
new file mode 100644
index 00000000..676e5a80
--- /dev/null
+++ b/src/crypto/argon2_hasher/common/common.cpp
@@ -0,0 +1,45 @@
+//
+// Created by Haifa Bogdan Adnan on 05/08/2018.
+//
+
+#include "DLLExport.h"
+#include "common.h"
+#include <dirent.h>
+#ifndef _WIN64
+#include <sys/stat.h>
+#endif
+
+// Tells whether `ent`, an entry read from `folder`, is a regular file.
+static bool isRegularFile(const string &folder, const struct dirent *ent) {
+	if (ent->d_type == DT_REG)
+		return true;
+#ifndef _WIN64
+	// Some filesystems do not fill in d_type, in which case readdir() reports
+	// DT_UNKNOWN and the type has to be looked up explicitly. lstat() does not
+	// follow symbolic links, which matches the DT_REG test above.
+	if (ent->d_type == DT_UNKNOWN) {
+		struct stat info;
+		const string path = folder + "/" + ent->d_name;
+		return lstat(path.c_str(), &info) == 0 && S_ISREG(info.st_mode);
+	}
+#endif
+	return false;
+}
+
+// Lists the names (without the directory part) of the regular files located
+// directly inside `folder`. Sub-directories are not descended into, and an
+// empty vector is returned when the folder cannot be opened.
+vector<string> getFiles(const string &folder) {
+	vector<string> result;
+	DIR *dir = opendir(folder.c_str());
+	if (dir == NULL)
+		return result;
+
+	struct dirent *ent;
+	while ((ent = readdir(dir)) != NULL) {
+		if (isRegularFile(folder, ent))
+			result.push_back(ent->d_name);
+	}
+	closedir(dir);
+	return result;
+}
diff --git a/src/crypto/argon2_hasher/common/common.h b/src/crypto/argon2_hasher/common/common.h
new file mode 100755
index 00000000..753716a0
--- /dev/null
+++ b/src/crypto/argon2_hasher/common/common.h
@@ -0,0 +1,68 @@
+//
+// Created by Haifa Bogdan Adnan on 04/08/2018.
+//
+// Common prelude for the argon2_hasher sources: pulls in the standard and
+// system headers used by the module, provides the DLLEXPORT default through
+// DLLImport.h, and declares the LOG macro and getFiles().
+
+#ifndef ARGON2_COMMON_H
+#define ARGON2_COMMON_H
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+#include <string>
+#include <vector>
+#include <queue>
+#include <list>
+#include <map>
+#include <iostream>
+#include <sstream>
+#include <fstream>
+#include <iomanip>
+#include <regex>
+#include <random>
+#include <algorithm>
+#include <thread>
+#include <mutex>
+#include <chrono>
+
+#include <cmath>
+#include <signal.h>
+
+// NOTE(review): dlfcn.h is included outside the _WIN64 guard below, i.e. on
+// every platform — confirm the Windows build provides this header.
+#include <dlfcn.h>
+#include "DLLImport.h"
+
+// Platform headers: the POSIX / BSD-socket set everywhere except 64-bit
+// Windows, which gets win64.h instead.
+#ifndef _WIN64
+#include <unistd.h>
+#include <sys/time.h>
+
+#include<sys/socket.h>
+#include<netdb.h>
+#include<arpa/inet.h>
+#include <fcntl.h>
+#else
+#include <win64.h>
+#endif
+
+#ifdef __APPLE__
+#include "../macosx/cpu_affinity.h"
+#endif
+
+// Makes every std name visible unqualified in all files that include this
+// header; the declaration of getFiles() below already relies on it.
+using namespace std;
+
+// Prints msg and a newline to stdout and flushes the stream. msg is pasted
+// into the stream expression as-is, so it may be a chain of << operands.
+#define LOG(msg) cout<<msg<<endl<<flush
+
+// Returns the names of the regular files found in folder.
+DLLEXPORT vector<string> getFiles(const string &folder);
+
+#endif //ARGON2_COMMON_H
diff --git a/src/crypto/argon2_hasher/crypt/base64.cpp b/src/crypto/argon2_hasher/crypt/base64.cpp
new file mode 100644
index 00000000..12975989
--- /dev/null
+++ b/src/crypto/argon2_hasher/crypt/base64.cpp
@@ -0,0 +1,122 @@
+//
+// Created by Haifa Bogdan Adnan on 17/08/2018.
+//
+
+#include "crypto/argon2_hasher/common/DLLExport.h"
+#include "../common/common.h"
+#include "base64.h"
+
+// The base64 alphabet; a character's index is the 6-bit value it encodes.
+static const string base64_chars =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+        "abcdefghijklmnopqrstuvwxyz"
+        "0123456789+/";
+
+// True for characters of the alphabet above (the '=' padding is not one).
+static inline bool is_base64(unsigned char c) {
+        return (isalnum(c) || (c == '+') || (c == '/'));
+}
+
+// Encodes input_size bytes of input as base64 text padded with '=' to a
+// multiple of four characters. output must hold 4 * ceil(input_size / 3)
+// characters; no terminating NUL is appended. A size <= 0 writes nothing.
+void base64::encode(const char *input, int input_size, char *output) {
+        char *ret = output;
+        int i = 0;
+        int j = 0;
+        unsigned char char_array_3[3];
+        unsigned char char_array_4[4];
+
+        while (input_size-- > 0) {
+                char_array_3[i++] = *(input++);
+                if (i == 3) {
+                        // A full 3-byte group becomes four 6-bit indices.
+                        char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
+                        char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
+                        char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
+                        char_array_4[3] = char_array_3[2] & 0x3f;
+
+                        for(i = 0; (i <4) ; i++)
+                                *(ret++) = base64_chars[char_array_4[i]];
+                        i = 0;
+                }
+        }
+
+        if (i)
+        {
+                // 1 or 2 leftover bytes: zero-fill the group, emit i + 1
+                // characters and pad the rest with '='.
+                for(j = i; j < 3; j++)
+                        char_array_3[j] = '\0';
+
+                char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
+                char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
+                char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
+                char_array_4[3] = char_array_3[2] & 0x3f;
+
+                for (j = 0; (j < i + 1); j++)
+                        *(ret++) = base64_chars[char_array_4[j]];
+
+                while((i++ < 3))
+                        *(ret++) = '=';
+
+        }
+}
+
+// Decodes NUL-terminated base64 text into output, stopping at the first '='
+// or character outside the alphabet. Returns the number of bytes written, or
+// -1 as soon as the result would exceed output_size bytes (bytes decoded up
+// to that point have already been stored).
+int base64::decode(const char *input, char *output, int output_size) {
+        size_t in_len = strlen(input);
+        int i = 0;
+        int j = 0;
+        int in_ = 0;
+        unsigned char char_array_4[4], char_array_3[3];
+        char *ret = output;
+        int out_size = 0;
+
+        while (in_len-- && ( input[in_] != '=') && is_base64(input[in_])) {
+                char_array_4[i++] = input[in_]; in_++;
+                if (i ==4) {
+                        // Map the four characters back to their 6-bit values.
+                        for (i = 0; i <4; i++)
+                                char_array_4[i] = base64_chars.find(char_array_4[i]);
+
+                        char_array_3[0] = ( char_array_4[0] << 2       ) + ((char_array_4[1] & 0x30) >> 4);
+                        char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+                        char_array_3[2] = ((char_array_4[2] & 0x3) << 6) +   char_array_4[3];
+
+                        for (i = 0; (i < 3); i++) {
+                                out_size ++;
+                                if(output_size < out_size)
+                                        return -1;
+                                *(ret++) = char_array_3[i];
+                        }
+                        i = 0;
+                }
+        }
+
+        if (i) {
+                // Trailing group of 1-3 characters: clear the unused slots so
+                // the arithmetic below never reads bytes that were not set.
+                for (j = i; j < 4; j++)
+                        char_array_4[j] = 0;
+
+                for (j = 0; j < i; j++)
+                        char_array_4[j] = base64_chars.find(char_array_4[j]);
+
+                char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
+                char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+
+                // i characters carry i - 1 complete bytes.
+                for (j = 0; (j < i - 1); j++) {
+                        out_size ++;
+                        if(output_size < out_size)
+                                return -1;
+                        *(ret++) = char_array_3[j];
+                }
+        }
+        return out_size;
+}
+
diff --git a/src/crypto/argon2_hasher/crypt/base64.h b/src/crypto/argon2_hasher/crypt/base64.h
new file mode 100644
index 00000000..2ce74b88
--- /dev/null
+++ b/src/crypto/argon2_hasher/crypt/base64.h
@@ -0,0 +1,21 @@
+//
+// Created by Haifa Bogdan Adnan on 17/08/2018.
+//
+
+#ifndef ARGON2_BASE64_H
+#define ARGON2_BASE64_H
+
+// Static-only base64 helpers. This header does not define DLLEXPORT itself;
+// the macro has to be defined before it is included.
+class DLLEXPORT base64 {
+public:
+    // Writes the '='-padded base64 text for input_size bytes of input to
+    // output. The implementation does not NUL-terminate output.
+    static void encode(const char *input, int input_size, char *output);
+    // Decodes NUL-terminated base64 text, stopping at the first '=' or
+    // non-alphabet character. Returns the number of bytes written, or -1
+    // when more than output_size bytes would be needed.
+    static int decode(const char *input, char *output, int output_size);
+};
+
+#endif //ARGON2_BASE64_H
diff --git a/src/crypto/argon2_hasher/crypt/hex.cpp b/src/crypto/argon2_hasher/crypt/hex.cpp
new file mode 100644
index 00000000..e8a86312
--- /dev/null
+++ b/src/crypto/argon2_hasher/crypt/hex.cpp
@@ -0,0 +1,47 @@
+//
+// Created by Haifa Bogdan Adnan on 30/05/2019.
+//
+
+#include "crypto/argon2_hasher/common/DLLExport.h"
+#include "../common/common.h"
+#include "hex.h"
+
+// Value of a single hexadecimal digit (either case), or -1 for any other
+// character.
+static int hex_digit_value(char c) {
+    if (c >= '0' && c <= '9') return c - '0';
+    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+    return -1;
+}
+
+// Writes the upper-case hexadecimal form of input_size bytes to output,
+// followed by a terminating NUL; output needs 2 * input_size + 1 chars.
+void hex::encode(const unsigned char *input, int input_size, char *output) {
+    static const char digits[] = "0123456789ABCDEF";
+    for (int i = 0; i < input_size; i++) {
+        *(output++) = digits[input[i] >> 4];   // high nibble
+        *(output++) = digits[input[i] & 0x0f]; // low nibble
+    }
+    *output = 0;
+}
+
+// Decodes NUL-terminated hexadecimal text (upper or lower case) into output.
+// Returns the number of bytes written, or -1 when the text holds a non-hex
+// character or decodes to more than output_size bytes. A trailing unpaired
+// digit is ignored.
+int hex::decode(const char *input, unsigned char *output, int output_size) {
+    const size_t byte_count = strlen(input) / 2;
+    // Refuse to write past the caller's buffer.
+    if (output_size < 0 || byte_count > (size_t)output_size)
+        return -1;
+
+    for (size_t i = 0; i < byte_count; i++) {
+        const int hi = hex_digit_value(input[2 * i]);
+        const int lo = hex_digit_value(input[2 * i + 1]);
+        if (hi < 0 || lo < 0)
+            return -1;
+        output[i] = (unsigned char)((hi << 4) | lo);
+    }
+    return (int)byte_count;
+}
diff --git a/src/crypto/argon2_hasher/crypt/hex.h b/src/crypto/argon2_hasher/crypt/hex.h
new file mode 100644
index 00000000..038f2f8e
--- /dev/null
+++ b/src/crypto/argon2_hasher/crypt/hex.h
@@ -0,0 +1,20 @@
+//
+// Created by Haifa Bogdan Adnan on 30/05/2019.
+//
+
+#ifndef ARGON2_HEX_H
+#define ARGON2_HEX_H
+
+// Static-only hexadecimal text helpers. This header does not define
+// DLLEXPORT itself; the macro has to be defined before it is included.
+class DLLEXPORT hex {
+public:
+    // Writes two hexadecimal characters per input byte to output and then a
+    // terminating NUL, i.e. 2 * input_size + 1 characters in total.
+    static void encode(const unsigned char *input, int input_size, char *output);
+    // Converts NUL-terminated hexadecimal text back into bytes stored in
+    // output and returns the number of bytes decoded.
+    static int decode(const char *input, unsigned char *output, int output_size);
+};
+
+#endif //ARGON2_HEX_H
diff --git a/src/crypto/argon2_hasher/crypt/random_generator.cpp b/src/crypto/argon2_hasher/crypt/random_generator.cpp
new file mode 100644
index 00000000..a6801266
--- /dev/null
+++ b/src/crypto/argon2_hasher/crypt/random_generator.cpp
@@ -0,0 +1,34 @@
+//
+// Created by Haifa Bogdan Adnan on 17/08/2018.
+//
+
+#include "crypto/argon2_hasher/common/DLLExport.h"
+#include "../common/common.h"
+
+#include "random_generator.h"
+
+// Seeds the Mersenne Twister once from the random device and sets the
+// distribution up to produce values in [0, 255].
+random_generator::random_generator()
+    : __mt19937Gen(__randomDevice()),
+      __mt19937Distr(0, 255) {
+}
+
+// Gives access to the single shared generator defined at the end of this file.
+random_generator &random_generator::instance() {
+    return __instance;
+}
+
+// Fills buffer[0 .. length-1] with values drawn from the distribution.
+// NOTE(review): no lock is taken here although the class has a
+// __thread_lock member — confirm concurrent callers are serialized elsewhere.
+void random_generator::get_random_data(unsigned char *buffer, int length) {
+    int filled = 0;
+    while (filled < length) {
+        buffer[filled] = (unsigned char)__mt19937Distr(__mt19937Gen);
+        ++filled;
+    }
+}
+
+
+random_generator random_generator::__instance;
\ No newline at end of file
diff --git a/src/crypto/argon2_hasher/crypt/random_generator.h b/src/crypto/argon2_hasher/crypt/random_generator.h
new file mode 100644
index 00000000..f6438e43
--- /dev/null
+++ b/src/crypto/argon2_hasher/crypt/random_generator.h
@@ -0,0 +1,33 @@
+//
+// Created by Haifa Bogdan Adnan on 17/08/2018.
+//
+
+#ifndef ARGON2_RANDOM_GENERATOR_H
+#define ARGON2_RANDOM_GENERATOR_H
+
+// Byte-oriented random source: a std::mt19937 engine seeded from
+// std::random_device. The including file is expected to have set up
+// <random>, <mutex>, `using namespace std` and DLLEXPORT beforehand.
+class DLLEXPORT random_generator {
+public:
+    random_generator();
+    // Returns the class-wide shared instance (__instance below).
+    static random_generator &instance();
+
+    // Stores length generated byte values into buffer.
+    void get_random_data(unsigned char *buffer, int length);
+
+private:
+    // NOTE(review): identifiers containing a double underscore are reserved
+    // for the implementation in C++ — consider renaming (also in the .cpp).
+    random_device __randomDevice;
+    mt19937 __mt19937Gen;
+    uniform_int_distribution<> __mt19937Distr;
+    // NOTE(review): get_random_data() does not take this lock — confirm
+    // whether it is still needed.
+    mutex __thread_lock;
+
+    static random_generator __instance;
+};
+
+#endif //ARGON2_RANDOM_GENERATOR_H
diff --git a/src/crypto/argon2_hasher/crypt/sha512.cpp b/src/crypto/argon2_hasher/crypt/sha512.cpp
new file mode 100644
index 00000000..d94ec1bb
--- /dev/null
+++ b/src/crypto/argon2_hasher/crypt/sha512.cpp
@@ -0,0 +1,173 @@
+#include "crypto/argon2_hasher/common/DLLExport.h"
+
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include "sha512.h"
+
+// The 80 round constants of the SHA-512 compression function.
+const unsigned long long SHA512::sha512_k[80] = //ULL = uint64
+        {0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
+         0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
+         0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
+         0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
+         0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
+         0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
+         0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
+         0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
+         0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
+         0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
+         0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
+         0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
+         0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
+         0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
+         0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
+         0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
+         0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
+         0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
+         0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
+         0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
+         0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
+         0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
+         0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
+         0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
+         0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
+         0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
+         0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
+         0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
+         0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
+         0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
+         0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
+         0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
+         0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
+         0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
+         0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
+         0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
+         0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
+         0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
+         0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
+         0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL};
+
+// Compresses block_nb consecutive 128-byte blocks starting at message into
+// the chaining state m_h.
+void SHA512::transform(const unsigned char *message, unsigned int block_nb)
+{
+    uint64 w[80];
+    uint64 wv[8];
+    uint64 t1, t2;
+    const unsigned char *sub_block;
+    int i, j;
+    for (i = 0; i < (int) block_nb; i++) {
+        sub_block = message + (i << 7);
+        // Message schedule: 16 words read from the block, then 64 derived ones.
+        for (j = 0; j < 16; j++) {
+            SHA2_PACK64(&sub_block[j << 3], &w[j]);
+        }
+        for (j = 16; j < 80; j++) {
+            w[j] =  SHA512_F4(w[j -  2]) + w[j -  7] + SHA512_F3(w[j - 15]) + w[j - 16];
+        }
+        // Work on a copy of the state, then add it back after the 80 rounds.
+        for (j = 0; j < 8; j++) {
+            wv[j] = m_h[j];
+        }
+        for (j = 0; j < 80; j++) {
+            t1 = wv[7] + SHA512_F2(wv[4]) + SHA2_CH(wv[4], wv[5], wv[6])
+                 + sha512_k[j] + w[j];
+            t2 = SHA512_F1(wv[0]) + SHA2_MAJ(wv[0], wv[1], wv[2]);
+            wv[7] = wv[6];
+            wv[6] = wv[5];
+            wv[5] = wv[4];
+            wv[4] = wv[3] + t1;
+            wv[3] = wv[2];
+            wv[2] = wv[1];
+            wv[1] = wv[0];
+            wv[0] = t1 + t2;
+        }
+        for (j = 0; j < 8; j++) {
+            m_h[j] += wv[j];
+        }
+    }
+}
+
+// Resets the chaining state to the SHA-512 initial values and clears the
+// buffered-byte and processed-byte counters.
+void SHA512::init()
+{
+    m_h[0] = 0x6a09e667f3bcc908ULL;
+    m_h[1] = 0xbb67ae8584caa73bULL;
+    m_h[2] = 0x3c6ef372fe94f82bULL;
+    m_h[3] = 0xa54ff53a5f1d36f1ULL;
+    m_h[4] = 0x510e527fade682d1ULL;
+    m_h[5] = 0x9b05688c2b3e6c1fULL;
+    m_h[6] = 0x1f83d9abfb41bd6bULL;
+    m_h[7] = 0x5be0cd19137e2179ULL;
+    m_len = 0;
+    m_tot_len = 0;
+}
+
+// Feeds len bytes of message into the hash. Whole 128-byte blocks are
+// compressed right away; the remainder is kept in m_block for the next call.
+void SHA512::update(const unsigned char *message, unsigned int len)
+{
+    unsigned int block_nb;
+    unsigned int new_len, rem_len, tmp_len;
+    const unsigned char *shifted_message;
+    // Top up the partially filled block first.
+    tmp_len = SHA384_512_BLOCK_SIZE - m_len;
+    rem_len = len < tmp_len ? len : tmp_len;
+    memcpy(&m_block[m_len], message, rem_len);
+    if (m_len + len < SHA384_512_BLOCK_SIZE) {
+        m_len += len;
+        return;
+    }
+    new_len = len - rem_len;
+    block_nb = new_len / SHA384_512_BLOCK_SIZE;
+    shifted_message = message + rem_len;
+    transform(m_block, 1);
+    transform(shifted_message, block_nb);
+    // Stash the bytes that do not fill a whole block.
+    rem_len = new_len % SHA384_512_BLOCK_SIZE;
+    memcpy(m_block, &shifted_message[block_nb << 7], rem_len);
+    m_len = rem_len;
+    m_tot_len += (block_nb + 1) << 7;
+}
+
+// Pads the buffered data, appends the message bit length and writes the
+// 64-byte digest to digest.
+// NOTE: the bit length is kept in a 32-bit value (len_b), so only messages
+// shorter than 512 MiB get a correct length field.
+void SHA512::final(unsigned char *digest)
+{
+    unsigned int block_nb;
+    unsigned int pm_len;
+    unsigned int len_b;
+    int i;
+    block_nb = 1 + ((SHA384_512_BLOCK_SIZE - 17)
+                    < (m_len % SHA384_512_BLOCK_SIZE));
+    len_b = (m_tot_len + m_len) << 3;
+    pm_len = block_nb << 7;
+    memset(m_block + m_len, 0, pm_len - m_len);
+    m_block[m_len] = 0x80;
+    SHA2_UNPACK32(len_b, m_block + pm_len - 4);
+    transform(m_block, block_nb);
+    for (i = 0 ; i < 8; i++) {
+        SHA2_UNPACK64(m_h[i], &digest[i << 3]);
+    }
+}
+
+// One-shot helper: returns the SHA-512 digest of input[0 .. length-1] in a
+// malloc()ed buffer of DIGEST_SIZE bytes that the caller must free(), or
+// NULL when that buffer cannot be allocated.
+unsigned char *SHA512::hash(unsigned char *input, size_t length)
+{
+    unsigned char *digest = (unsigned char*)malloc(SHA512::DIGEST_SIZE);
+    if (digest == NULL)
+        return NULL;
+    memset(digest,0,SHA512::DIGEST_SIZE);
+    SHA512 ctx = SHA512();
+    ctx.init();
+    // update() takes a 32-bit length; length is narrowed here as before.
+    ctx.update(input, (unsigned int)length);
+    ctx.final(digest);
+    return digest;
+}
diff --git a/src/crypto/argon2_hasher/crypt/sha512.h b/src/crypto/argon2_hasher/crypt/sha512.h
new file mode 100644
index 00000000..5eb28326
--- /dev/null
+++ b/src/crypto/argon2_hasher/crypt/sha512.h
@@ -0,0 +1,70 @@
+#ifndef SHA512_H
+#define SHA512_H
+
+#include <string>
+
+class DLLEXPORT SHA512 // incremental SHA-512: init(), update() any number of times, then final()
+{
+protected:
+    typedef unsigned char uint8;
+    typedef unsigned int uint32;
+    typedef unsigned long long uint64;
+    // The SHA2_UNPACK*/SHA2_PACK64 macros below cast to these typedefs, so they only expand inside members.
+    const static uint64 sha512_k[]; // only declared here; defined outside this header
+    static const unsigned int SHA384_512_BLOCK_SIZE = (1024/8); // block size in bytes
+
+public:
+    void init(); // resets the hash state and the byte counters
+    void update(const unsigned char *message, unsigned int len); // absorbs len bytes
+    void final(unsigned char *digest); // writes DIGEST_SIZE bytes to digest
+    static const unsigned int DIGEST_SIZE = ( 512 / 8); // digest size in bytes
+    // One-shot helper: returns a malloc()ed DIGEST_SIZE-byte buffer that the caller must free().
+    static unsigned char *hash(unsigned char *input, size_t length);
+protected:
+    void transform(const unsigned char *message, unsigned int block_nb); // hashes block_nb consecutive blocks
+    unsigned int m_tot_len; // bytes already passed through transform()
+    unsigned int m_len; // bytes currently buffered in m_block
+    unsigned char m_block[2 * SHA384_512_BLOCK_SIZE]; // two blocks wide so final() can pad into a second block
+    uint64 m_h[8]; // running hash state; final() emits these eight words as the digest
+};
+
+#define SHA2_SHFR(x, n)    ((x) >> (n)) /* plain right shift; every argument is parenthesized so expression arguments expand safely */
+#define SHA2_ROTR(x, n)   (((x) >> (n)) | ((x) << ((sizeof(x) << 3) - (n)))) /* rotate right within the width of x */
+#define SHA2_ROTL(x, n)   (((x) << (n)) | ((x) >> ((sizeof(x) << 3) - (n)))) /* rotate left within the width of x */
+#define SHA2_CH(x, y, z)  (((x) & (y)) ^ (~(x) & (z))) /* bitwise choose: y where x is set, z elsewhere */
+#define SHA2_MAJ(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) /* bitwise majority of x, y, z */
+#define SHA512_F1(x) (SHA2_ROTR(x, 28) ^ SHA2_ROTR(x, 34) ^ SHA2_ROTR(x, 39))
+#define SHA512_F2(x) (SHA2_ROTR(x, 14) ^ SHA2_ROTR(x, 18) ^ SHA2_ROTR(x, 41))
+#define SHA512_F3(x) (SHA2_ROTR(x,  1) ^ SHA2_ROTR(x,  8) ^ SHA2_SHFR(x,  7))
+#define SHA512_F4(x) (SHA2_ROTR(x, 19) ^ SHA2_ROTR(x, 61) ^ SHA2_SHFR(x,  6))
+#define SHA2_UNPACK32(x, str)  /* stores the low 32 bits of x big-endian into str[0..3] */ \
+{                                             \
+*((str) + 3) = (uint8) ((x)      );       \
+*((str) + 2) = (uint8) ((x) >>  8);       \
+*((str) + 1) = (uint8) ((x) >> 16);       \
+*((str) + 0) = (uint8) ((x) >> 24);       \
+}
+#define SHA2_UNPACK64(x, str)  /* stores the 64-bit x big-endian into str[0..7] */ \
+{                                             \
+*((str) + 7) = (uint8) ((x)      );       \
+*((str) + 6) = (uint8) ((x) >>  8);       \
+*((str) + 5) = (uint8) ((x) >> 16);       \
+*((str) + 4) = (uint8) ((x) >> 24);       \
+*((str) + 3) = (uint8) ((x) >> 32);       \
+*((str) + 2) = (uint8) ((x) >> 40);       \
+*((str) + 1) = (uint8) ((x) >> 48);       \
+*((str) + 0) = (uint8) ((x) >> 56);       \
+}
+#define SHA2_PACK64(str, x)  /* reads str[0..7] as a big-endian 64-bit value into *x */ \
+{                                             \
+*(x) =   ((uint64) *((str) + 7)      )    \
+| ((uint64) *((str) + 6) <<  8)    \
+| ((uint64) *((str) + 5) << 16)    \
+| ((uint64) *((str) + 4) << 24)    \
+| ((uint64) *((str) + 3) << 32)    \
+| ((uint64) *((str) + 2) << 40)    \
+| ((uint64) *((str) + 1) << 48)    \
+| ((uint64) *((str) + 0) << 56);   \
+}
+
+#endif
diff --git a/src/crypto/argon2_hasher/hash/Hasher.cpp b/src/crypto/argon2_hasher/hash/Hasher.cpp
new file mode 100755
index 00000000..cea64052
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/Hasher.cpp
@@ -0,0 +1,132 @@
+//
+// Created by Haifa Bogdan Adnan on 03/08/2018.
+//
+
+#include "../common/common.h"
+#include "../crypt/base64.h"
+#include "../crypt/hex.h"
+#include "../crypt/random_generator.h"
+
+#include "crypto/argon2_hasher/common/DLLExport.h"
+#include "crypto/argon2_hasher/hash/argon2/Argon2.h"
+#include "Hasher.h"
+
+vector<Hasher *> *Hasher::m_registeredHashers = NULL; // allocated by the constructor on first use, or by loadHashers()
+string Hasher::m_appFolder = ""; // set by loadHashers() to the directory part of the application path
+
+typedef void (*hasherLoader)(); // signature of the "hasherLoader" entry point looked up in each module
+
+Hasher::Hasher()
+    : m_intensity(0),
+      m_type(""),
+      m_subType(""),
+      m_shortSubType(""),
+      m_description(""),
+      m_computingThreads(1) {
+    // Every hasher adds itself to the shared registry while it is being constructed;
+    // the registry vector is allocated the first time it is needed.
+    if(NULL == m_registeredHashers) {
+        m_registeredHashers = new vector<Hasher*>();
+    }
+
+    m_registeredHashers->push_back(this);
+}
+
+Hasher::~Hasher() {} // empty: the instance is not removed from m_registeredHashers
+
+string Hasher::type() { // returns a copy of m_type
+	return m_type;
+}
+
+string Hasher::subType(bool shortName) {
+    // Returns the sub-type name. With shortName set and a short name available, returns at most
+    // the first three characters of m_shortSubType; otherwise returns the full m_subType.
+    if(shortName && !(m_shortSubType.empty())) {
+        // substr() clamps to the string length; erase(3) threw std::out_of_range for 1-2 character names.
+        return m_shortSubType.substr(0, 3);
+    }
+    return m_subType;
+}
+
+string Hasher::info() { // returns a copy of m_description
+    return m_description;
+}
+
+int Hasher::computingThreads() { // returns m_computingThreads (the constructor sets it to 1)
+    return m_computingThreads;
+}
+
+void Hasher::loadHashers(const string &appPath) {
+    m_registeredHashers = new vector<Hasher*>(); // starts a fresh registry; a previous one is neither freed nor merged
+    // Modules are searched in "<directory of appPath>/modules/"; "." is used when appPath has no directory part.
+    string modulePath = ".";
+
+    size_t lastSlash = appPath.find_last_of("/\\");
+    if (lastSlash != string::npos) {
+        modulePath = appPath.substr(0, lastSlash);
+        if(modulePath.empty()) {
+            modulePath = ".";
+        }
+    }
+
+    m_appFolder = modulePath;
+
+    modulePath += "/modules/";
+    // Each ".hsh" module exports hasherLoader(); calling it constructs a hasher, which registers itself.
+    vector<string> files = getFiles(modulePath);
+    for(const string &file : files) {
+        if(file.find(".hsh") != string::npos) {
+            void *dllHandle = dlopen((modulePath + file).c_str(), RTLD_LAZY);
+            if(dllHandle != NULL) {
+                hasherLoader hasherLoaderPtr = (hasherLoader) dlsym(dllHandle, "hasherLoader");
+                if(hasherLoaderPtr != NULL) (*hasherLoaderPtr)(); // skip libraries that lack the entry point
+            }
+        }
+    }
+}
+
+vector<Hasher *> Hasher::getHashers() { // copy of the registry; empty when no registry has been created yet
+    return m_registeredHashers != NULL ? *m_registeredHashers : vector<Hasher *>();
+}
+
+vector<Hasher *> Hasher::getActiveHashers() { // registered hashers whose m_intensity is non-zero
+    vector<Hasher *> filtered;
+    if(m_registeredHashers == NULL) return filtered; // no registry yet: nothing can be active
+    for(Hasher *hasher : *m_registeredHashers) {
+        if(hasher->m_intensity != 0) filtered.push_back(hasher);
+    }
+    return filtered;
+}
+
+vector<Hasher *> Hasher::getHashers(const string &type) { // registered hashers whose m_type equals type
+    vector<Hasher *> filtered;
+    if(m_registeredHashers == NULL) return filtered; // no registry yet: nothing can match
+    for(Hasher *hasher : *m_registeredHashers) {
+        if(hasher->m_type == type) filtered.push_back(hasher);
+    }
+    return filtered;
+}
+
+map<int, DeviceInfo> &Hasher::devices() { // returns the live map by reference; m_deviceInfosMutex is not taken here
+    return m_deviceInfos;
+}
+
+void Hasher::storeDeviceInfo(int deviceId, DeviceInfo device) {
+    // Inserts or overwrites the entry for deviceId while holding m_deviceInfosMutex.
+    // The RAII guard releases the mutex even if the map insertion throws.
+    lock_guard<mutex> guard(m_deviceInfosMutex);
+    m_deviceInfos[deviceId] = device;
+}
+
+Argon2Profile *Hasher::getArgon2Profile(xmrig::Algo algorithm, xmrig::Variant variant) {
+    // Maps an algorithm/variant pair to its static Argon2 profile.
+    // Only xmrig::ARGON2 has profiles; everything else, and any
+    // variant not listed below, yields nullptr.
+    if(algorithm != xmrig::ARGON2) return nullptr;
+    switch(variant) {
+        case xmrig::VARIANT_CHUKWA:      return &argon2profile_3_1_512;
+        case xmrig::VARIANT_CHUKWA_LITE: return &argon2profile_4_1_256;
+        default:                         break;
+    }
+    return nullptr;
+}
+
diff --git a/src/crypto/argon2_hasher/hash/Hasher.h b/src/crypto/argon2_hasher/hash/Hasher.h
new file mode 100755
index 00000000..3f0c1b86
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/Hasher.h
@@ -0,0 +1,63 @@
+//
+// Created by Haifa Bogdan Adnan on 03/08/2018.
+//
+
+#ifndef ARGON2_HASHER_H
+#define ARGON2_HASHER_H
+
+#include "crypto/argon2_hasher/hash/argon2/Defs.h"
+#include "../../../core/HasherConfig.h"
+#include "../../../common/xmrig.h"
+
+struct DeviceInfo { // per-device record; Hasher keeps these in a map keyed by device id
+	string name;
+	string bus_id;
+	double intensity;
+};
+
+#define REGISTER_HASHER(x)        extern "C"  { DLLEXPORT void hasherLoader() { new x(); } } // exported entry point; the new instance is never deleted
+
+class DLLEXPORT Hasher { // abstract base of all hashers; every instance is tracked in a static registry
+public:
+    Hasher();
+    virtual ~Hasher();
+    // Backend interface, implemented by each concrete hasher.
+    virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant) = 0;
+    virtual bool configure(xmrig::HasherConfig &config) = 0;
+    virtual void cleanup() = 0;
+    virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) = 0;
+    virtual size_t parallelism(int workerIdx) = 0;
+    virtual size_t deviceCount() = 0;
+
+    string type();
+	string subType(bool shortName = false); // short form comes from m_shortSubType when shortName is set and it is non-empty
+
+    string info(); // returns m_description
+    int computingThreads();
+
+    map<int, DeviceInfo> &devices(); // live reference to the per-device map
+    // Registry access; loadHashers() scans "<directory of appPath>/modules/" for ".hsh" modules.
+    static vector<Hasher*> getHashers(const string &type); // hashers whose m_type equals type
+    static vector<Hasher*> getHashers(); // all registered hashers
+    static vector<Hasher*> getActiveHashers(); // hashers whose m_intensity is non-zero
+    static void loadHashers(const string &appPath);
+
+protected:
+    double m_intensity; // 0 excludes the hasher from getActiveHashers()
+    string m_type;
+	string m_subType;
+	string m_shortSubType; //max 3 characters
+    string m_description;
+    int m_computingThreads;
+    static string m_appFolder; // directory part of the path given to loadHashers()
+
+	void storeDeviceInfo(int deviceId, DeviceInfo device); // writes m_deviceInfos under m_deviceInfosMutex
+    Argon2Profile *getArgon2Profile(xmrig::Algo algorithm, xmrig::Variant variant); // nullptr when no profile matches
+
+private:
+    static vector<Hasher*> *m_registeredHashers; // created lazily; filled by the constructor
+    map<int, DeviceInfo> m_deviceInfos;
+    mutex m_deviceInfosMutex; // taken by storeDeviceInfo()
+};
+
+#endif //ARGON2_HASHER_H
diff --git a/src/crypto/argon2_hasher/hash/argon2/Argon2.cpp b/src/crypto/argon2_hasher/hash/argon2/Argon2.cpp
new file mode 100755
index 00000000..7accf8c0
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/argon2/Argon2.cpp
@@ -0,0 +1,143 @@
+//
+// Created by Haifa Bogdan Adnan on 05/08/2018.
+//
+
+#include "../../common/common.h"
+#include "../../crypt/base64.h"
+#include "../../crypt/hex.h"
+#include "../../crypt/random_generator.h"
+
+#include "blake2/blake2.h"
+#include "../../common/DLLExport.h"
+#include "../../../Argon2_constants.h"
+#include "Argon2.h"
+#include "Defs.h"
+
+Argon2::Argon2(argon2BlocksPrehash prehash, argon2BlocksFillerPtr filler, argon2BlocksPosthash posthash, void *memory, void *userData)
+    : m_prehash(prehash), m_filler(filler), m_posthash(posthash),
+      m_threads(1),
+      m_seedMemory((uint8_t*)memory),
+      m_outputMemory((uint8_t*)memory),
+      m_userData(userData) {
+    // Seed and output memory both start at the caller's buffer; fillBlocks() later repoints m_outputMemory.
+}
+
+int Argon2::generateHashes(const Argon2Profile &profile, HashData &hashData) {
+    // Runs the three stages in order: seed the memory, fill the blocks, encode the hashes.
+    // The || short-circuits, so fillBlocks() only runs after a successful seeding.
+    if(!initializeSeeds(profile, hashData) || !fillBlocks(profile)) {
+        return 0;
+    }
+    // The return value is whatever encodeHashes() reports (0 on failure).
+    return encodeHashes(profile, hashData);
+}
+
+bool Argon2::initializeSeeds(const Argon2Profile &profile, HashData &hashData) {
+    if(m_prehash != NULL) { // a custom pre-hash hook does all of the seeding itself
+        return (*m_prehash)(hashData.input, m_threads, (Argon2Profile*)&profile, m_userData);
+    }
+    else {
+        uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
+        // One pre-hash per thread; the nonce inside hashData.input is incremented after each iteration.
+        for (int i = 0; i < m_threads; i++, (*(nonce(hashData)))++) {
+            initialHash(profile, blockhash, (char *) hashData.input, hashData.inSize, xmrig::ARGON2_HASHLEN);
+            // Zero the seed bytes after the digest; fillFirstBlocks() writes its two counters there.
+            memset(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0,
+                   ARGON2_PREHASH_SEED_LENGTH -
+                   ARGON2_PREHASH_DIGEST_LENGTH);
+
+            fillFirstBlocks(profile, blockhash, i);
+        }
+        // On return the nonce in hashData.input has advanced by m_threads.
+        return true;
+    }
+}
+
+bool Argon2::fillBlocks(const Argon2Profile &profile) { // runs the filler hook; false when it returns no output buffer
+    m_outputMemory = static_cast<uint8_t *>((*m_filler)(m_threads, const_cast<Argon2Profile *>(&profile), m_userData));
+    return NULL != m_outputMemory;
+}
+
+int Argon2::encodeHashes(const Argon2Profile &profile, HashData &hashData) {
+    // A custom post-hash hook writes hashData.output itself; success counts as m_threads hashes.
+    if(m_posthash != NULL) {
+        bool encoded = (*m_posthash)(hashData.output, m_threads, (Argon2Profile*)&profile, m_userData);
+        return encoded ? m_threads : 0;
+    }
+    // The built-in path needs the buffer returned by the filler.
+    if (m_outputMemory == NULL) {
+        return 0;
+    }
+    // The nonce in the input is taken to be m_threads past the first one hashed
+    // (the built-in initializeSeeds() path increments it once per thread).
+    uint32_t firstNonce = *(nonce(hashData)) - m_threads;
+    for (int idx = 0; idx < m_threads; idx++) {
+        uint8_t *slot = hashData.output + idx * hashData.outSize;
+        uint32_t nonceInfo = firstNonce + idx;
+        blake2b_long((void *) slot, xmrig::ARGON2_HASHLEN,
+                     (void *) (m_outputMemory + idx * profile.memSize), ARGON2_BLOCK_SIZE);
+        memcpy(slot + xmrig::ARGON2_HASHLEN, &nonceInfo, 4); // the nonce is appended right after the hash
+    }
+    return m_threads;
+}
+
+void Argon2::initialHash(const Argon2Profile &profile, uint8_t *blockhash, const char *data, size_t dataSz,size_t outSz) {
+    // Writes the ARGON2_PREHASH_DIGEST_LENGTH-byte Blake2b pre-hash of the cost parameters,
+    // the input and the salt to blockhash.
+    // The order of the absorbed fields below determines the digest, so it is kept exactly
+    // as in the statement-by-statement form this replaces.
+    blake2b_state state;
+    blake2b_init(&state, ARGON2_PREHASH_DIGEST_LENGTH);
+
+    // Scalars are absorbed as 32-bit words in host byte order.
+    auto absorbWord = [&state](uint32_t word) {
+        blake2b_update(&state, (const uint8_t *)&word, sizeof(word));
+    };
+
+    // Byte strings are absorbed as a 32-bit length followed by the bytes themselves.
+    auto absorbBytes = [&state, &absorbWord](const char *bytes, size_t count) {
+        absorbWord((uint32_t)count);
+        blake2b_update(&state, (const uint8_t *)bytes, count);
+    };
+
+    absorbWord(profile.thrCost);
+    absorbWord((uint32_t)outSz);
+    absorbWord(profile.memCost);
+    absorbWord(profile.tmCost);
+    absorbWord(ARGON2_VERSION);
+    absorbWord(ARGON2_TYPE_VALUE);
+
+    // The whole input buffer is absorbed first ...
+    absorbBytes(data, dataSz);
+    // ... and its first xmrig::ARGON2_SALTLEN bytes are absorbed again as the salt.
+    absorbBytes(data, xmrig::ARGON2_SALTLEN);
+
+    // Two zero words close the sequence.
+    // NOTE(review): presumably the empty secret and associated-data lengths - confirm.
+    absorbWord(0);
+    absorbWord(0);
+
+    blake2b_final(&state, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
+}
+
+void Argon2::fillFirstBlocks(const Argon2Profile &profile, uint8_t *blockhash, int thread) {
+    // Derives blocks 0 and 1 of every lane of this thread's memory slice from the pre-hash seed.
+    block *blocks = (block *)(m_seedMemory + thread * profile.memSize);
+    size_t lane_length = profile.memCost / profile.thrCost;
+    uint8_t *counters = blockhash + ARGON2_PREHASH_DIGEST_LENGTH; // bytes 0-3: block index, bytes 4-7: lane index
+    const uint32_t firstBlock = 0, secondBlock = 1;
+    for (uint32_t l = 0; l < profile.thrCost; ++l) {
+        // memcpy rather than a uint32_t* cast: blockhash is a plain byte buffer (no alignment/aliasing guarantee).
+        memcpy(counters, &firstBlock, sizeof(firstBlock));
+        memcpy(counters + 4, &l, sizeof(l));
+        blake2b_long((void *)(blocks + l * lane_length), ARGON2_BLOCK_SIZE, blockhash,
+                     ARGON2_PREHASH_SEED_LENGTH);
+        memcpy(counters, &secondBlock, sizeof(secondBlock));
+        blake2b_long((void *)(blocks + l * lane_length + 1), ARGON2_BLOCK_SIZE, blockhash,
+                     ARGON2_PREHASH_SEED_LENGTH);
+    }
+}
+
+void Argon2::setThreads(int threads) { // number of hashes handled per generateHashes() call (the constructor sets 1)
+    m_threads = threads;
+}
diff --git a/src/crypto/argon2_hasher/hash/argon2/Argon2.h b/src/crypto/argon2_hasher/hash/argon2/Argon2.h
new file mode 100644
index 00000000..90e72d53
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/argon2/Argon2.h
@@ -0,0 +1,56 @@
+//
+// Created by Haifa Bogdan Adnan on 05/08/2018.
+//
+
+#ifndef ARIOMINER_ARGON2_H
+#define ARIOMINER_ARGON2_H
+
+#include "Defs.h"
+#include "crypto/argon2_hasher/hash/Hasher.h"
+
+typedef bool (*argon2BlocksPrehash)(void *, int, Argon2Profile *, void *); // called as (input data, threads, profile, user data)
+typedef void *(*argon2BlocksFillerPtr)(int, Argon2Profile *, void *); // called as (threads, profile, user data); returns the output memory, NULL on failure
+typedef bool (*argon2BlocksPosthash)(void *, int, Argon2Profile *, void *); // called as (hash output buffer, threads, profile, user data)
+
+struct HashData { // input/output buffers for one Argon2::generateHashes() call
+    uint8_t *input; // Argon2::nonce() reads a 32-bit nonce at byte offset 39 of this buffer
+    uint8_t *output; // one result every outSize bytes
+    size_t inSize; // length of input in bytes
+    size_t outSize; // distance in bytes between consecutive results in output
+};
+
+class DLLEXPORT Argon2 { // drives seeding, block filling and hash encoding through optional callbacks
+public:
+    Argon2(argon2BlocksPrehash prehash, argon2BlocksFillerPtr filler, argon2BlocksPosthash posthash, void *memory, void *userData);
+    // Runs the three stages below in order; returns the number of hashes produced, 0 on failure.
+    int generateHashes(const Argon2Profile &profile, HashData &hashData);
+
+    bool initializeSeeds(const Argon2Profile &profile, HashData &hashData);
+    bool fillBlocks(const Argon2Profile &profile);
+	int encodeHashes(const Argon2Profile &profile, HashData &hashData);
+
+    void setThreads(int threads);
+
+private:
+    void initialHash(const Argon2Profile &profile, uint8_t *blockhash, const char *data, size_t dataSz, size_t outSz);
+    void fillFirstBlocks(const Argon2Profile &profile, uint8_t *blockhash, int thread);
+    // Pointer to the 32-bit nonce at byte offset 39 of the input. NOTE(review): that offset is not 4-byte aligned.
+    inline uint32_t *nonce(HashData &hashData)
+    {
+        return reinterpret_cast<uint32_t*>(hashData.input + 39);
+    }
+    // Optional hooks: a NULL prehash/posthash selects the built-in path; the filler is always called.
+	argon2BlocksPrehash m_prehash;
+	argon2BlocksFillerPtr m_filler;
+	argon2BlocksPosthash m_posthash;
+
+    int m_threads; // hashes per call
+
+    uint8_t *m_seedMemory; // memory passed to the constructor
+	uint8_t *m_outputMemory; // starts equal to m_seedMemory; replaced by the filler's return value
+
+    void *m_userData; // passed through to every callback
+};
+
+
+#endif //ARIOMINER_ARGON2_H
diff --git a/src/crypto/argon2_hasher/hash/argon2/Defs.h b/src/crypto/argon2_hasher/hash/argon2/Defs.h
new file mode 100755
index 00000000..3f6b7181
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/argon2/Defs.h
@@ -0,0 +1,50 @@
+//
+// Created by Haifa Bogdan Adnan on 06/08/2018.
+//
+
+#ifndef ARIOMINER_DEFS_H
+#define ARIOMINER_DEFS_H
+
+#define ARGON2_RAW_LENGTH               32
+#define ARGON2_TYPE_VALUE               2
+#define ARGON2_VERSION                  0x13
+/* The *_IN_BLOCK counts are parenthesized so they expand correctly inside larger expressions (e.g. x / N, x % N). */
+#define ARGON2_BLOCK_SIZE               1024 /* bytes per block */
+#define ARGON2_DWORDS_IN_BLOCK          (ARGON2_BLOCK_SIZE / 4)
+#define ARGON2_QWORDS_IN_BLOCK          (ARGON2_BLOCK_SIZE / 8)
+#define ARGON2_OWORDS_IN_BLOCK          (ARGON2_BLOCK_SIZE / 16)
+#define ARGON2_HWORDS_IN_BLOCK          (ARGON2_BLOCK_SIZE / 32)
+#define ARGON2_512BIT_WORDS_IN_BLOCK    (ARGON2_BLOCK_SIZE / 64)
+#define ARGON2_PREHASH_DIGEST_LENGTH    64 /* bytes of the Blake2b pre-hash */
+#define ARGON2_PREHASH_SEED_LENGTH      72 /* digest plus two 32-bit counters */
+
+#ifdef __cplusplus 
+extern "C" {
+#endif
+
+typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block; /* one ARGON2_BLOCK_SIZE-byte memory block as 64-bit words */
+
+typedef struct Argon2Profile_ { // static description of one Argon2 parameter set
+    uint32_t memCost; // memory cost in blocks (memSize / ARGON2_BLOCK_SIZE in both shipped profiles)
+    uint32_t thrCost; // lane count; Argon2::fillFirstBlocks() seeds this many lanes
+    uint32_t tmCost; // time cost - presumably the number of passes; TODO confirm
+    size_t memSize; // bytes of working memory per hash
+    int32_t *blockRefs; // table of 3-value rows
+    size_t blockRefsSize; // number of rows in blockRefs
+    char profileName[15];
+    int32_t *segments; // { start segment / current block, stop segment (excluding) / previous block, addressing type = 0 -> i, 1 -> d }
+    uint32_t segSize;
+    uint32_t segCount;
+    uint32_t succesiveIdxs; // 0 - idx are precalculated, 1 - idx are successive
+    int pwdLen; // in dwords
+    int saltLen; // in dwords
+} Argon2Profile;
+
+extern DLLEXPORT Argon2Profile argon2profile_3_1_512; /* defined in argon2profile_3_1_512.c */
+extern DLLEXPORT Argon2Profile argon2profile_4_1_256; /* defined in argon2profile_4_1_256.c */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //ARIOMINER_DEFS_H
diff --git a/src/crypto/argon2_hasher/hash/argon2/argon2profile_3_1_512.c b/src/crypto/argon2_hasher/hash/argon2/argon2profile_3_1_512.c
new file mode 100644
index 00000000..9a0cbfa3
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/argon2/argon2profile_3_1_512.c
@@ -0,0 +1,292 @@
+#include <stdint.h>
+#include <stddef.h>
+#include "../../common/DLLExport.h"
+#include "Defs.h"
+
+int32_t blocks_refs_3_1_512[] = {
+        2, 0, 1,
+        3, 1, 1,
+        4, 2, 1,
+        5, 3, 1,
+        6, 3, 1,
+        7, 3, 1,
+        8, 2, 1,
+        9, 5, 1,
+        10, 0, 1,
+        11, 9, 1,
+        12, 10, 1,
+        13, 9, 1,
+        14, 12, 1,
+        15, 8, 1,
+        16, 5, 1,
+        17, 15, 1,
+        18, 10, 1,
+        19, 14, 1,
+        20, 7, 1,
+        21, 19, 1,
+        22, 14, 1,
+        23, 7, 1,
+        24, 14, 1,
+        25, 23, 1,
+        26, 24, 1,
+        27, 0, 1,
+        28, 9, 1,
+        29, 11, 1,
+        30, 12, 1,
+        31, 29, 1,
+        32, 12, 1,
+        33, 23, 1,
+        34, 30, 1,
+        35, 1, 1,
+        36, 32, 1,
+        37, 8, 1,
+        38, 30, 1,
+        39, 31, 1,
+        40, 15, 1,
+        41, 38, 1,
+        42, 29, 1,
+        43, 18, 1,
+        44, 33, 1,
+        45, 18, 1,
+        46, 39, 1,
+        47, 43, 1,
+        48, 40, 1,
+        49, 38, 1,
+        50, 5, 1,
+        51, 47, 1,
+        52, 14, 1,
+        53, 45, 1,
+        54, 30, 1,
+        55, 13, 1,
+        56, 47, 1,
+        57, 30, 1,
+        58, 21, 1,
+        59, 18, 1,
+        60, 36, 1,
+        61, 58, 1,
+        62, 58, 1,
+        63, 19, 1,
+        64, 59, 1,
+        65, 29, 1,
+        66, 10, 1,
+        67, 48, 1,
+        68, 39, 1,
+        69, 25, 1,
+        70, 63, 1,
+        71, 57, 1,
+        72, 70, 1,
+        73, 16, 1,
+        74, 20, 1,
+        75, 72, 1,
+        76, 67, 1,
+        77, 61, 1,
+        78, 49, 1,
+        79, 63, 1,
+        80, 9, 1,
+        81, 19, 1,
+        82, 80, 1,
+        83, 36, 1,
+        84, 20, 1,
+        85, 23, 1,
+        86, 52, 1,
+        87, 85, 1,
+        88, 75, 1,
+        89, 18, 1,
+        90, 85, 1,
+        91, 2, 1,
+        92, 81, 1,
+        93, 91, 1,
+        94, 91, 1,
+        95, 3, 1,
+        96, 45, 1,
+        97, 16, 1,
+        98, 11, 1,
+        99, 60, 1,
+        100, 89, 1,
+        101, 65, 1,
+        102, 39, 1,
+        103, 63, 1,
+        104, 66, 1,
+        105, 74, 1,
+        106, 54, 1,
+        107, 88, 1,
+        108, 106, 1,
+        109, 107, 1,
+        110, 47, 1,
+        111, 8, 1,
+        112, 95, 1,
+        113, 66, 1,
+        114, 1, 1,
+        115, 2, 1,
+        116, 20, 1,
+        117, 110, 1,
+        118, 47, 1,
+        119, 117, 1,
+        120, 114, 1,
+        121, 37, 1,
+        122, 71, 1,
+        123, 51, 1,
+        124, 122, 1,
+        125, 44, 1,
+        126, 92, 1,
+        127, 120, 1,
+        128, 123, 1,
+        129, 127, 1,
+        130, 11, 1,
+        131, 110, 1,
+        132, 93, 1,
+        133, 20, 1,
+        134, 58, 1,
+        135, 13, 1,
+        136, 73, 1,
+        137, 27, 1,
+        138, 94, 1,
+        139, 110, 1,
+        140, 96, 1,
+        141, 57, 1,
+        142, 137, 1,
+        143, 116, 1,
+        144, 119, 1,
+        145, 141, 1,
+        146, 73, 1,
+        147, 26, 1,
+        148, 103, 1,
+        149, 125, 1,
+        150, 146, 1,
+        151, 149, 1,
+        152, 28, 1,
+        153, 149, 1,
+        154, 125, 1,
+        155, 104, 1,
+        156, 61, 1,
+        157, 128, 1,
+        158, 156, 1,
+        159, 122, 1,
+        160, 96, 1,
+        161, 92, 1,
+        162, 160, 1,
+        163, 154, 1,
+        164, 88, 1,
+        165, 160, 1,
+        166, 134, 1,
+        167, 116, 1,
+        168, 23, 1,
+        169, 167, 1,
+        170, 100, 1,
+        171, 169, 1,
+        172, 169, 1,
+        173, 127, 1,
+        174, 0, 1,
+        175, 78, 1,
+        176, 155, 1,
+        177, 124, 1,
+        178, 138, 1,
+        179, 41, 1,
+        180, 156, 1,
+        181, 173, 1,
+        182, 122, 1,
+        183, 173, 1,
+        184, 112, 1,
+        185, 15, 1,
+        186, 183, 1,
+        187, 171, 1,
+        188, 163, 1,
+        189, 85, 1,
+        190, 45, 1,
+        191, 171, 1,
+        192, 139, 1,
+        193, 188, 1,
+        194, 192, 1,
+        195, 78, 1,
+        196, 5, 1,
+        197, 187, 1,
+        198, 180, 1,
+        199, 195, 1,
+        200, 102, 1,
+        201, 89, 1,
+        202, 165, 1,
+        203, 144, 1,
+        204, 171, 1,
+        205, 152, 1,
+        206, 53, 1,
+        207, 19, 1,
+        208, 206, 1,
+        209, 165, 1,
+        210, 208, 1,
+        211, 76, 1,
+        212, 177, 1,
+        213, 189, 1,
+        214, 43, 1,
+        215, 120, 1,
+        216, 122, 1,
+        217, 189, 1,
+        218, 45, 1,
+        219, 217, 1,
+        220, 207, 1,
+        221, 202, 1,
+        222, 169, 1,
+        223, 194, 1,
+        224, 213, 1,
+        225, 178, 1,
+        226, 175, 1,
+        227, 221, 1,
+        228, 212, 1,
+        229, 220, 1,
+        230, 227, 1,
+        231, 30, 1,
+        232, 34, 1,
+        233, 91, 1,
+        234, 231, 1,
+        235, 154, 1,
+        236, 100, 1,
+        237, 166, 1,
+        238, 216, 1,
+        239, 229, 1,
+        240, 177, 1,
+        241, 123, 1,
+        242, 172, 1,
+        243, 71, 1,
+        244, 241, 1,
+        245, 236, 1,
+        246, 109, 1,
+        247, 4, 1,
+        248, 246, 1,
+        249, 166, 1,
+        250, 248, 1,
+        251, 243, 1,
+        252, 248, 1,
+        253, 39, 1,
+        254, 98, 1,
+        255, 253, 1
+};
+
+int32_t segments_3_1_512[] = { // current_idx, previous_idx, seg_type 0=i 1=d
+        2, 1, 0,
+        128, 127, 0,
+        256, 255, 1,
+        384, 383, 1,
+        0, 511, 1,
+        128, 127, 1,
+        256, 255, 1,
+        384, 383, 1,
+        0, 511, 1,
+        128, 127, 1,
+        256, 255, 1,
+        384, 383, 1
+};
+
+DLLEXPORT Argon2Profile argon2profile_3_1_512 = {
+        512, // memCost
+        1, // thrCost
+        3, // tmCost
+        524288, // memSize: 512 blocks of 1024 bytes
+        blocks_refs_3_1_512, // blockRefs
+        sizeof(blocks_refs_3_1_512) / (3 * sizeof(int32_t)), // blockRefsSize: number of 3-value rows
+        "3_1_512", // profileName
+        segments_3_1_512, // segments
+        128, // segSize
+        12, // segCount
+        1, // succesiveIdxs
+        32, // pwdLen (dwords)
+        4 // saltLen (dwords)
+};
diff --git a/src/crypto/argon2_hasher/hash/argon2/argon2profile_4_1_256.c b/src/crypto/argon2_hasher/hash/argon2/argon2profile_4_1_256.c
new file mode 100644
index 00000000..59890c49
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/argon2/argon2profile_4_1_256.c
@@ -0,0 +1,168 @@
+#include <stdint.h>
+#include <stddef.h>
+#include "../../common/DLLExport.h"
+#include "Defs.h"
+
+int32_t blocks_refs_4_1_256[] = {
+        2, 0, 1,
+        3, 1, 1,
+        4, 2, 1,
+        5, 3, 1,
+        6, 0, 1,
+        7, 4, 1,
+        8, 5, 1,
+        9, 7, 1,
+        10, 7, 1,
+        11, 9, 1,
+        12, 5, 1,
+        13, 11, 1,
+        14, 3, 1,
+        15, 2, 1,
+        16, 12, 1,
+        17, 15, 1,
+        18, 15, 1,
+        19, 10, 1,
+        20, 4, 1,
+        21, 18, 1,
+        22, 17, 1,
+        23, 19, 1,
+        24, 2, 1,
+        25, 23, 1,
+        26, 22, 1,
+        27, 12, 1,
+        28, 23, 1,
+        29, 27, 1,
+        30, 26, 1,
+        31, 19, 1,
+        32, 27, 1,
+        33, 29, 1,
+        34, 32, 1,
+        35, 18, 1,
+        36, 32, 1,
+        37, 16, 1,
+        38, 35, 1,
+        39, 22, 1,
+        40, 30, 1,
+        41, 31, 1,
+        42, 39, 1,
+        43, 36, 1,
+        44, 18, 1,
+        45, 0, 1,
+        46, 36, 1,
+        47, 12, 1,
+        48, 28, 1,
+        49, 39, 1,
+        50, 4, 1,
+        51, 48, 1,
+        52, 48, 1,
+        53, 51, 1,
+        54, 50, 1,
+        55, 3, 1,
+        56, 54, 1,
+        57, 53, 1,
+        58, 48, 1,
+        59, 47, 1,
+        60, 25, 1,
+        61, 53, 1,
+        62, 31, 1,
+        63, 59, 1,
+        64, 45, 1,
+        65, 63, 1,
+        66, 48, 1,
+        67, 58, 1,
+        68, 40, 1,
+        69, 17, 1,
+        70, 62, 1,
+        71, 24, 1,
+        72, 60, 1,
+        73, 71, 1,
+        74, 72, 1,
+        75, 57, 1,
+        76, 69, 1,
+        77, 58, 1,
+        78, 74, 1,
+        79, 69, 1,
+        80, 75, 1,
+        81, 74, 1,
+        82, 56, 1,
+        83, 67, 1,
+        84, 15, 1,
+        85, 83, 1,
+        86, 69, 1,
+        87, 83, 1,
+        88, 85, 1,
+        89, 24, 1,
+        90, 52, 1,
+        91, 70, 1,
+        92, 88, 1,
+        93, 42, 1,
+        94, 61, 1,
+        95, 93, 1,
+        96, 22, 1,
+        97, 37, 1,
+        98, 15, 1,
+        99, 91, 1,
+        100, 14, 1,
+        101, 98, 1,
+        102, 24, 1,
+        103, 84, 1,
+        104, 44, 1,
+        105, 103, 1,
+        106, 12, 1,
+        107, 15, 1,
+        108, 79, 1,
+        109, 35, 1,
+        110, 4, 1,
+        111, 109, 1,
+        112, 90, 1,
+        113, 109, 1,
+        114, 43, 1,
+        115, 73, 1,
+        116, 113, 1,
+        117, 107, 1,
+        118, 51, 1,
+        119, 117, 1,
+        120, 118, 1,
+        121, 115, 1,
+        122, 74, 1,
+        123, 67, 1,
+        124, 102, 1,
+        125, 17, 1,
+        126, 113, 1,
+        127, 110, 1
+};
+
+int32_t segments_4_1_256[] = { // current_idx, previous_idx, seg_type 0=i 1=d
+    2, 1, 0,
+    64, 63, 0,
+    128, 127, 1,
+    192, 191, 1,
+    0, 255, 1,
+    64, 63, 1,
+    128, 127, 1,
+    192, 191, 1,
+    0, 255, 1,
+    64, 63, 1,
+    128, 127, 1,
+    192, 191, 1,
+    0, 255, 1,
+    64, 63, 1,
+    128, 127, 1,
+    192, 191, 1
+};
+
+DLLEXPORT Argon2Profile argon2profile_4_1_256 = {
+    256, // memCost
+    1, // thrCost
+    4, // tmCost
+    262144, //256 blocks of 1024 bytes (memSize)
+    blocks_refs_4_1_256, // blockRefs
+    sizeof(blocks_refs_4_1_256) / (3 * sizeof(int32_t)), // blockRefsSize: number of 3-value rows
+    "4_1_256", // profileName
+    segments_4_1_256, // segments
+    64, // segSize
+    16, // segCount
+    1, // succesiveIdxs
+    32, // pwdLen (dwords)
+    4 // saltLen (dwords)
+};
diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2-config.h b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2-config.h
new file mode 100644
index 00000000..a70cd7f0
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2-config.h
@@ -0,0 +1,76 @@
+/*
+   BLAKE2 reference source code package - optimized C implementations
+
+   Copyright 2012, Samuel Neves <sneves@dei.uc.pt>.  You may use this under the
+   terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
+   your option.  The terms of these licenses can be found at:
+
+   - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+   - OpenSSL license   : https://www.openssl.org/source/license.html
+   - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
+
+   More information about the BLAKE2 hash function can be found at
+   https://blake2.net.
+*/
+#ifndef BLAKE2_CONFIG_H
+#define BLAKE2_CONFIG_H
+
+/* Detect SIMD support from the compiler's predefined macros (not every compiler defines all of them). */
+#if defined(__SSE2__) || defined(__x86_64__) || defined(__amd64__) || defined(_M_X64)
+#define HAVE_SSE2
+#endif
+
+#if defined(__SSSE3__)
+#define HAVE_SSSE3
+#endif
+
+#if defined(__SSE4_1__)
+#define HAVE_SSE41
+#endif
+
+#if defined(__AVX__)
+#define HAVE_AVX
+#endif
+
+#if defined(__AVX2__)
+#define HAVE_AVX2
+#endif
+
+#if defined(__XOP__)
+#define HAVE_XOP
+#endif
+
+/* Implication cascade: each detected feature also enables the older ones it builds on. */
+#ifdef HAVE_AVX2
+#ifndef HAVE_AVX
+#define HAVE_AVX
+#endif
+#endif
+/* XOP implies AVX. */
+#ifdef HAVE_XOP
+#ifndef HAVE_AVX
+#define HAVE_AVX
+#endif
+#endif
+/* AVX implies SSE4.1. */
+#ifdef HAVE_AVX
+#ifndef HAVE_SSE41
+#define HAVE_SSE41
+#endif
+#endif
+/* SSE4.1 implies SSSE3. */
+#ifdef HAVE_SSE41
+#ifndef HAVE_SSSE3
+#define HAVE_SSSE3
+#endif
+#endif
+/* SSSE3 implies SSE2. */
+#ifdef HAVE_SSSE3
+#define HAVE_SSE2
+#endif
+/* Targets without SSE2 are rejected at compile time. */
+#if !defined(HAVE_SSE2)
+#error "This code requires at least SSE2."
+#endif
+
+#endif
\ No newline at end of file
diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2-impl.h b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2-impl.h
new file mode 100644
index 00000000..e77ad92f
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2-impl.h
@@ -0,0 +1,154 @@
+/*
+ * Argon2 reference source code package - reference C implementations
+ *
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+ *
+ * You may use this work under the terms of a Creative Commons CC0 1.0
+ * License/Waiver or the Apache Public License 2.0, at your option. The terms of
+ * these licenses can be found at:
+ *
+ * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+ * - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * You should have received a copy of both of these licenses along with this
+ * software. If not, they may be obtained at the above URLs.
+ */
+
+#ifndef PORTABLE_BLAKE2_IMPL_H
+#define PORTABLE_BLAKE2_IMPL_H
+
+#include <stdint.h>
+#include <string.h>
+
+#if defined(_MSC_VER) /* pick the compiler's spelling of the inline keyword */
+#define BLAKE2_INLINE __inline
+#elif defined(__GNUC__) || defined(__clang__)
+#define BLAKE2_INLINE __inline__
+#else
+#define BLAKE2_INLINE /* unknown compiler: no inline hint */
+#endif
+
+/* Argon2 Team - Begin Code */
+/*
+   Not an exhaustive list, but should cover the majority of modern platforms
+   Additionally, the code will always be correct---this is only a performance
+   tweak.
+*/
+#if (defined(__BYTE_ORDER__) &&                                                \
+     (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) ||                           \
+    defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \
+    defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) ||       \
+    defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) ||                \
+    defined(_M_ARM)
+#define NATIVE_LITTLE_ENDIAN /* selects the memcpy fast path in load32/load64/store32/store64 */
+#endif
+/* Argon2 Team - End Code */
+
+static BLAKE2_INLINE uint32_t load32(const void *src) {
+    /* Reads four bytes from src as a little-endian 32-bit value. */
+    uint32_t w;
+#if defined(NATIVE_LITTLE_ENDIAN)
+    memcpy(&w, src, sizeof w);
+#else
+    const uint8_t *p = (const uint8_t *)src;
+    unsigned i;
+    w = 0;
+    for (i = 0; i < 4; ++i)
+        w |= (uint32_t)p[i] << (8 * i);
+#endif
+    return w;
+}
+
+static BLAKE2_INLINE uint64_t load64(const void *src) {
+    /*
+     * Reads eight bytes from src as a little-endian 64-bit value.
+     * Little-endian hosts copy the bytes as they are; other hosts
+     * assemble the value byte by byte, least significant first.
+     */
+    uint64_t w;
+#if defined(NATIVE_LITTLE_ENDIAN)
+    memcpy(&w, src, sizeof w);
+#else
+    const uint8_t *p = (const uint8_t *)src;
+    unsigned i;
+    w = 0;
+    for (i = 0; i < 8; ++i)
+        w |= (uint64_t)p[i] << (8 * i);
+#endif
+    return w;
+}
+
+static BLAKE2_INLINE void store32(void *dst, uint32_t w) {
+    /* Writes w to dst as four little-endian bytes. */
+#if defined(NATIVE_LITTLE_ENDIAN)
+    memcpy(dst, &w, sizeof w);
+#else
+    /* Portable path: emit the low byte, then shift the next one down. */
+    uint8_t *p = (uint8_t *)dst;
+    unsigned i;
+    for (i = 0; i < 4; ++i) {
+        p[i] = (uint8_t)w;
+        w >>= 8;
+    }
+#endif
+}
+
+static BLAKE2_INLINE void store64(void *dst, uint64_t w) {
+    /*
+     * Writes w to dst as eight little-endian bytes (least significant first).
+     * Little-endian hosts copy the value as it is; every other host goes
+     * through the byte-wise path. dst is only touched via memcpy or byte stores.
+     */
+#if defined(NATIVE_LITTLE_ENDIAN)
+    memcpy(dst, &w, sizeof w);
+#else
+    uint8_t *p = (uint8_t *)dst;
+
+    /* Byte k receives bits 8k .. 8k+7 of w. */
+    p[0] = (uint8_t)w;
+    p[1] = (uint8_t)(w >> 8);
+    p[2] = (uint8_t)(w >> 16);
+    p[3] = (uint8_t)(w >> 24);
+    p[4] = (uint8_t)(w >> 32);
+    p[5] = (uint8_t)(w >> 40);
+    p[6] = (uint8_t)(w >> 48);
+    p[7] = (uint8_t)(w >> 56);
+#endif
+}
+
+static BLAKE2_INLINE uint64_t load48(const void *src) {
+    /* Reads six little-endian bytes from src into the low 48 bits of the result. */
+    const uint8_t *p = (const uint8_t *)src;
+    uint64_t w = 0;
+    unsigned i;
+    for (i = 0; i < 6; ++i) {
+        w |= (uint64_t)p[i] << (8 * i);
+    }
+    return w;
+}
+
+static BLAKE2_INLINE void store48(void *dst, uint64_t w) {
+    /*
+     * Writes the low 48 bits of w to dst as six little-endian bytes;
+     * the upper 16 bits of w are not stored.
+     */
+    uint8_t *p = (uint8_t *)dst;
+
+    p[0] = (uint8_t)w;
+    p[1] = (uint8_t)(w >> 8);
+    p[2] = (uint8_t)(w >> 16);
+    p[3] = (uint8_t)(w >> 24);
+    p[4] = (uint8_t)(w >> 32);
+    p[5] = (uint8_t)(w >> 40);
+}
+
+static BLAKE2_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) { /* rotates w right by c bits, 0 <= c < 32 */
+    return (w >> c) | (w << ((32 - c) & 31)); /* the mask keeps c == 0 from shifting by the full width (undefined) */
+}
+
+static BLAKE2_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) { /* rotates w right by c bits, 0 <= c < 64 */
+    return (w >> c) | (w << ((64 - c) & 63)); /* the mask keeps c == 0 from shifting by the full width (undefined) */
+}
+
+#endif
diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2.h b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2.h
new file mode 100644
index 00000000..70e4aeb8
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2.h
@@ -0,0 +1,90 @@
+/*
+ * Argon2 reference source code package - reference C implementations
+ *
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+ *
+ * You may use this work under the terms of a Creative Commons CC0 1.0
+ * License/Waiver or the Apache Public License 2.0, at your option. The terms of
+ * these licenses can be found at:
+ *
+ * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+ * - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * You should have received a copy of both of these licenses along with this
+ * software. If not, they may be obtained at the above URLs.
+ */
+
+#ifndef PORTABLE_BLAKE2_H
+#define PORTABLE_BLAKE2_H
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+enum blake2b_constant {
+    BLAKE2B_BLOCKBYTES = 128, /* size in bytes of one compression-function input block */
+    BLAKE2B_OUTBYTES = 64, /* largest digest length accepted by blake2b_init / blake2b_init_key */
+    BLAKE2B_KEYBYTES = 64, /* largest key length accepted by blake2b_init_key */
+    BLAKE2B_SALTBYTES = 16, /* size of blake2b_param.salt */
+    BLAKE2B_PERSONALBYTES = 16 /* size of blake2b_param.personal */
+};
+
+#pragma pack(push, 1)
+typedef struct __blake2b_param { /* packed to exactly 64 bytes; blake2b_init_param reads it as raw bytes and XORs it into the IV */
+    uint8_t digest_length;                   /* 1: digest size in bytes (the numbers are the running byte total) */
+    uint8_t key_length;                      /* 2: key size in bytes, 0 when unkeyed */
+    uint8_t fanout;                          /* 3: set to 1 by blake2b_init / blake2b_init_key */
+    uint8_t depth;                           /* 4: set to 1 by blake2b_init / blake2b_init_key */
+    uint32_t leaf_length;                    /* 8: set to 0 by both initializers */
+    uint64_t node_offset;                    /* 16: set to 0 by both initializers */
+    uint8_t node_depth;                      /* 17: set to 0 by both initializers */
+    uint8_t inner_length;                    /* 18: set to 0 by both initializers */
+    uint8_t reserved[14];                    /* 32: zero-filled by both initializers */
+    uint8_t salt[BLAKE2B_SALTBYTES];         /* 48: zero-filled by both initializers */
+    uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64: zero-filled by both initializers */
+} blake2b_param;
+#pragma pack(pop)
+
+typedef struct __blake2b_state {
+    uint64_t h[8]; /* chaining value; starts as blake2b_IV XOR the parameter block */
+    uint64_t t[2]; /* 128-bit counter advanced per compressed block: t[0] low word, t[1] high word */
+    uint64_t f[2]; /* flags, all-ones when set: f[0] last block, f[1] last node */
+    uint8_t buf[BLAKE2B_BLOCKBYTES]; /* input staged until a full block can be compressed */
+    unsigned buflen; /* number of valid bytes in buf */
+    unsigned outlen; /* digest length, copied from blake2b_param.digest_length */
+    uint8_t last_node; /* when nonzero, blake2b_set_lastblock also sets f[1] */
+} blake2b_state;
+
+/* Ensure param structs have not been wrongly padded */
+/* Poor man's static_assert: a false condition turns the initializer into 1 / 0, which fails to compile */
+enum {
+    blake2_size_check_0 = 1 / !!(CHAR_BIT == 8), /* bytes must be 8 bits wide */
+    blake2_size_check_2 =
+    1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * CHAR_BIT) /* 8 * 8 = 64 bytes, the size of eight 64-bit words */
+};
+
+/* Streaming API (the init and update functions return 0 on success, -1 on invalid arguments or state) */
+int blake2b_init(blake2b_state *S, size_t outlen); /* unkeyed; outlen must be in 1..BLAKE2B_OUTBYTES */
+int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
+                     size_t keylen); /* keyed; keylen must be in 1..BLAKE2B_KEYBYTES */
+int blake2b_init_param(blake2b_state *S, const blake2b_param *P); /* S->h = IV XOR *P, S->outlen = P->digest_length */
+int blake2b_update(blake2b_state *S, const void *in, size_t inlen); /* absorb inlen bytes from in */
+int blake2b_update_static(blake2b_state *S, const char in, size_t inlen); /* presumably absorbs inlen copies of the byte in - confirm in blake2b.c */
+int blake2b_final(blake2b_state *S, void *out, size_t outlen);
+
+/* Simple API */
+int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
+            const void *key, size_t keylen);
+
+/* Argon2 Team - Begin Code */
+int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
+/* Argon2 Team - End Code */
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse2.h b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse2.h
new file mode 100644
index 00000000..f79123d8
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse2.h
@@ -0,0 +1,68 @@
+/*
+   BLAKE2 reference source code package - optimized C implementations
+
+   Copyright 2012, Samuel Neves <sneves@dei.uc.pt>.  You may use this under the
+   terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
+   your option.  The terms of these licenses can be found at:
+
+   - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+   - OpenSSL license   : https://www.openssl.org/source/license.html
+   - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
+
+   More information about the BLAKE2 hash function can be found at
+   https://blake2.net.
+*/
+#ifndef BLAKE2B_LOAD_SSE2_H
+#define BLAKE2B_LOAD_SSE2_H
+
+/* LOAD_MSG_r_k(b0, b1): message words for step k (1..4) of round r, taken from the caller's uint64_t locals m0..m15 in blake2b_sigma[r] order. */
+/* _mm_set_epi64x takes the high lane first, so each pair reads (high, low); rounds 10 and 11 repeat rounds 0 and 1. */
+#define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
+#define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
+#define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
+#define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
+#define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
+#define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
+#define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
+#define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
+#define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5)
+#define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2)
+#define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7)
+#define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1)
+#define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13)
+#define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12)
+#define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4)
+#define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0)
+#define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2)
+#define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4)
+#define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6)
+#define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8)
+#define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0)
+#define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, m12); b1 = _mm_set_epi64x(m3, m11)
+#define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15)
+#define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14)
+#define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14)
+#define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13)
+#define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9)
+#define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2)
+#define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12)
+#define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1)
+#define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8)
+#define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6)
+#define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11)
+#define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3)
+#define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1)
+#define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4)
+#define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7)
+#define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6)
+#define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3)
+#define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12)
+#define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
+#define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
+#define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
+#define LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
+#define LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
+#define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
+#define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
+#define LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
+#endif
\ No newline at end of file
diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse41.h b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse41.h
new file mode 100644
index 00000000..e8564b57
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse41.h
@@ -0,0 +1,402 @@
+/*
+   BLAKE2 reference source code package - optimized C implementations
+
+   Copyright 2012, Samuel Neves <sneves@dei.uc.pt>.  You may use this under the
+   terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
+   your option.  The terms of these licenses can be found at:
+
+   - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+   - OpenSSL license   : https://www.openssl.org/source/license.html
+   - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
+
+   More information about the BLAKE2 hash function can be found at
+   https://blake2.net.
+*/
+#ifndef BLAKE2B_LOAD_SSE41_H
+#define BLAKE2B_LOAD_SSE41_H
+
+/* Round 0 loads (SSE4.1). m0..m7 are the caller's __m128i locals; mi holds message words 2i (low lane) and 2i+1 (high lane). */
+#define LOAD_MSG_0_1(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m0, m1); \
+b1 = _mm_unpacklo_epi64(m2, m3); \
+} while(0)
+
+
+#define LOAD_MSG_0_2(b0, b1) \
+do \
+{ \
+b0 = _mm_unpackhi_epi64(m0, m1); \
+b1 = _mm_unpackhi_epi64(m2, m3); \
+} while(0)
+
+
+#define LOAD_MSG_0_3(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m4, m5); \
+b1 = _mm_unpacklo_epi64(m6, m7); \
+} while(0)
+
+
+#define LOAD_MSG_0_4(b0, b1) \
+do \
+{ \
+b0 = _mm_unpackhi_epi64(m4, m5); \
+b1 = _mm_unpackhi_epi64(m6, m7); \
+} while(0)
+
+/* Round 1 loads (SSE4.1): ROUND(1) uses _1/_2 before DIAGONALIZE and _3/_4 after it. */
+#define LOAD_MSG_1_1(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m7, m2); \
+b1 = _mm_unpackhi_epi64(m4, m6); \
+} while(0)
+
+
+#define LOAD_MSG_1_2(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m5, m4); \
+b1 = _mm_alignr_epi8(m3, m7, 8); \
+} while(0)
+
+
+#define LOAD_MSG_1_3(b0, b1) \
+do \
+{ \
+b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
+b1 = _mm_unpackhi_epi64(m5, m2); \
+} while(0)
+
+
+#define LOAD_MSG_1_4(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m6, m1); \
+b1 = _mm_unpackhi_epi64(m3, m1); \
+} while(0)
+
+/* Round 2 loads (SSE4.1): ROUND(2) uses _1/_2 before DIAGONALIZE and _3/_4 after it. */
+#define LOAD_MSG_2_1(b0, b1) \
+do \
+{ \
+b0 = _mm_alignr_epi8(m6, m5, 8); \
+b1 = _mm_unpackhi_epi64(m2, m7); \
+} while(0)
+
+
+#define LOAD_MSG_2_2(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m4, m0); \
+b1 = _mm_blend_epi16(m1, m6, 0xF0); \
+} while(0)
+
+
+#define LOAD_MSG_2_3(b0, b1) \
+do \
+{ \
+b0 = _mm_blend_epi16(m5, m1, 0xF0); \
+b1 = _mm_unpackhi_epi64(m3, m4); \
+} while(0)
+
+
+#define LOAD_MSG_2_4(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m7, m3); \
+b1 = _mm_alignr_epi8(m2, m0, 8); \
+} while(0)
+
+/* Round 3 loads (SSE4.1): ROUND(3) uses _1/_2 before DIAGONALIZE and _3/_4 after it. */
+#define LOAD_MSG_3_1(b0, b1) \
+do \
+{ \
+b0 = _mm_unpackhi_epi64(m3, m1); \
+b1 = _mm_unpackhi_epi64(m6, m5); \
+} while(0)
+
+
+#define LOAD_MSG_3_2(b0, b1) \
+do \
+{ \
+b0 = _mm_unpackhi_epi64(m4, m0); \
+b1 = _mm_unpacklo_epi64(m6, m7); \
+} while(0)
+
+
+#define LOAD_MSG_3_3(b0, b1) \
+do \
+{ \
+b0 = _mm_blend_epi16(m1, m2, 0xF0); \
+b1 = _mm_blend_epi16(m2, m7, 0xF0); \
+} while(0)
+
+
+#define LOAD_MSG_3_4(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m3, m5); \
+b1 = _mm_unpacklo_epi64(m0, m4); \
+} while(0)
+
+/* Round 4 loads (SSE4.1): ROUND(4) uses _1/_2 before DIAGONALIZE and _3/_4 after it. */
+#define LOAD_MSG_4_1(b0, b1) \
+do \
+{ \
+b0 = _mm_unpackhi_epi64(m4, m2); \
+b1 = _mm_unpacklo_epi64(m1, m5); \
+} while(0)
+
+
+#define LOAD_MSG_4_2(b0, b1) \
+do \
+{ \
+b0 = _mm_blend_epi16(m0, m3, 0xF0); \
+b1 = _mm_blend_epi16(m2, m7, 0xF0); \
+} while(0)
+
+
+#define LOAD_MSG_4_3(b0, b1) \
+do \
+{ \
+b0 = _mm_blend_epi16(m7, m5, 0xF0); \
+b1 = _mm_blend_epi16(m3, m1, 0xF0); \
+} while(0)
+
+
+#define LOAD_MSG_4_4(b0, b1) \
+do \
+{ \
+b0 = _mm_alignr_epi8(m6, m0, 8); \
+b1 = _mm_blend_epi16(m4, m6, 0xF0); \
+} while(0)
+
+/* Round 5 loads (SSE4.1): ROUND(5) uses _1/_2 before DIAGONALIZE and _3/_4 after it. */
+#define LOAD_MSG_5_1(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m1, m3); \
+b1 = _mm_unpacklo_epi64(m0, m4); \
+} while(0)
+
+
+#define LOAD_MSG_5_2(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m6, m5); \
+b1 = _mm_unpackhi_epi64(m5, m1); \
+} while(0)
+
+
+#define LOAD_MSG_5_3(b0, b1) \
+do \
+{ \
+b0 = _mm_blend_epi16(m2, m3, 0xF0); \
+b1 = _mm_unpackhi_epi64(m7, m0); \
+} while(0)
+
+
+#define LOAD_MSG_5_4(b0, b1) \
+do \
+{ \
+b0 = _mm_unpackhi_epi64(m6, m2); \
+b1 = _mm_blend_epi16(m7, m4, 0xF0); \
+} while(0)
+
+/* Round 6 loads (SSE4.1): ROUND(6) uses _1/_2 before DIAGONALIZE and _3/_4 after it. */
+#define LOAD_MSG_6_1(b0, b1) \
+do \
+{ \
+b0 = _mm_blend_epi16(m6, m0, 0xF0); \
+b1 = _mm_unpacklo_epi64(m7, m2); \
+} while(0)
+
+
+#define LOAD_MSG_6_2(b0, b1) \
+do \
+{ \
+b0 = _mm_unpackhi_epi64(m2, m7); \
+b1 = _mm_alignr_epi8(m5, m6, 8); \
+} while(0)
+
+
+#define LOAD_MSG_6_3(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m0, m3); \
+b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
+} while(0)
+
+
+#define LOAD_MSG_6_4(b0, b1) \
+do \
+{ \
+b0 = _mm_unpackhi_epi64(m3, m1); \
+b1 = _mm_blend_epi16(m1, m5, 0xF0); \
+} while(0)
+
+/* Round 7 loads (SSE4.1): ROUND(7) uses _1/_2 before DIAGONALIZE and _3/_4 after it. */
+#define LOAD_MSG_7_1(b0, b1) \
+do \
+{ \
+b0 = _mm_unpackhi_epi64(m6, m3); \
+b1 = _mm_blend_epi16(m6, m1, 0xF0); \
+} while(0)
+
+
+#define LOAD_MSG_7_2(b0, b1) \
+do \
+{ \
+b0 = _mm_alignr_epi8(m7, m5, 8); \
+b1 = _mm_unpackhi_epi64(m0, m4); \
+} while(0)
+
+
+#define LOAD_MSG_7_3(b0, b1) \
+do \
+{ \
+b0 = _mm_unpackhi_epi64(m2, m7); \
+b1 = _mm_unpacklo_epi64(m4, m1); \
+} while(0)
+
+
+#define LOAD_MSG_7_4(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m0, m2); \
+b1 = _mm_unpacklo_epi64(m3, m5); \
+} while(0)
+
+/* Round 8 loads (SSE4.1): ROUND(8) uses _1/_2 before DIAGONALIZE and _3/_4 after it. */
+#define LOAD_MSG_8_1(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m3, m7); \
+b1 = _mm_alignr_epi8(m0, m5, 8); \
+} while(0)
+
+
+#define LOAD_MSG_8_2(b0, b1) \
+do \
+{ \
+b0 = _mm_unpackhi_epi64(m7, m4); \
+b1 = _mm_alignr_epi8(m4, m1, 8); \
+} while(0)
+
+
+#define LOAD_MSG_8_3(b0, b1) \
+do \
+{ \
+b0 = m6; \
+b1 = _mm_alignr_epi8(m5, m0, 8); \
+} while(0)
+
+
+#define LOAD_MSG_8_4(b0, b1) \
+do \
+{ \
+b0 = _mm_blend_epi16(m1, m3, 0xF0); \
+b1 = m2; \
+} while(0)
+
+/* Round 9 loads (SSE4.1): ROUND(9) uses _1/_2 before DIAGONALIZE and _3/_4 after it. */
+#define LOAD_MSG_9_1(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m5, m4); \
+b1 = _mm_unpackhi_epi64(m3, m0); \
+} while(0)
+
+
+#define LOAD_MSG_9_2(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m1, m2); \
+b1 = _mm_blend_epi16(m3, m2, 0xF0); \
+} while(0)
+
+
+#define LOAD_MSG_9_3(b0, b1) \
+do \
+{ \
+b0 = _mm_unpackhi_epi64(m7, m4); \
+b1 = _mm_unpackhi_epi64(m1, m6); \
+} while(0)
+
+
+#define LOAD_MSG_9_4(b0, b1) \
+do \
+{ \
+b0 = _mm_alignr_epi8(m7, m5, 8); \
+b1 = _mm_unpacklo_epi64(m6, m0); \
+} while(0)
+
+/* Round 10 loads (SSE4.1): the four macros select the same words as LOAD_MSG_0_1..LOAD_MSG_0_4. */
+#define LOAD_MSG_10_1(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m0, m1); \
+b1 = _mm_unpacklo_epi64(m2, m3); \
+} while(0)
+
+
+#define LOAD_MSG_10_2(b0, b1) \
+do \
+{ \
+b0 = _mm_unpackhi_epi64(m0, m1); \
+b1 = _mm_unpackhi_epi64(m2, m3); \
+} while(0)
+
+
+#define LOAD_MSG_10_3(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m4, m5); \
+b1 = _mm_unpacklo_epi64(m6, m7); \
+} while(0)
+
+
+#define LOAD_MSG_10_4(b0, b1) \
+do \
+{ \
+b0 = _mm_unpackhi_epi64(m4, m5); \
+b1 = _mm_unpackhi_epi64(m6, m7); \
+} while(0)
+
+/* Round 11 loads (SSE4.1): the four macros select the same words as LOAD_MSG_1_1..LOAD_MSG_1_4. */
+#define LOAD_MSG_11_1(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m7, m2); \
+b1 = _mm_unpackhi_epi64(m4, m6); \
+} while(0)
+
+
+#define LOAD_MSG_11_2(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m5, m4); \
+b1 = _mm_alignr_epi8(m3, m7, 8); \
+} while(0)
+
+
+#define LOAD_MSG_11_3(b0, b1) \
+do \
+{ \
+b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
+b1 = _mm_unpackhi_epi64(m5, m2); \
+} while(0)
+
+
+#define LOAD_MSG_11_4(b0, b1) \
+do \
+{ \
+b0 = _mm_unpacklo_epi64(m6, m1); \
+b1 = _mm_unpackhi_epi64(m3, m1); \
+} while(0)
+
+#endif
\ No newline at end of file
diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-round.h b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-round.h
new file mode 100644
index 00000000..3e348e6f
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-round.h
@@ -0,0 +1,154 @@
+/*
+   BLAKE2 reference source code package - optimized C implementations
+
+   Copyright 2012, Samuel Neves <sneves@dei.uc.pt>.  You may use this under the
+   terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
+   your option.  The terms of these licenses can be found at:
+
+   - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+   - OpenSSL license   : https://www.openssl.org/source/license.html
+   - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
+
+   More information about the BLAKE2 hash function can be found at
+   https://blake2.net.
+*/
+#ifndef BLAKE2B_ROUND_H
+#define BLAKE2B_ROUND_H
+
+#define LOADU(p)  _mm_loadu_si128( (const __m128i *)(p) ) /* 128-bit load with no alignment requirement */
+#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) /* 128-bit store with no alignment requirement */
+
+#define TOF(reg) _mm_castsi128_ps((reg)) /* reinterpret an integer vector as a float vector */
+#define TOI(reg) _mm_castps_si128((reg)) /* reinterpret a float vector as an integer vector */
+
+#define LIKELY(x) __builtin_expect((x),1) /* NOTE(review): GCC/Clang builtin, not provided by MSVC - confirm it is never expanded there */
+
+/* _mm_roti_epi64(x, c): rotate each 64-bit lane of x right by -c bits (callers pass a negative count); emulated unless XOP is available */
+#ifndef HAVE_XOP
+#ifdef HAVE_SSSE3
+#define _mm_roti_epi64(x, c) \
+    ((-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1))  /* swap the 32-bit halves of each lane */ \
+    : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) /* byte shuffle; r24 must be a local of the caller */ \
+    : (-(c) == 16) ? _mm_shuffle_epi8((x), r16) /* byte shuffle; r16 must be a local of the caller */ \
+    : (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x)))  /* x + x equals x << 1 */ \
+    : _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c))))) /* outer parentheses keep the conditional intact inside larger expressions */
+#else
+#define _mm_roti_epi64(r, c) _mm_xor_si128(_mm_srli_epi64( (r), -(c) ),_mm_slli_epi64( (r), 64-(-(c)) ))
+#endif
+#else
+/* HAVE_XOP: no emulation is defined here; blake2b.c includes <x86intrin.h> in this configuration */
+#endif
+
+#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
+  row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
+  row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
+  /* first half of G on four lanes at once, rows 1..4 acting as a, b, c, d: above a += m + b, next d ^= a */ \
+  row4l = _mm_xor_si128(row4l, row1l); \
+  row4h = _mm_xor_si128(row4h, row1h); \
+  /* d = rotr64(d, 32) */ \
+  row4l = _mm_roti_epi64(row4l, -32); \
+  row4h = _mm_roti_epi64(row4h, -32); \
+  /* c += d */ \
+  row3l = _mm_add_epi64(row3l, row4l); \
+  row3h = _mm_add_epi64(row3h, row4h); \
+  /* b ^= c */ \
+  row2l = _mm_xor_si128(row2l, row3l); \
+  row2h = _mm_xor_si128(row2h, row3h); \
+  /* b = rotr64(b, 24) */ \
+  row2l = _mm_roti_epi64(row2l, -24); \
+  row2h = _mm_roti_epi64(row2h, -24); \
+
+#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
+  row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
+  row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
+  /* second half of G on four lanes at once, rows 1..4 acting as a, b, c, d: above a += m + b, next d ^= a */ \
+  row4l = _mm_xor_si128(row4l, row1l); \
+  row4h = _mm_xor_si128(row4h, row1h); \
+  /* d = rotr64(d, 16) */ \
+  row4l = _mm_roti_epi64(row4l, -16); \
+  row4h = _mm_roti_epi64(row4h, -16); \
+  /* c += d */ \
+  row3l = _mm_add_epi64(row3l, row4l); \
+  row3h = _mm_add_epi64(row3h, row4h); \
+  /* b ^= c */ \
+  row2l = _mm_xor_si128(row2l, row3l); \
+  row2h = _mm_xor_si128(row2h, row3h); \
+  /* b = rotr64(b, 63) */ \
+  row2l = _mm_roti_epi64(row2l, -63); \
+  row2h = _mm_roti_epi64(row2h, -63); \
+
+#if defined(HAVE_SSSE3)
+#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
+  t0 = _mm_alignr_epi8(row2h, row2l, 8); \
+  t1 = _mm_alignr_epi8(row2l, row2h, 8); \
+  row2l = t0; \
+  row2h = t1; \
+  /* row 2 is now rotated by one 64-bit lane (lane i holds old lane i+1); row 3 is rotated by two, i.e. its halves swap; t0/t1 are caller locals */ \
+  t0 = row3l; \
+  row3l = row3h; \
+  row3h = t0;    \
+  /* row 4 is rotated by three lanes (lane i holds old lane i+3); row 1 is left untouched */ \
+  t0 = _mm_alignr_epi8(row4h, row4l, 8); \
+  t1 = _mm_alignr_epi8(row4l, row4h, 8); \
+  row4l = t1; \
+  row4h = t0;
+
+#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
+  t0 = _mm_alignr_epi8(row2l, row2h, 8); \
+  t1 = _mm_alignr_epi8(row2h, row2l, 8); \
+  row2l = t0; \
+  row2h = t1; \
+  /* inverse of DIAGONALIZE: row 2 is rotated by three lanes above; row 3 swaps its halves back; t0/t1 are caller locals */ \
+  t0 = row3l; \
+  row3l = row3h; \
+  row3h = t0; \
+  /* row 4 is rotated by one lane, undoing the three-lane rotation; row 1 is left untouched */ \
+  t0 = _mm_alignr_epi8(row4l, row4h, 8); \
+  t1 = _mm_alignr_epi8(row4h, row4l, 8); \
+  row4l = t1; \
+  row4h = t0;
+#else
+
+#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
+  t0 = row4l; /* SSE2-only variant (no alignr): save the low half of row 4 */ \
+  t1 = row2l; /* save the low half of row 2 */ \
+  row4l = row3l; /* row4l doubles as scratch while the halves of row 3 are swapped */ \
+  row3l = row3h;\
+  row3h = row4l;\
+  row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); /* {old lane 3, old lane 0} */ \
+  row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); /* {old lane 1, old lane 2} */ \
+  row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); /* {old lane 1, old lane 2} */ \
+  row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1)) /* {old lane 3, old lane 0} */
+
+#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
+  t0 = row3l; /* SSE2-only variant (no alignr): swap the halves of row 3 back */ \
+  row3l = row3h;\
+  row3h = t0;\
+  t0 = row2l; /* save the low half of row 2 */ \
+  t1 = row4l; /* save the low half of row 4 */ \
+  row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); /* {current lane 3, current lane 0} */ \
+  row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); /* {current lane 1, current lane 2} */ \
+  row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); /* {current lane 1, current lane 2} */ \
+  row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1)) /* {current lane 3, current lane 0} */
+
+#endif
+
+#if defined(HAVE_SSE41)
+#include "blake2b-load-sse41.h"
+#else
+#include "blake2b-load-sse2.h"
+#endif
+
+#define ROUND(r) \
+  LOAD_MSG_ ##r ##_1(b0, b1); /* one full round r; uses the caller's row*, b0/b1 and t0/t1 locals. First the column step: G1 then G2 */ \
+  G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
+  LOAD_MSG_ ##r ##_2(b0, b1); \
+  G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
+  DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); /* rotate rows 2..4 so the diagonals line up as columns */ \
+  LOAD_MSG_ ##r ##_3(b0, b1); /* diagonal step: the same G1 / G2 sequence on the rotated rows */ \
+  G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
+  LOAD_MSG_ ##r ##_4(b0, b1); \
+  G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
+  UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); /* restore the original lane order */
+
+#endif
\ No newline at end of file
diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b.c b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b.c
new file mode 100644
index 00000000..c40a7991
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b.c
@@ -0,0 +1,514 @@
+/*
+ * Argon2 reference source code package - reference C implementations
+ *
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+ *
+ * You may use this work under the terms of a Creative Commons CC0 1.0
+ * License/Waiver or the Apache Public License 2.0, at your option. The terms of
+ * these licenses can be found at:
+ *
+ * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+ * - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * You should have received a copy of both of these licenses along with this
+ * software. If not, they may be obtained at the above URLs.
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "blake2.h"
+#include "blake2-impl.h"
+
+#if !defined(BUILD_REF) && (defined(__x86_64__) || defined(_WIN64))
+    #include "blake2-config.h"
+
+    #ifdef _MSC_VER
+    #include <intrin.h> /* for _mm_set_epi64x */
+    #endif
+    #include <emmintrin.h>
+    #if defined(HAVE_SSSE3)
+    #include <tmmintrin.h>
+    #endif
+    #if defined(HAVE_SSE41)
+    #include <smmintrin.h>
+    #endif
+    #if defined(HAVE_AVX)
+    #include <immintrin.h>
+    #endif
+    #if defined(HAVE_XOP)
+    #include <x86intrin.h>
+    #endif
+
+    #include "blake2b-round.h"
+#endif
+
+static const uint64_t blake2b_IV[8] = { /* initial chaining value; also supplies the lower eight working words in blake2b_compress */
+        UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
+        UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
+        UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
+        UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179)};
+
+static const unsigned int blake2b_sigma[12][16] = { /* message word order for each of the 12 rounds; rows 10 and 11 repeat rows 0 and 1 */
+        {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+        {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+        {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
+        {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
+        {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
+        {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
+        {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
+        {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
+        {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
+        {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
+        {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+        {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+};
+
+static BLAKE2_INLINE void blake2b_set_lastnode(blake2b_state *S) {
+    S->f[1] = UINT64_MAX; /* raise the last-node flag (all bits set) */
+}
+
+static BLAKE2_INLINE void blake2b_set_lastblock(blake2b_state *S) {
+    /* Raise the last-block flag f[0] (all bits set). A state tagged */
+    /* with last_node gets its last-node flag f[1] raised first. */
+    if (S->last_node) { blake2b_set_lastnode(S); }
+    S->f[0] = UINT64_MAX;
+}
+
+static BLAKE2_INLINE void blake2b_increment_counter(blake2b_state *S, uint64_t inc) {
+    const uint64_t low = S->t[0] + inc; /* t is a 128-bit counter: t[0] low word, t[1] high word */
+    S->t[0] = low;
+    S->t[1] += (low < inc); /* low < inc only when the addition wrapped, so this adds the carry */
+}
+
+static BLAKE2_INLINE void blake2b_invalidate_state(blake2b_state *S) {
+    blake2b_set_lastblock(S); /* invalidate for further use: with f[0] set, blake2b_update on S returns -1 */
+}
+
+static BLAKE2_INLINE void blake2b_init0(blake2b_state *S) {
+    memset(S, 0, sizeof *S);               /* clear counter, flags, buffer and lengths */
+    memcpy(S->h, blake2b_IV, sizeof S->h); /* the chaining value starts at the IV */
+}
+
+int blake2b_init_param(blake2b_state *S, const blake2b_param *P) {
+    /* Reset S, then XOR the parameter block *P, read as eight 64-bit */
+    /* words via load64, into the IV. Returns 0, or -1 on a NULL pointer. */
+    const uint8_t *raw = (const uint8_t *)P;
+    size_t word;
+
+    if (S == NULL || P == NULL) {
+        return -1;
+    }
+    blake2b_init0(S);
+    for (word = 0; word < 8; ++word) {
+        S->h[word] ^= load64(raw + word * sizeof(S->h[word]));
+    }
+    S->outlen = P->digest_length;
+    return 0;
+}
+
+/* Unkeyed, sequential BLAKE2b: prepare S to produce an outlen-byte digest. */
+int blake2b_init(blake2b_state *S, size_t outlen) {
+    /*
+     * Returns 0 on success and -1 when S is NULL or outlen is outside
+     * 1..BLAKE2B_OUTBYTES. In the latter case S is invalidated so that
+     * later blake2b_update calls on it are rejected.
+     * NOTE(review): on that path S may never have been initialized, so
+     * blake2b_invalidate_state reads S->last_node before any write to
+     * it - confirm callers pass zeroed or previously used states.
+     */
+    blake2b_param params;
+
+    if (S == NULL) {
+        return -1;
+    }
+    if (outlen < 1 || outlen > BLAKE2B_OUTBYTES) {
+        blake2b_invalidate_state(S);
+        return -1;
+    }
+
+    /* Parameter block for unkeyed hashing: every field is zero except */
+    /* the digest length and fanout = depth = 1. */
+    memset(&params, 0, sizeof(params));
+    params.digest_length = (uint8_t)outlen;
+    params.fanout = 1;
+    params.depth = 1;
+    return blake2b_init_param(S, &params);
+}
+
+int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
+                     size_t keylen) {
+    /*
+     * Keyed BLAKE2b initialization: outlen in 1..BLAKE2B_OUTBYTES,
+     * key non-NULL, keylen in 1..BLAKE2B_KEYBYTES.
+     *
+     * Returns 0 on success. Returns -1 when S is NULL, or when an
+     * argument is out of range; in the latter case S is invalidated
+     * so that later blake2b_update calls on it fail.
+     */
+    blake2b_param params;
+    uint8_t block[BLAKE2B_BLOCKBYTES];
+    volatile uint8_t *wipe = block;
+    size_t i;
+
+    if (S == NULL) {
+        return -1;
+    }
+    if (outlen == 0 || outlen > BLAKE2B_OUTBYTES || key == NULL ||
+        keylen == 0 || keylen > BLAKE2B_KEYBYTES) {
+        blake2b_invalidate_state(S);
+        return -1;
+    }
+
+    /* Parameter block for keyed, sequential hashing; the rest is zero. */
+    memset(&params, 0, sizeof(params));
+    params.digest_length = (uint8_t)outlen;
+    params.key_length = (uint8_t)keylen;
+    params.fanout = 1;
+    params.depth = 1;
+    if (blake2b_init_param(S, &params) < 0) {
+        blake2b_invalidate_state(S);
+        return -1;
+    }
+
+    /* The key, zero-padded to one full block, is absorbed first. */
+    memset(block, 0, sizeof(block));
+    memcpy(block, key, keylen);
+    blake2b_update(S, block, sizeof(block));
+    /* Burn the stack copy of the key; the volatile pointer keeps the */
+    /* compiler from discarding these stores as dead. */
+    for (i = 0; i < sizeof(block); ++i) { wipe[i] = 0; }
+    return 0;
+}
+
+#if !defined(BUILD_REF) && (defined(__x86_64__) || defined(_WIN64))
+static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
+{ /* SSE variant: mixes one 128-byte block into S->h; S->t and S->f must already be set by the caller */
+    __m128i row1l, row1h;
+    __m128i row2l, row2h;
+    __m128i row3l, row3h;
+    __m128i row4l, row4h;
+    __m128i b0, b1; /* message words for the current G1 / G2 step, filled by LOAD_MSG_* */
+    __m128i t0, t1; /* scratch for DIAGONALIZE / UNDIAGONALIZE */
+#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
+    const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 ); /* byte-shuffle mask used by _mm_roti_epi64 for the 16-bit rotation */
+    const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 ); /* byte-shuffle mask used by _mm_roti_epi64 for the 24-bit rotation */
+#endif
+#if defined(HAVE_SSE41)
+    const __m128i m0 = LOADU( block + 00 ); /* SSE4.1 loads: eight vectors holding two message words each */
+    const __m128i m1 = LOADU( block + 16 );
+    const __m128i m2 = LOADU( block + 32 );
+    const __m128i m3 = LOADU( block + 48 );
+    const __m128i m4 = LOADU( block + 64 );
+    const __m128i m5 = LOADU( block + 80 );
+    const __m128i m6 = LOADU( block + 96 );
+    const __m128i m7 = LOADU( block + 112 );
+#else
+  const uint64_t  m0 = load64(block +  0 * sizeof(uint64_t)); /* SSE2 loads: sixteen scalar message words */
+  const uint64_t  m1 = load64(block +  1 * sizeof(uint64_t));
+  const uint64_t  m2 = load64(block +  2 * sizeof(uint64_t));
+  const uint64_t  m3 = load64(block +  3 * sizeof(uint64_t));
+  const uint64_t  m4 = load64(block +  4 * sizeof(uint64_t));
+  const uint64_t  m5 = load64(block +  5 * sizeof(uint64_t));
+  const uint64_t  m6 = load64(block +  6 * sizeof(uint64_t));
+  const uint64_t  m7 = load64(block +  7 * sizeof(uint64_t));
+  const uint64_t  m8 = load64(block +  8 * sizeof(uint64_t));
+  const uint64_t  m9 = load64(block +  9 * sizeof(uint64_t));
+  const uint64_t m10 = load64(block + 10 * sizeof(uint64_t));
+  const uint64_t m11 = load64(block + 11 * sizeof(uint64_t));
+  const uint64_t m12 = load64(block + 12 * sizeof(uint64_t));
+  const uint64_t m13 = load64(block + 13 * sizeof(uint64_t));
+  const uint64_t m14 = load64(block + 14 * sizeof(uint64_t));
+  const uint64_t m15 = load64(block + 15 * sizeof(uint64_t));
+#endif
+    row1l = LOADU( &S->h[0] ); /* rows 1 and 2: the current chaining value h[0..7] */
+    row1h = LOADU( &S->h[2] );
+    row2l = LOADU( &S->h[4] );
+    row2h = LOADU( &S->h[6] );
+    row3l = LOADU( &blake2b_IV[0] ); /* row 3: IV[0..3] */
+    row3h = LOADU( &blake2b_IV[2] );
+    row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) ); /* row 4: IV[4..5] ^ t[0..1] */
+    row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) ); /* and IV[6..7] ^ f[0..1] */
+    ROUND( 0 );
+    ROUND( 1 );
+    ROUND( 2 );
+    ROUND( 3 );
+    ROUND( 4 );
+    ROUND( 5 );
+    ROUND( 6 );
+    ROUND( 7 );
+    ROUND( 8 );
+    ROUND( 9 );
+    ROUND( 10 );
+    ROUND( 11 );
+    row1l = _mm_xor_si128( row3l, row1l ); /* feed-forward: h[0..3] ^= row1 ^ row3 */
+    row1h = _mm_xor_si128( row3h, row1h );
+    STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
+    STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
+    row2l = _mm_xor_si128( row4l, row2l ); /* feed-forward: h[4..7] ^= row2 ^ row4 */
+    row2h = _mm_xor_si128( row4h, row2h );
+    STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
+    STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
+}
+#else
+static void blake2b_compress(blake2b_state *S, const uint8_t *block) {
+    uint64_t m[16];
+    uint64_t v[16];
+    unsigned int i, r;
+    /* Portable variant: mixes one 128-byte block into S->h. First read the block as sixteen 64-bit words. */
+    for (i = 0; i < 16; ++i) {
+        m[i] = load64(block + i * sizeof(m[i]));
+    }
+    /* v[0..7] start as the current chaining value. */
+    for (i = 0; i < 8; ++i) {
+        v[i] = S->h[i];
+    }
+    /* v[8..15] start as the IV, with the counter t and the flags f XORed into the last four words. */
+    v[8] = blake2b_IV[0];
+    v[9] = blake2b_IV[1];
+    v[10] = blake2b_IV[2];
+    v[11] = blake2b_IV[3];
+    v[12] = blake2b_IV[4] ^ S->t[0];
+    v[13] = blake2b_IV[5] ^ S->t[1];
+    v[14] = blake2b_IV[6] ^ S->f[0];
+    v[15] = blake2b_IV[7] ^ S->f[1];
+    /* G mixes four working words with two message words chosen by blake2b_sigma[r]; i selects the word pair. */
+#define G(r, i, a, b, c, d)                                                    \
+    do {                                                                       \
+        a = a + b + m[blake2b_sigma[r][2 * i + 0]];                            \
+        d = rotr64(d ^ a, 32);                                                 \
+        c = c + d;                                                             \
+        b = rotr64(b ^ c, 24);                                                 \
+        a = a + b + m[blake2b_sigma[r][2 * i + 1]];                            \
+        d = rotr64(d ^ a, 16);                                                 \
+        c = c + d;                                                             \
+        b = rotr64(b ^ c, 63);                                                 \
+    } while ((void)0, 0)
+    /* ROUND applies G to the four columns and then to the four diagonals of the 4x4 word matrix v. */
+#define ROUND(r)                                                               \
+    do {                                                                       \
+        G(r, 0, v[0], v[4], v[8], v[12]);                                      \
+        G(r, 1, v[1], v[5], v[9], v[13]);                                      \
+        G(r, 2, v[2], v[6], v[10], v[14]);                                     \
+        G(r, 3, v[3], v[7], v[11], v[15]);                                     \
+        G(r, 4, v[0], v[5], v[10], v[15]);                                     \
+        G(r, 5, v[1], v[6], v[11], v[12]);                                     \
+        G(r, 6, v[2], v[7], v[8], v[13]);                                      \
+        G(r, 7, v[3], v[4], v[9], v[14]);                                      \
+    } while ((void)0, 0)
+    /* Twelve rounds, one per row of blake2b_sigma. */
+    for (r = 0; r < 12; ++r) {
+        ROUND(r);
+    }
+    /* Feed-forward: fold both halves of v back into the chaining value. */
+    for (i = 0; i < 8; ++i) {
+        S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
+    }
+
+#undef G
+#undef ROUND
+}
+#endif
+
+/*
+ * Absorb `inlen` bytes from `in` into the streaming state `S`.
+ *
+ * Input is staged in S->buf. Once more than one block's worth of data is
+ * available, the staged block is completed and compressed, and any further
+ * blocks are compressed straight from the caller's memory. The trailing
+ * bytes (at least one, at most a full block) always stay staged in S->buf
+ * instead of being compressed here.
+ *
+ * Returns 0 on success -- including inlen == 0, which is accepted before any
+ * pointer is inspected -- and -1 when S or in is NULL or S->f[0] is set.
+ */
+int blake2b_update(blake2b_state *S, const void *in, size_t inlen) {
+    const uint8_t *cursor = (const uint8_t *)in;
+    size_t remaining = inlen;
+
+    /* Empty input is a no-op. */
+    if (remaining == 0) {
+        return 0;
+    }
+
+    /* Missing arguments, or a state that is being reused (S->f[0] set). */
+    if (S == NULL || in == NULL || S->f[0] != 0) {
+        return -1;
+    }
+
+    if (S->buflen + remaining > BLAKE2B_BLOCKBYTES) {
+        const size_t staged = S->buflen;
+        const size_t missing = BLAKE2B_BLOCKBYTES - staged;
+
+        /* Top up the staged block and compress it. */
+        memcpy(S->buf + staged, cursor, missing);
+        blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+        blake2b_compress(S, S->buf);
+        S->buflen = 0;
+        cursor += missing;
+        remaining -= missing;
+
+        /* Compress whole blocks in place, without staging them first. */
+        for (; remaining > BLAKE2B_BLOCKBYTES;
+             remaining -= BLAKE2B_BLOCKBYTES, cursor += BLAKE2B_BLOCKBYTES) {
+            blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+            blake2b_compress(S, cursor);
+        }
+    }
+
+    /* Stage whatever is left for the next update/final call. */
+    memcpy(S->buf + S->buflen, cursor, remaining);
+    S->buflen += (unsigned int)remaining;
+    return 0;
+}
+
+/*
+ * Absorb `inlen` copies of the single byte `in` into the streaming state `S`.
+ *
+ * Produces the same state as calling blake2b_update() with a buffer of
+ * `inlen` bytes all equal to `in`, but without the caller having to
+ * materialize that buffer: the repeated byte is written directly into
+ * S->buf with memset().
+ *
+ * Returns 0 on success (including inlen == 0) and -1 when S is NULL or
+ * S->f[0] is already set.
+ */
+int blake2b_update_static(blake2b_state *S, const char in, size_t inlen) {
+    if (inlen == 0) {
+        return 0;
+    }
+
+    /* Sanity check */
+    if (S == NULL) {
+        return -1;
+    }
+
+    /* Is this a reused state? */
+    if (S->f[0] != 0) {
+        return -1;
+    }
+
+    if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) {
+        /* Complete current block */
+        size_t left = S->buflen;
+        size_t fill = BLAKE2B_BLOCKBYTES - left;
+        memset(&S->buf[left], in, fill);
+        blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+        blake2b_compress(S, S->buf);
+        S->buflen = 0;
+        inlen -= fill;
+
+        if (inlen > BLAKE2B_BLOCKBYTES) {
+            /*
+             * Every remaining full block consists of the same byte, so the
+             * staging buffer only has to be filled once. blake2b_compress()
+             * is handed the block as input (blake2b_update() passes it the
+             * caller's const data), so S->buf is still intact afterwards.
+             */
+            memset(S->buf, in, BLAKE2B_BLOCKBYTES);
+            do {
+                blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+                blake2b_compress(S, S->buf);
+                inlen -= BLAKE2B_BLOCKBYTES;
+            } while (inlen > BLAKE2B_BLOCKBYTES);
+        }
+    }
+
+    /* Stage the trailing bytes for the next update/final call. */
+    memset(&S->buf[S->buflen], in, inlen);
+    S->buflen += (unsigned int)inlen;
+    return 0;
+}
+
+
+/*
+ * Finish the hash held in `S` and write S->outlen bytes of digest to `out`.
+ *
+ * `outlen` is the capacity of `out`; it must be at least S->outlen. The
+ * staged bytes are zero-padded to a full block, flagged as the last block
+ * and compressed; the eight state words are then serialized with store64()
+ * into a temporary buffer from which the requested prefix is copied out.
+ *
+ * Returns 0 on success, -1 when S or out is NULL, when `out` is too small,
+ * or when S->f[0] is already set.
+ */
+int blake2b_final(blake2b_state *S, void *out, size_t outlen) {
+    uint8_t digest[BLAKE2B_OUTBYTES] = {0};
+    unsigned int word;
+
+    /* Argument validation; S is only dereferenced once known non-NULL. */
+    if (S == NULL || out == NULL) {
+        return -1;
+    }
+    if (outlen < S->outlen) {
+        return -1;
+    }
+
+    /* A state whose f[0] is set must not be used again. */
+    if (S->f[0] != 0) {
+        return -1;
+    }
+
+    /* Account for the staged bytes, mark the last block, pad and compress. */
+    blake2b_increment_counter(S, S->buflen);
+    blake2b_set_lastblock(S);
+    memset(S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen);
+    blake2b_compress(S, S->buf);
+
+    /* Serialize the complete state, then hand back only S->outlen bytes. */
+    for (word = 0; word < 8; ++word) {
+        store64(digest + sizeof(S->h[word]) * word, S->h[word]);
+    }
+    memcpy(out, digest, S->outlen);
+
+    return 0;
+}
+
+/*
+ * One-shot BLAKE2b: hash `inlen` bytes of `in` (optionally keyed with
+ * `keylen` bytes of `key`) and write `outlen` bytes of digest to `out`.
+ *
+ * Requirements checked here: `in` may be NULL only when inlen is 0; `out`
+ * must be non-NULL and outlen in 1..BLAKE2B_OUTBYTES; `key` may be NULL only
+ * when keylen is 0, and keylen must not exceed BLAKE2B_KEYBYTES.
+ *
+ * Returns -1 when a requirement is violated or init/update fails; otherwise
+ * returns whatever blake2b_final() returns.
+ */
+int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
+            const void *key, size_t keylen) {
+    blake2b_state S;
+    int init_status;
+
+    /* Parameter validation */
+    if (in == NULL && inlen > 0) {
+        return -1;
+    }
+    if (out == NULL || outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
+        return -1;
+    }
+    if ((key == NULL && keylen > 0) || keylen > BLAKE2B_KEYBYTES) {
+        return -1;
+    }
+
+    /* Keyed or plain initialization, depending on whether a key was given. */
+    init_status = (keylen > 0) ? blake2b_init_key(&S, outlen, key, keylen)
+                               : blake2b_init(&S, outlen);
+    if (init_status < 0) {
+        return -1;
+    }
+
+    if (blake2b_update(&S, in, inlen) < 0) {
+        return -1;
+    }
+
+    return blake2b_final(&S, out, outlen);
+}
+
+/* Argon2 Team - Begin Code */
+/*
+ * Variable-length hash built on BLAKE2b: writes exactly `outlen` bytes to
+ * `pout`, derived from the 32-bit little-endian encoding of `outlen`
+ * followed by the `inlen` bytes at `in`.
+ *
+ * For outlen <= BLAKE2B_OUTBYTES a single BLAKE2b call of that output size
+ * is used. For longer outputs a chain of full-size digests is computed, each
+ * one hashing the previous digest; the first half of every chained digest is
+ * emitted, and the final link is emitted in full with whatever length is
+ * still outstanding.
+ *
+ * Returns -1 when outlen does not fit in 32 bits, the (negative) status of
+ * the first failing BLAKE2b call, or the status of the last call on success.
+ */
+int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
+    uint8_t *dst = (uint8_t *)pout;
+    blake2b_state state;
+    uint8_t len_le[sizeof(uint32_t)] = {0};
+    uint8_t link_out[BLAKE2B_OUTBYTES];
+    uint8_t link_in[BLAKE2B_OUTBYTES];
+    uint32_t remaining;
+    int status = -1;
+
+    if (outlen > UINT32_MAX) {
+        return status;
+    }
+
+    /* The length prefix is always hashed in little-endian byte order. */
+    store32(len_le, (uint32_t)outlen);
+
+/* Run one BLAKE2b call and leave the function with its status on failure. */
+#define BLAKE2B_LONG_TRY(call)                                                 \
+    do {                                                                       \
+        status = call;                                                         \
+        if (status < 0) {                                                      \
+            return status;                                                     \
+        }                                                                      \
+    } while ((void)0, 0)
+
+    /* Short output: one hash of the requested size is enough. */
+    if (outlen <= BLAKE2B_OUTBYTES) {
+        BLAKE2B_LONG_TRY(blake2b_init(&state, outlen));
+        BLAKE2B_LONG_TRY(blake2b_update(&state, len_le, sizeof(len_le)));
+        BLAKE2B_LONG_TRY(blake2b_update(&state, in, inlen));
+        BLAKE2B_LONG_TRY(blake2b_final(&state, dst, outlen));
+        return status;
+    }
+
+    /* Long output: first link hashes the length prefix and the input. */
+    BLAKE2B_LONG_TRY(blake2b_init(&state, BLAKE2B_OUTBYTES));
+    BLAKE2B_LONG_TRY(blake2b_update(&state, len_le, sizeof(len_le)));
+    BLAKE2B_LONG_TRY(blake2b_update(&state, in, inlen));
+    BLAKE2B_LONG_TRY(blake2b_final(&state, link_out, BLAKE2B_OUTBYTES));
+    memcpy(dst, link_out, BLAKE2B_OUTBYTES / 2);
+    dst += BLAKE2B_OUTBYTES / 2;
+
+    /* Middle links: hash the previous digest, emit its first half. */
+    for (remaining = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
+         remaining > BLAKE2B_OUTBYTES; remaining -= BLAKE2B_OUTBYTES / 2) {
+        memcpy(link_in, link_out, BLAKE2B_OUTBYTES);
+        BLAKE2B_LONG_TRY(blake2b(link_out, BLAKE2B_OUTBYTES, link_in,
+                                 BLAKE2B_OUTBYTES, NULL, 0));
+        memcpy(dst, link_out, BLAKE2B_OUTBYTES / 2);
+        dst += BLAKE2B_OUTBYTES / 2;
+    }
+
+    /* Last link: sized to exactly the bytes still outstanding. */
+    memcpy(link_in, link_out, BLAKE2B_OUTBYTES);
+    BLAKE2B_LONG_TRY(blake2b(link_out, remaining, link_in, BLAKE2B_OUTBYTES,
+                             NULL, 0));
+    memcpy(dst, link_out, remaining);
+
+    return status;
+#undef BLAKE2B_LONG_TRY
+}
+/* Argon2 Team - End Code */
diff --git a/src/crypto/argon2_hasher/hash/cpu/CpuHasher.cpp b/src/crypto/argon2_hasher/hash/cpu/CpuHasher.cpp
new file mode 100755
index 00000000..08e4c019
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/CpuHasher.cpp
@@ -0,0 +1,227 @@
+//
+// Created by Haifa Bogdan Adnan on 03/08/2018.
+//
+
+#if defined(__x86_64__) || defined(__i386__) || defined(_WIN64)
+    #include <cpuinfo_x86.h>
+#endif
+#if defined(__arm__)
+    #include <cpuinfo_arm.h>
+#endif
+
+#include <crypto/Argon2_constants.h>
+
+#include "../../common/common.h"
+
+#include "crypto/argon2_hasher/hash/Hasher.h"
+#include "crypto/argon2_hasher/hash/argon2/Argon2.h"
+
+#include "CpuHasher.h"
+#include "crypto/argon2_hasher/common/DLLExport.h"
+
+// Builds an unconfigured CPU hasher: reference ("REF") optimization, one
+// available processing/memory thread, no profile, no loaded module and no
+// per-thread data. initialize() and configure() fill in the real values.
+CpuHasher::CpuHasher()
+    : Hasher(),
+      m_optimization("REF"),
+      m_availableProcessingThr(1),
+      m_availableMemoryThr(1),
+      m_dllHandle(nullptr),
+      m_profile(nullptr),
+      m_argon2BlocksFillerPtr(nullptr),
+      m_threadData(nullptr) {
+    // Members inherited from Hasher cannot go in the initializer list.
+    m_type = "CPU";
+    m_subType = "CPU";
+    m_shortSubType = "CPU";
+    m_computingThreads = 0;
+}
+
+// Releases the per-thread hashing contexts and the loaded block-filler
+// module by delegating to cleanup().
+CpuHasher::~CpuHasher() {
+    cleanup();
+}
+
+// Looks up the Argon2 profile for the requested algorithm/variant and probes
+// the CPU, storing the resulting report in m_description.
+//
+// Returns true when the hasher is ready to be configured, false when no
+// profile is available for the requested algorithm/variant.
+bool CpuHasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) {
+    m_profile = getArgon2Profile(algorithm, variant);
+
+    // detectFeaturesAndMakeDescription() (and later allocateMemory() and
+    // compute()) dereference m_profile unconditionally, so a missing profile
+    // has to be rejected here.
+    // NOTE(review): getArgon2Profile() is defined elsewhere; this assumes it
+    // signals "unsupported" with a null pointer -- confirm.
+    if(m_profile == nullptr) {
+        m_description = "Status: DISABLED - unsupported algorithm.";
+        return false;
+    }
+
+    m_description = detectFeaturesAndMakeDescription();
+    return true;
+}
+
+// Applies the user configuration and allocates one hashing context per
+// computing thread.
+//
+// Steps: optionally override the detected optimization, load the matching
+// block-filler module, derive the thread count from the detected CPU/memory
+// limits and the configured thread count, then allocate an aligned memory
+// area and an Argon2 instance for every thread. Every outcome is appended to
+// m_description as a "Status: ..." line.
+//
+// Returns true when the hasher is enabled; false when the module is missing,
+// no thread can run, the user disabled the hasher, or memory allocation
+// failed (m_intensity is 0 in all of these cases).
+bool CpuHasher::configure(xmrig::HasherConfig &config) {
+    m_intensity = 100;
+
+    if(config.cpuOptimization() != "") {
+        m_description += "Overiding detected optimization feature with " + config.cpuOptimization() + ".\n";
+        m_optimization = config.cpuOptimization();
+    }
+
+    loadArgon2BlockFiller();
+
+    if(m_argon2BlocksFillerPtr == NULL) {
+        m_intensity = 0;
+        m_description += "Status: DISABLED - argon2 hashing module not found.";
+        return false;
+    }
+
+    m_computingThreads = min(m_availableProcessingThr, m_availableMemoryThr);
+
+    if (m_computingThreads == 0) {
+        m_intensity = 0;
+        m_description += "Status: DISABLED - not enough resources.";
+        return false;
+    }
+
+    // A configured thread count caps the detected one; intensity is reported
+    // as the percentage of detected threads actually used.
+    if(config.cpuThreads() > -1) {
+        m_intensity = min(100.0 * config.cpuThreads() / m_computingThreads, 100.0);
+        m_computingThreads = min(config.cpuThreads(), m_computingThreads);
+    }
+
+    if (m_intensity == 0) {
+        m_description += "Status: DISABLED - by user.";
+        return false;
+    }
+
+    m_deviceInfo.intensity = m_intensity;
+
+    storeDeviceInfo(0, m_deviceInfo);
+
+    // Value-initialize the array (note the trailing "()"): cleanup() walks
+    // all m_computingThreads entries and deletes/frees their pointers, so
+    // entries that are never reached because of an allocation failure below
+    // must hold null pointers rather than indeterminate values.
+    m_threadData = new CpuHasherThread[m_computingThreads]();
+    for(int i=0; i < m_computingThreads; i++) {
+        void *buffer = NULL;
+        void *mem = allocateMemory(buffer);
+        // Record the raw allocation before checking the aligned pointer so
+        // that cleanup() releases it on every exit path.
+        m_threadData[i].mem = buffer;
+        if(mem == NULL) {
+            m_intensity = 0;
+            m_description += "Status: DISABLED - error allocating memory.";
+            return false;
+        }
+        m_threadData[i].argon2 = new Argon2(NULL, m_argon2BlocksFillerPtr, NULL, mem, mem);
+        m_threadData[i].hashData.outSize = xmrig::ARGON2_HASHLEN + sizeof(uint32_t);
+    }
+
+    m_description += "Status: ENABLED - with " + to_string(m_computingThreads) + " threads.";
+
+    return true;
+}
+
+// Probes the CPU and the available memory and returns a multi-line,
+// human-readable report.
+//
+// Side effects: sets m_deviceInfo.name, m_optimization (the candidate
+// block-filler variant), m_availableProcessingThr (hardware threads) and
+// m_availableMemoryThr (how many per-thread memory areas could be allocated
+// at the same time). Reads m_profile->memSize, so m_profile must be set.
+string CpuHasher::detectFeaturesAndMakeDescription() {
+    stringstream ss;
+#if defined(__x86_64__) || defined(__i386__) || defined(_WIN64)
+    char brand_string[49];
+    cpu_features::FillX86BrandString(brand_string);
+    m_deviceInfo.name = brand_string;
+
+    ss << brand_string << endl;
+
+    cpu_features::X86Features features = cpu_features::GetX86Info().features;
+    ss << "Optimization features: ";
+
+    // Baseline: SSE2 on 64-bit builds, the reference implementation otherwise.
+#if defined(__x86_64__) || defined(_WIN64)
+    ss << "SSE2 ";
+    m_optimization = "SSE2";
+#else
+    ss << "none";
+    m_optimization = "REF";
+#endif
+
+    // Checked in ascending order so the last supported feature wins.
+    if(features.ssse3 || features.avx2 || features.avx512f) {
+        if (features.ssse3) {
+            ss << "SSSE3 ";
+            m_optimization = "SSSE3";
+        }
+        if (features.avx) {
+            ss << "AVX ";
+            m_optimization = "AVX";
+        }
+        if (features.avx2) {
+            ss << "AVX2 ";
+            m_optimization = "AVX2";
+        }
+        if (features.avx512f) {
+            ss << "AVX512F ";
+            m_optimization = "AVX512F";
+        }
+    }
+    ss << endl;
+#endif
+#if defined(__arm__)
+    m_deviceInfo.name = "ARM processor";
+
+    cpu_features::ArmFeatures features = cpu_features::GetArmInfo().features;
+    ss << "ARM processor" << endl;
+    ss << "Optimization features: ";
+
+    m_optimization = "REF";
+
+    if(features.neon) {
+        ss << "NEON";
+        m_optimization = "NEON";
+    }
+    else {
+        ss << "none";
+    }
+    ss << endl;
+#endif
+    ss << "Selecting " << m_optimization << " as candidate for hashing algorithm." << endl;
+
+    m_availableProcessingThr = thread::hardware_concurrency();
+    // hardware_concurrency() returns 0 when the value cannot be determined;
+    // assume a single thread instead of disabling the hasher outright.
+    if(m_availableProcessingThr < 1)
+        m_availableProcessingThr = 1;
+    ss << "Parallelism: " << m_availableProcessingThr << " concurent threads supported." << endl;
+
+    // Check available memory: allocate one area per hardware thread, stop at
+    // the first failure, then release everything again.
+    vector<void *> memoryTest;
+    for(m_availableMemoryThr = 0;m_availableMemoryThr < m_availableProcessingThr;m_availableMemoryThr++) {
+        void *memory = malloc(m_profile->memSize + 64); //64 bytes for alignment - to work on AVX512F optimisations
+        if(memory == NULL)
+            break;
+        memoryTest.push_back(memory);
+    }
+    for(vector<void*>::iterator it=memoryTest.begin(); it != memoryTest.end(); ++it) {
+        free(*it);
+    }
+    ss << "Memory: there is enough memory for " << m_availableMemoryThr << " concurent threads." << endl;
+
+    return ss.str();
+}
+
+// Releases everything configure() acquired: the per-thread Argon2 instances
+// and memory areas, the thread-data array and the loaded block-filler module.
+//
+// Safe to call more than once: the destructor calls it unconditionally, so
+// every released resource is reset to null to keep a second call from
+// double-freeing memory or closing the module handle twice.
+void CpuHasher::cleanup() {
+    if(m_threadData != nullptr) {
+        for(int i=0; i < m_computingThreads; i++) {
+            delete m_threadData[i].argon2;
+            free(m_threadData[i].mem);
+        }
+        delete[] m_threadData;
+        m_threadData = nullptr;
+    }
+
+    if(m_dllHandle != nullptr) {
+        dlclose(m_dllHandle);
+        m_dllHandle = nullptr;
+        // The filler function lived inside the module that was just closed.
+        m_argon2BlocksFillerPtr = nullptr;
+    }
+}
+
+// Loads "<m_appFolder>/modules/argon2_fill_blocks_<m_optimization>.opt" and
+// resolves its "fill_memory_blocks" symbol into m_argon2BlocksFillerPtr.
+// When the module cannot be opened the pointer is left untouched; callers
+// detect failure by checking it for NULL.
+void CpuHasher::loadArgon2BlockFiller() {
+    string modulePath(m_appFolder);
+    modulePath.append("/modules/argon2_fill_blocks_");
+    modulePath.append(m_optimization);
+    modulePath.append(".opt");
+
+    m_dllHandle = dlopen(modulePath.c_str(), RTLD_LAZY);
+    if(m_dllHandle == NULL)
+        return;
+
+    void *symbol = dlsym(m_dllHandle, "fill_memory_blocks");
+    m_argon2BlocksFillerPtr = (argon2BlocksFillerPtr)symbol;
+}
+
+// Runs one hashing job on the context owned by thread `threadIdx`: records
+// the input/output buffers in that thread's HashData and forwards to
+// Argon2::generateHashes() with the active profile, returning its result.
+// `threadIdx` is not range-checked here.
+int CpuHasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) {
+    CpuHasherThread *slot = m_threadData + threadIdx;
+    HashData &job = slot->hashData;
+
+    job.input = input;
+    job.inSize = size;
+    job.output = output;
+
+    return slot->argon2->generateHashes(*m_profile, job);
+}
+
+// Allocates the memory area for one hashing thread.
+//
+// On success `buffer` receives the raw malloc() pointer (the one that must
+// later be passed to free()) and the return value is a 64-byte aligned
+// pointer inside that allocation with room for m_profile->memSize bytes.
+// On failure both `buffer` and the return value are NULL and nothing is
+// left allocated.
+void *CpuHasher::allocateMemory(void *&buffer) {
+    // 64 spare bytes leave room to move the start up to the next boundary.
+    size_t mem_size = m_profile->memSize + 64;
+    void *mem = malloc(mem_size);
+    buffer = mem;
+
+    // Report allocation failure directly instead of feeding a null pointer
+    // into align().
+    if(mem == NULL)
+        return NULL;
+
+    // align() advances `mem` in place; `buffer` keeps the original pointer.
+    void *aligned = align(64, m_profile->memSize, mem, mem_size);
+    if(aligned == NULL) {
+        // Do not hand back an allocation the caller would consider failed.
+        free(buffer);
+        buffer = NULL;
+    }
+    return aligned;
+}
+
+// Number of hashes worker `workerIdx` computes per call: always 1 for a
+// valid CPU worker, 0 for an index outside [0, computingThreads()).
+size_t CpuHasher::parallelism(int workerIdx) {
+    // Valid indices stop at computingThreads() - 1, matching deviceCount()
+    // and the size of the per-thread data array.
+    if(workerIdx < 0 || workerIdx >= computingThreads())
+        return 0;
+
+    return 1;
+}
+
+// Each computing thread is reported as a device of its own, so the device
+// count equals computingThreads().
+size_t CpuHasher::deviceCount() {
+    const size_t devices = computingThreads();
+    return devices;
+}
+
+// REGISTER_HASHER is defined outside this file; presumably it makes CpuHasher
+// known to the hasher registry at load time -- TODO confirm.
+REGISTER_HASHER(CpuHasher);
\ No newline at end of file
diff --git a/src/crypto/argon2_hasher/hash/cpu/CpuHasher.h b/src/crypto/argon2_hasher/hash/cpu/CpuHasher.h
new file mode 100644
index 00000000..888421c6
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/CpuHasher.h
@@ -0,0 +1,41 @@
+//
+// Created by Haifa Bogdan Adnan on 03/08/2018.
+//
+
+#ifndef ARGON2_CPU_HASHER_H
+#define ARGON2_CPU_HASHER_H
+
+// State owned by one CPU hashing thread. CpuHasher::configure() fills one of
+// these per computing thread and CpuHasher::cleanup() releases them.
+struct CpuHasherThread {
+    Argon2 *argon2;     // hashing context; created with new, destroyed with delete
+    HashData hashData;  // input/output description of the job being computed
+    void *mem;          // raw malloc() pointer backing argon2's memory; passed to free()
+};
+
+// Hasher implementation that computes Argon2 hashes on the CPU, using a
+// block-filler routine loaded at run time from an optimization-specific
+// module (see loadArgon2BlockFiller() in CpuHasher.cpp).
+class CpuHasher : public Hasher {
+public:
+    CpuHasher();
+    ~CpuHasher();
+
+    // Selects the Argon2 profile and probes CPU features and memory.
+    virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant);
+    // Applies user settings and allocates the per-thread contexts; returns
+    // false when the hasher ends up disabled.
+    virtual bool configure(xmrig::HasherConfig &config);
+    // Frees the per-thread contexts and unloads the block-filler module.
+    virtual void cleanup();
+    // Hashes `size` bytes of `input` on thread `threadIdx` into `output`.
+    virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output);
+    // Hashes produced per compute() call by the given worker (1, or 0 when
+    // the index is rejected).
+    virtual size_t parallelism(int workerIdx);
+    // Number of devices reported; one per computing thread.
+    virtual size_t deviceCount();
+
+private:
+    // Builds the description text and records the detected limits.
+    string detectFeaturesAndMakeDescription();
+    // dlopen()s the module matching m_optimization and resolves the filler.
+    void loadArgon2BlockFiller();
+    // Allocates one aligned memory area; `buffer` receives the raw pointer.
+    void *allocateMemory(void *&buffer);
+
+    DeviceInfo m_deviceInfo;                        // name and intensity reported for this device
+    string m_optimization;                          // block-filler variant name, e.g. "REF", "SSE2", "AVX2"
+    int m_availableProcessingThr;                   // value of thread::hardware_concurrency()
+    int m_availableMemoryThr;                       // memory areas that could be allocated simultaneously
+    void *m_dllHandle;                              // dlopen() handle of the block-filler module, or null
+    Argon2Profile *m_profile;                       // profile returned by getArgon2Profile()
+    argon2BlocksFillerPtr m_argon2BlocksFillerPtr;  // "fill_memory_blocks" entry point, or null
+    CpuHasherThread *m_threadData;                  // array with one entry per computing thread
+};
+
+#endif //ARGON2_CPU_HASHER_H
diff --git a/src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-opt.h b/src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-opt.h
new file mode 100644
index 00000000..8048503c
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-opt.h
@@ -0,0 +1,567 @@
+/*
+ * Argon2 reference source code package - reference C implementations
+ *
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+ *
+ * You may use this work under the terms of a Creative Commons CC0 1.0
+ * License/Waiver or the Apache Public License 2.0, at your option. The terms of
+ * these licenses can be found at:
+ *
+ * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+ * - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * You should have received a copy of both of these licenses along with this
+ * software. If not, they may be obtained at the above URLs.
+ */
+
+#ifndef BLAKE_ROUND_MKA_OPT_H
+#define BLAKE_ROUND_MKA_OPT_H
+
+#include "../../argon2/blake2/blake2-impl.h"
+
+#if !defined(__NEON__)
+#include <emmintrin.h>
+#if defined(__SSSE3__)
+#include <tmmintrin.h> /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */
+#endif
+
+#if (defined(__XOP__) || defined(__AVX__)) && (defined(__GNUC__) || defined(__clang__))
+#include <x86intrin.h>
+#endif
+#else
+#include <arm_neon.h>
+#endif
+
+#if !defined(__NEON__)
+#if !defined(__AVX512F__)
+#if !defined(__AVX2__)
+#if !defined(__XOP__)
+/*
+ * Fallback definition of _mm_roti_epi64 for builds without XOP.
+ *
+ * The macro is invoked with a NEGATIVE count (e.g. -32), and -(c) is the
+ * number of bits each 64-bit lane is rotated to the right.
+ *
+ * With SSSE3, rotations by 32, 24 and 16 bits are whole-byte moves and are
+ * done with shuffles (r24 / r16 are the byte-index tables for
+ * _mm_shuffle_epi8); a rotation by 63 is computed as (x >> 63) ^ (x + x);
+ * any other count falls back to the generic shift-and-xor form. Without
+ * SSSE3 only the generic form is used.
+ *
+ * NOTE: the SSSE3 expansion is an unparenthesized conditional expression, so
+ * it should only be used as the complete right-hand side of an assignment.
+ */
+#if defined(__SSSE3__)
+#define r16                                                                    \
+    (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
+#define r24                                                                    \
+    (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
+#define _mm_roti_epi64(x, c)                                                   \
+    (-(c) == 32)                                                               \
+        ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1))                      \
+        : (-(c) == 24)                                                         \
+              ? _mm_shuffle_epi8((x), r24)                                     \
+              : (-(c) == 16)                                                   \
+                    ? _mm_shuffle_epi8((x), r16)                               \
+                    : (-(c) == 63)                                             \
+                          ? _mm_xor_si128(_mm_srli_epi64((x), -(c)),           \
+                                          _mm_add_epi64((x), (x)))             \
+                          : _mm_xor_si128(_mm_srli_epi64((x), -(c)),           \
+                                          _mm_slli_epi64((x), 64 - (-(c))))
+#else /* defined(__SSE2__) */
+#define _mm_roti_epi64(r, c)                                                   \
+    _mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c))))
+#endif
+#else
+#endif
+
+/*
+ * BlaMka mixing step on two 64-bit lanes at once:
+ *     x + y + 2 * (low32(x) * low32(y))
+ * _mm_mul_epu32 multiplies the low 32 bits of each 64-bit lane and the
+ * doubling is done by adding the product to itself.
+ */
+static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
+    const __m128i sum = _mm_add_epi64(x, y);
+    const __m128i product = _mm_mul_epu32(x, y);
+    return _mm_add_epi64(sum, _mm_add_epi64(product, product));
+}
+
+/*
+ * First half of the mixing function, applied to two independent register
+ * sets (suffix 0 and suffix 1) in lock-step:
+ *     A = fBlaMka(A, B);  D = rotr64(D ^ A, 32);
+ *     C = fBlaMka(C, D);  B = rotr64(B ^ C, 24);
+ * All arguments are modified in place.
+ */
+#define G1(A0, B0, C0, D0, A1, B1, C1, D1)                                     \
+    do {                                                                       \
+        A0 = fBlaMka(A0, B0);                                                  \
+        A1 = fBlaMka(A1, B1);                                                  \
+                                                                               \
+        D0 = _mm_xor_si128(D0, A0);                                            \
+        D1 = _mm_xor_si128(D1, A1);                                            \
+                                                                               \
+        D0 = _mm_roti_epi64(D0, -32);                                          \
+        D1 = _mm_roti_epi64(D1, -32);                                          \
+                                                                               \
+        C0 = fBlaMka(C0, D0);                                                  \
+        C1 = fBlaMka(C1, D1);                                                  \
+                                                                               \
+        B0 = _mm_xor_si128(B0, C0);                                            \
+        B1 = _mm_xor_si128(B1, C1);                                            \
+                                                                               \
+        B0 = _mm_roti_epi64(B0, -24);                                          \
+        B1 = _mm_roti_epi64(B1, -24);                                          \
+    } while ((void)0, 0)
+
+/*
+ * Second half of the mixing function; identical in shape to G1 but with
+ * rotation amounts 16 and 63:
+ *     A = fBlaMka(A, B);  D = rotr64(D ^ A, 16);
+ *     C = fBlaMka(C, D);  B = rotr64(B ^ C, 63);
+ * All arguments are modified in place.
+ */
+#define G2(A0, B0, C0, D0, A1, B1, C1, D1)                                     \
+    do {                                                                       \
+        A0 = fBlaMka(A0, B0);                                                  \
+        A1 = fBlaMka(A1, B1);                                                  \
+                                                                               \
+        D0 = _mm_xor_si128(D0, A0);                                            \
+        D1 = _mm_xor_si128(D1, A1);                                            \
+                                                                               \
+        D0 = _mm_roti_epi64(D0, -16);                                          \
+        D1 = _mm_roti_epi64(D1, -16);                                          \
+                                                                               \
+        C0 = fBlaMka(C0, D0);                                                  \
+        C1 = fBlaMka(C1, D1);                                                  \
+                                                                               \
+        B0 = _mm_xor_si128(B0, C0);                                            \
+        B1 = _mm_xor_si128(B1, C1);                                            \
+                                                                               \
+        B0 = _mm_roti_epi64(B0, -63);                                          \
+        B1 = _mm_roti_epi64(B1, -63);                                          \
+    } while ((void)0, 0)
+
+#if defined(__SSSE3__)
+/*
+ * SSSE3 version. Treating each register pair (X0, X1) as a row of four
+ * 64-bit lanes, rotates the B row by one lane, the C row by two lanes (a
+ * plain swap of C0 and C1) and the D row by three lanes, so that the next
+ * G1/G2 pass mixes the diagonals. The A row is left untouched.
+ */
+#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1)                            \
+    do {                                                                       \
+        __m128i t0 = _mm_alignr_epi8(B1, B0, 8);                               \
+        __m128i t1 = _mm_alignr_epi8(B0, B1, 8);                               \
+        B0 = t0;                                                               \
+        B1 = t1;                                                               \
+                                                                               \
+        t0 = C0;                                                               \
+        C0 = C1;                                                               \
+        C1 = t0;                                                               \
+                                                                               \
+        t0 = _mm_alignr_epi8(D1, D0, 8);                                       \
+        t1 = _mm_alignr_epi8(D0, D1, 8);                                       \
+        D0 = t1;                                                               \
+        D1 = t0;                                                               \
+    } while ((void)0, 0)
+
+/*
+ * SSSE3 version. Inverse of DIAGONALIZE: rotates the B and D rows back in
+ * the opposite direction and swaps C0/C1 again, restoring the column
+ * layout. The A row is left untouched.
+ */
+#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1)                          \
+    do {                                                                       \
+        __m128i t0 = _mm_alignr_epi8(B0, B1, 8);                               \
+        __m128i t1 = _mm_alignr_epi8(B1, B0, 8);                               \
+        B0 = t0;                                                               \
+        B1 = t1;                                                               \
+                                                                               \
+        t0 = C0;                                                               \
+        C0 = C1;                                                               \
+        C1 = t0;                                                               \
+                                                                               \
+        t0 = _mm_alignr_epi8(D0, D1, 8);                                       \
+        t1 = _mm_alignr_epi8(D1, D0, 8);                                       \
+        D0 = t1;                                                               \
+        D1 = t0;                                                               \
+    } while ((void)0, 0)
+#else /* SSE2 */
+/*
+ * SSE2 version of DIAGONALIZE: rotates the B, C and D rows by one, two and
+ * three 64-bit lanes using unpack instructions instead of _mm_alignr_epi8.
+ * D0 doubles as scratch space for the C0/C1 swap after its original value
+ * has been saved in t0, so the statement order matters.
+ */
+#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1)                            \
+    do {                                                                       \
+        __m128i t0 = D0;                                                       \
+        __m128i t1 = B0;                                                       \
+        D0 = C0;                                                               \
+        C0 = C1;                                                               \
+        C1 = D0;                                                               \
+        D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0));               \
+        D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1));               \
+        B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1));               \
+        B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1));               \
+    } while ((void)0, 0)
+
+/*
+ * SSE2 version of UNDIAGONALIZE: undoes the lane rotations applied by
+ * DIAGONALIZE using unpack instructions. t0/t1 keep the original B0 and D0
+ * because both are overwritten before their old values are needed again.
+ */
+#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1)                          \
+    do {                                                                       \
+        __m128i t0, t1;                                                        \
+        t0 = C0;                                                               \
+        C0 = C1;                                                               \
+        C1 = t0;                                                               \
+        t0 = B0;                                                               \
+        t1 = D0;                                                               \
+        B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0));               \
+        B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1));               \
+        D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1));               \
+        D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1));               \
+    } while ((void)0, 0)
+#endif
+
+/*
+ * One full round on eight 128-bit registers: G1+G2 on the current layout,
+ * DIAGONALIZE, G1+G2 again, then UNDIAGONALIZE to restore the layout.
+ *
+ * Note the argument order: this macro takes the registers pairwise
+ * (A0, A1, B0, B1, ...) while the helpers it calls take them grouped by
+ * suffix (A0, B0, C0, D0, A1, ...).
+ */
+#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1)                           \
+    do {                                                                       \
+        G1(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
+        G2(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
+                                                                               \
+        DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1);                           \
+                                                                               \
+        G1(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
+        G2(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
+                                                                               \
+        UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1);                         \
+    } while ((void)0, 0)
+#else /* __AVX2__ */
+
+#include <immintrin.h>
+
+/*
+ * Right rotations of every 64-bit lane of a __m256i by 32, 24, 16 and 63
+ * bits. The first three are whole-byte moves done with shuffles (the byte
+ * table is repeated for each 128-bit half); rotr63 is computed as
+ * (x >> 63) ^ (x + x).
+ */
+#define rotr32(x)   _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
+#define rotr24(x)   _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
+#define rotr16(x)   _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
+#define rotr63(x)   _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
+
+/*
+ * AVX2 first half of the mixing function, run on register set 0 and then on
+ * register set 1:
+ *     A = A + B + 2 * low32(A) * low32(B);  D = rotr32(D ^ A);
+ *     C = C + D + 2 * low32(C) * low32(D);  B = rotr24(B ^ C);
+ *
+ * NOTE: the definition ends in ';', so the macro expands to a complete
+ * statement and is invoked without a trailing semicolon by BLAKE2_ROUND_1/2.
+ */
+#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        __m256i ml = _mm256_mul_epu32(A0, B0); \
+        ml = _mm256_add_epi64(ml, ml); \
+        A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
+        D0 = _mm256_xor_si256(D0, A0); \
+        D0 = rotr32(D0); \
+        \
+        ml = _mm256_mul_epu32(C0, D0); \
+        ml = _mm256_add_epi64(ml, ml); \
+        C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
+        \
+        B0 = _mm256_xor_si256(B0, C0); \
+        B0 = rotr24(B0); \
+        \
+        ml = _mm256_mul_epu32(A1, B1); \
+        ml = _mm256_add_epi64(ml, ml); \
+        A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
+        D1 = _mm256_xor_si256(D1, A1); \
+        D1 = rotr32(D1); \
+        \
+        ml = _mm256_mul_epu32(C1, D1); \
+        ml = _mm256_add_epi64(ml, ml); \
+        C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
+        \
+        B1 = _mm256_xor_si256(B1, C1); \
+        B1 = rotr24(B1); \
+    } while((void)0, 0);
+
+/*
+ * AVX2 second half of the mixing function; same shape as G1_AVX2 but with
+ * rotations rotr16 (on D) and rotr63 (on B).
+ *
+ * NOTE: the definition ends in ';', so the macro expands to a complete
+ * statement and is invoked without a trailing semicolon by BLAKE2_ROUND_1/2.
+ */
+#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        __m256i ml = _mm256_mul_epu32(A0, B0); \
+        ml = _mm256_add_epi64(ml, ml); \
+        A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
+        D0 = _mm256_xor_si256(D0, A0); \
+        D0 = rotr16(D0); \
+        \
+        ml = _mm256_mul_epu32(C0, D0); \
+        ml = _mm256_add_epi64(ml, ml); \
+        C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
+        B0 = _mm256_xor_si256(B0, C0); \
+        B0 = rotr63(B0); \
+        \
+        ml = _mm256_mul_epu32(A1, B1); \
+        ml = _mm256_add_epi64(ml, ml); \
+        A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
+        D1 = _mm256_xor_si256(D1, A1); \
+        D1 = rotr16(D1); \
+        \
+        ml = _mm256_mul_epu32(C1, D1); \
+        ml = _mm256_add_epi64(ml, ml); \
+        C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
+        B1 = _mm256_xor_si256(B1, C1); \
+        B1 = rotr63(B1); \
+    } while((void)0, 0);
+
+/*
+ * Diagonalization for the layout in which every __m256i holds a complete
+ * four-lane row: rotates the lanes of B by one, C by two and D by three
+ * positions within each register, independently for set 0 and set 1.
+ * A0/A1 are accepted for symmetry but not modified.
+ * NOTE: the definition ends in ';' (see G1_AVX2).
+ */
+#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
+        C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+        D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
+        \
+        B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
+        C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+        D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
+    } while((void)0, 0);
+
+/*
+ * Diagonalization for the layout in which a row is spread across the
+ * register pair (X0, X1): B and D lanes are exchanged between the two
+ * registers with a blend followed by a lane permute, and C0/C1 are swapped.
+ * Note that the B results are assigned crosswise (tmp1 -> B1, tmp2 -> B0)
+ * while the D results are not. A0/A1 are not modified.
+ * NOTE: the definition ends in ';' (see G1_AVX2).
+ */
+#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
+        __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
+        B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+        B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+        \
+        tmp1 = C0; \
+        C0 = C1; \
+        C1 = tmp1; \
+        \
+        tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
+        tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
+        D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+        D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+    } while(0);
+
+/*
+ * Inverse of DIAGONALIZE_1: the shuffle constants used for B and D are
+ * exchanged, which rotates those rows back; C is rotated by two lanes
+ * again, which is its own inverse.
+ * NOTE: the definition ends in ';' (see G1_AVX2).
+ */
+#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
+        C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+        D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
+        \
+        B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
+        C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+        D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
+    } while((void)0, 0);
+
+/*
+ * Counterpart of DIAGONALIZE_2, applied after the diagonal G1/G2 pass to
+ * return to the column layout. Compared with DIAGONALIZE_2 the B results
+ * are assigned straight (tmp1 -> B0, tmp2 -> B1) and the blend masks used
+ * for D are exchanged; C0/C1 are swapped back.
+ * NOTE: the definition ends in ';' (see G1_AVX2).
+ */
+#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
+        __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
+        B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+        B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+        \
+        tmp1 = C0; \
+        C0 = C1; \
+        C1 = tmp1; \
+        \
+        tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
+        tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
+        D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+        D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+    } while((void)0, 0);
+
+/*
+ * Full AVX2 round using the within-register diagonalization
+ * (DIAGONALIZE_1 / UNDIAGONALIZE_1): G1+G2, diagonalize, G1+G2,
+ * undiagonalize. The helper macros already end in ';', which is why they
+ * appear here without one; this macro itself also ends in ';'.
+ */
+#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do{ \
+        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+        \
+        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+    } while((void)0, 0);
+
+/*
+ * BLAKE2_ROUND_2 (AVX2): same sequence as BLAKE2_ROUND_1 but using the
+ * DIAGONALIZE_2 / UNDIAGONALIZE_2 lane arrangement: G1+G2, DIAGONALIZE_2,
+ * G1+G2 again, then UNDIAGONALIZE_2.
+ * All eight arguments are updated in place and must be lvalues.
+ * The helper macros are invoked without a trailing ';' here.
+ * NOTE(review): this relies on each helper expanding with its own ';' (as
+ * UNDIAGONALIZE_2 does) - confirm for G1_AVX2/G2_AVX2/DIAGONALIZE_2.
+ */
+#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do{ \
+        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    } while((void)0, 0);
+
+#endif /* __AVX2__ */
+
+#else /* __AVX512F__ */
+
+#include <immintrin.h>
+
+#define ror64(x, n) _mm512_ror_epi64((x), (n))
+
+/*
+ * BlaMka mixing primitive on eight 64-bit lanes:
+ *   result = x + y + 2 * (low32(x) * low32(y))
+ * _mm512_mul_epu32 multiplies the low 32 bits of every 64-bit lane, and the
+ * doubling is expressed as an addition of the product to itself.
+ */
+static BLAKE2_INLINE __m512i muladd(__m512i x, __m512i y)
+{
+    const __m512i low_product = _mm512_mul_epu32(x, y);
+    const __m512i doubled_product = _mm512_add_epi64(low_product, low_product);
+    const __m512i plain_sum = _mm512_add_epi64(x, y);
+
+    return _mm512_add_epi64(plain_sum, doubled_product);
+}
+
+/*
+ * G1 (AVX-512): first half of the BlaMka G function, applied to two
+ * register sets (suffix 0 and suffix 1) at once.  For each set:
+ *   A = muladd(A, B);  D = ror64(D ^ A, 32);
+ *   C = muladd(C, D);  B = ror64(B ^ C, 24);
+ * All arguments are updated in place and must be lvalues.
+ */
+#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        A0 = muladd(A0, B0); \
+        A1 = muladd(A1, B1); \
+\
+        D0 = _mm512_xor_si512(D0, A0); \
+        D1 = _mm512_xor_si512(D1, A1); \
+\
+        D0 = ror64(D0, 32); \
+        D1 = ror64(D1, 32); \
+\
+        C0 = muladd(C0, D0); \
+        C1 = muladd(C1, D1); \
+\
+        B0 = _mm512_xor_si512(B0, C0); \
+        B1 = _mm512_xor_si512(B1, C1); \
+\
+        B0 = ror64(B0, 24); \
+        B1 = ror64(B1, 24); \
+    } while ((void)0, 0)
+
+/*
+ * G2 (AVX-512): second half of the BlaMka G function; identical in shape
+ * to G1 but with rotation amounts 16 and 63.  For each register set:
+ *   A = muladd(A, B);  D = ror64(D ^ A, 16);
+ *   C = muladd(C, D);  B = ror64(B ^ C, 63);
+ * All arguments are updated in place and must be lvalues.
+ */
+#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        A0 = muladd(A0, B0); \
+        A1 = muladd(A1, B1); \
+\
+        D0 = _mm512_xor_si512(D0, A0); \
+        D1 = _mm512_xor_si512(D1, A1); \
+\
+        D0 = ror64(D0, 16); \
+        D1 = ror64(D1, 16); \
+\
+        C0 = muladd(C0, D0); \
+        C1 = muladd(C1, D1); \
+\
+        B0 = _mm512_xor_si512(B0, C0); \
+        B1 = _mm512_xor_si512(B1, C1); \
+\
+        B0 = ror64(B0, 63); \
+        B1 = ror64(B1, 63); \
+    } while ((void)0, 0)
+
+/*
+ * DIAGONALIZE (AVX-512): rotate the 64-bit lanes of B, C and D by one, two
+ * and three positions respectively; _mm512_permutex_epi64 applies the same
+ * 4-lane pattern inside each 256-bit half of the register.
+ * A0/A1 are accepted but never modified.  UNDIAGONALIZE applies the
+ * opposite rotations.
+ */
+#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
+        B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
+\
+        C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+        C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+\
+        D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
+        D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
+    } while ((void)0, 0)
+
+/*
+ * UNDIAGONALIZE (AVX-512): undo DIAGONALIZE.  B and D use each other's
+ * shuffle constants from DIAGONALIZE (rotating back), while C's two-lane
+ * rotation is its own inverse.  A0/A1 are accepted but never modified.
+ */
+#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
+        B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
+\
+        C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+        C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+\
+        D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
+        D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
+    } while ((void)0, 0)
+
+/*
+ * BLAKE2_ROUND (AVX-512): one full round on two register sets:
+ * G1+G2 on the current lane arrangement, DIAGONALIZE, G1+G2 again, then
+ * UNDIAGONALIZE to restore the lane arrangement.
+ * All eight arguments are updated in place and must be lvalues.
+ */
+#define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
+        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
+\
+        DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
+\
+        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
+        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
+\
+        UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
+    } while ((void)0, 0)
+
+/*
+ * SWAP_HALVES: regroup the 256-bit halves of two __m512i registers.
+ * Afterwards A0 = { low half of A0, low half of A1 } and
+ * A1 = { high half of A0, high half of A1 }.  Both arguments are updated
+ * in place; the temporaries are local to the macro.
+ */
+#define SWAP_HALVES(A0, A1) \
+    do { \
+        __m512i t0, t1; \
+        t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \
+        t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \
+        A0 = t0; \
+        A1 = t1; \
+    } while((void)0, 0)
+
+/*
+ * SWAP_QUARTERS: SWAP_HALVES followed by a per-register permutation with
+ * index vector (0, 1, 4, 5, 2, 3, 6, 7), i.e. the two middle 128-bit
+ * quarters of each register are exchanged.  UNSWAP_QUARTERS applies the
+ * same two steps in the opposite order.
+ */
+#define SWAP_QUARTERS(A0, A1) \
+    do { \
+        SWAP_HALVES(A0, A1); \
+        A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
+        A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
+    } while((void)0, 0)
+
+/*
+ * UNSWAP_QUARTERS: the steps of SWAP_QUARTERS in reverse order - first the
+ * (0, 1, 4, 5, 2, 3, 6, 7) permutation of each register (exchanging its two
+ * middle 128-bit quarters), then SWAP_HALVES on the pair.
+ */
+#define UNSWAP_QUARTERS(A0, A1) \
+    do { \
+        A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
+        A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
+        SWAP_HALVES(A0, A1); \
+    } while((void)0, 0)
+
+/*
+ * BLAKE2_ROUND_1 (AVX-512): regroup the register pairs (A0,B0), (C0,D0),
+ * (A1,B1), (C1,D1) with SWAP_HALVES, run BLAKE2_ROUND, then apply
+ * SWAP_HALVES again to the same pairs to return to the original layout.
+ * Note the parameter order: positional arguments are bound as
+ * A0, C0, B0, D0, A1, C1, B1, D1.
+ * All eight arguments are updated in place and must be lvalues.
+ */
+#define BLAKE2_ROUND_1(A0, C0, B0, D0, A1, C1, B1, D1) \
+    do { \
+        SWAP_HALVES(A0, B0); \
+        SWAP_HALVES(C0, D0); \
+        SWAP_HALVES(A1, B1); \
+        SWAP_HALVES(C1, D1); \
+        BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
+        SWAP_HALVES(A0, B0); \
+        SWAP_HALVES(C0, D0); \
+        SWAP_HALVES(A1, B1); \
+        SWAP_HALVES(C1, D1); \
+    } while ((void)0, 0)
+
+/*
+ * BLAKE2_ROUND_2 (AVX-512): regroup each (X0, X1) pair with SWAP_QUARTERS,
+ * run BLAKE2_ROUND, then restore the layout with UNSWAP_QUARTERS.
+ * Positional arguments are bound as A0, A1, B0, B1, C0, C1, D0, D1.
+ * All eight arguments are updated in place and must be lvalues.
+ */
+#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        SWAP_QUARTERS(A0, A1); \
+        SWAP_QUARTERS(B0, B1); \
+        SWAP_QUARTERS(C0, C1); \
+        SWAP_QUARTERS(D0, D1); \
+        BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
+        UNSWAP_QUARTERS(A0, A1); \
+        UNSWAP_QUARTERS(B0, B1); \
+        UNSWAP_QUARTERS(C0, C1); \
+        UNSWAP_QUARTERS(D0, D1); \
+    } while ((void)0, 0)
+
+#endif /* __AVX512F__ */
+
+#else /* __NEON__ */
+
+/*
+ * BlaMka mixing primitive on two 64-bit lanes (NEON):
+ *   result = x + y + 2 * (low32(x) * low32(y))
+ * vmovn_u64 narrows each lane to its low 32 bits and vmull_u32 widens the
+ * 32x32 product back to 64 bits.
+ */
+static BLAKE2_INLINE uint64x2_t fBlaMka(uint64x2_t x, uint64x2_t y) {
+    const uint32x2_t x_low = vmovn_u64(x);
+    const uint32x2_t y_low = vmovn_u64(y);
+    const uint64x2_t low_product = vmull_u32(x_low, y_low);
+    const uint64x2_t doubled_product = vaddq_u64(low_product, low_product);
+
+    return vaddq_u64(vaddq_u64(x, y), doubled_product);
+}
+
+/*
+ * Fixed-amount 64-bit right rotations for uint64x2_t values.
+ * Each macro evaluates its argument more than once (except the 32-bit
+ * variant), so pass side-effect-free expressions only.
+ */
+
+/* Rotate by 32: swap the two 32-bit words inside every 64-bit lane. */
+#define vrorq_n_u64_32(x) vreinterpretq_u64_u32(vrev64q_u32(vreinterpretq_u32_u64((x))))
+
+/* Rotate by 24: byte-wise extract starting at byte 3 of each 64-bit half. */
+#define vrorq_n_u64_24(x) vcombine_u64( \
+    vreinterpret_u64_u8(vext_u8(vreinterpret_u8_u64(vget_low_u64(x)), vreinterpret_u8_u64(vget_low_u64(x)), 3)), \
+    vreinterpret_u64_u8(vext_u8(vreinterpret_u8_u64(vget_high_u64(x)), vreinterpret_u8_u64(vget_high_u64(x)), 3)))
+
+/* Rotate by 16: byte-wise extract starting at byte 2 of each 64-bit half. */
+#define vrorq_n_u64_16(x) vcombine_u64( \
+    vreinterpret_u64_u8(vext_u8(vreinterpret_u8_u64(vget_low_u64(x)), vreinterpret_u8_u64(vget_low_u64(x)), 2)), \
+    vreinterpret_u64_u8(vext_u8(vreinterpret_u8_u64(vget_high_u64(x)), vreinterpret_u8_u64(vget_high_u64(x)), 2)))
+
+/* Rotate right by 63 == rotate left by 1: (x + x) supplies x << 1 and
+ * (x >> 63) supplies the wrapped-around top bit. */
+#define vrorq_n_u64_63(x) veorq_u64(vaddq_u64(x, x), vshrq_n_u64(x, 63))
+
+/*
+ * G1 (NEON): first half of the BlaMka G function on two register sets.
+ * For each set:
+ *   A = fBlaMka(A, B);  D = ror64(D ^ A, 32);
+ *   C = fBlaMka(C, D);  B = ror64(B ^ C, 24);
+ * All arguments are updated in place and must be lvalues.
+ */
+#define G1(A0, B0, C0, D0, A1, B1, C1, D1)                                     \
+do {                                                                       \
+    A0 = fBlaMka(A0, B0);                                                  \
+    A1 = fBlaMka(A1, B1);                                                  \
+    \
+    D0 = veorq_u64(D0, A0);                                            \
+    D1 = veorq_u64(D1, A1);                                            \
+    \
+    D0 = vrorq_n_u64_32(D0);                                          \
+    D1 = vrorq_n_u64_32(D1);                                          \
+    \
+    C0 = fBlaMka(C0, D0);                                                  \
+    C1 = fBlaMka(C1, D1);                                                  \
+    \
+    B0 = veorq_u64(B0, C0);                                            \
+    B1 = veorq_u64(B1, C1);                                            \
+    \
+    B0 = vrorq_n_u64_24(B0);                                          \
+    B1 = vrorq_n_u64_24(B1);                                          \
+} while ((void)0, 0)
+
+/*
+ * G2 (NEON): second half of the BlaMka G function; same shape as G1 but
+ * with rotation amounts 16 and 63.  For each register set:
+ *   A = fBlaMka(A, B);  D = ror64(D ^ A, 16);
+ *   C = fBlaMka(C, D);  B = ror64(B ^ C, 63);
+ * All arguments are updated in place and must be lvalues.
+ */
+#define G2(A0, B0, C0, D0, A1, B1, C1, D1)                                     \
+do {                                                                       \
+    A0 = fBlaMka(A0, B0);                                                  \
+    A1 = fBlaMka(A1, B1);                                                  \
+    \
+    D0 = veorq_u64(D0, A0);                                            \
+    D1 = veorq_u64(D1, A1);                                            \
+    \
+    D0 = vrorq_n_u64_16(D0);                                          \
+    D1 = vrorq_n_u64_16(D1);                                          \
+    \
+    C0 = fBlaMka(C0, D0);                                                  \
+    C1 = fBlaMka(C1, D1);                                                  \
+    \
+    B0 = veorq_u64(B0, C0);                                            \
+    B1 = veorq_u64(B1, C1);                                            \
+    \
+    B0 = vrorq_n_u64_63(B0);                                          \
+    B1 = vrorq_n_u64_63(B1);                                          \
+} while ((void)0, 0)
+
+/*
+ * DIAGONALIZE (NEON): rotate the (B0,B1), (C0,C1) and (D0,D1) register
+ * pairs so the second G1/G2 application works on diagonals.
+ *   B: { B0.hi, B1.lo } / { B1.hi, B0.lo }
+ *   C: C0 and C1 exchanged
+ *   D: { D1.hi, D0.lo } / { D0.hi, D1.lo }
+ * A0/A1 are accepted but never modified.
+ *
+ * The macro is a single do/while statement with its own temporaries, so it
+ * no longer requires variables named t0/t1 in the caller's scope and is
+ * safe inside an unbraced if/else.  Invoke it with a trailing ';'.
+ */
+#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        uint64x2_t diag_t0 = vextq_u64(B0, B1, 1); \
+        uint64x2_t diag_t1 = vextq_u64(B1, B0, 1); \
+        B0 = diag_t0; \
+        B1 = diag_t1; \
+        \
+        diag_t0 = C0; \
+        C0 = C1; \
+        C1 = diag_t0; \
+        \
+        diag_t0 = vextq_u64(D1, D0, 1); \
+        diag_t1 = vextq_u64(D0, D1, 1); \
+        D0 = diag_t0; \
+        D1 = diag_t1; \
+    } while ((void)0, 0)
+
+/*
+ * UNDIAGONALIZE (NEON): undo DIAGONALIZE on the (B0,B1), (C0,C1) and
+ * (D0,D1) register pairs.
+ *   B: { B1.hi, B0.lo } / { B0.hi, B1.lo }
+ *   C: C0 and C1 exchanged
+ *   D: { D0.hi, D1.lo } / { D1.hi, D0.lo }
+ * A0/A1 are accepted but never modified.
+ *
+ * The macro is a single do/while statement with its own temporaries, so it
+ * no longer requires variables named t0/t1 in the caller's scope and is
+ * safe inside an unbraced if/else.  Invoke it with a trailing ';'.
+ */
+#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        uint64x2_t undiag_t0 = vextq_u64(B1, B0, 1); \
+        uint64x2_t undiag_t1 = vextq_u64(B0, B1, 1); \
+        B0 = undiag_t0; \
+        B1 = undiag_t1; \
+        \
+        undiag_t0 = C0; \
+        C0 = C1; \
+        C1 = undiag_t0; \
+        \
+        undiag_t0 = vextq_u64(D0, D1, 1); \
+        undiag_t1 = vextq_u64(D1, D0, 1); \
+        D0 = undiag_t0; \
+        D1 = undiag_t1; \
+    } while ((void)0, 0)
+
+/*
+ * BLAKE2_ROUND (NEON): one full round over eight uint64x2_t registers:
+ * G1+G2, DIAGONALIZE, G1+G2 again, then UNDIAGONALIZE.
+ * Positional arguments are bound as A0, A1, B0, B1, C0, C1, D0, D1 and are
+ * re-ordered when forwarded to the helper macros.
+ * All eight arguments are updated in place and must be lvalues.
+ */
+#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1)                           \
+do {                                                                       \
+    G1(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
+    G2(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
+    \
+    DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1);                           \
+    \
+    G1(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
+    G2(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
+    \
+    UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1);                         \
+} while ((void)0, 0)
+
+#endif /* __NEON__ */
+
+#endif /* BLAKE_ROUND_MKA_OPT_H */
diff --git a/src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-ref.h b/src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-ref.h
new file mode 100644
index 00000000..fb07a969
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-ref.h
@@ -0,0 +1,55 @@
+/*
+ * Argon2 reference source code package - reference C implementations
+ *
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+ *
+ * You may use this work under the terms of a Creative Commons CC0 1.0
+ * License/Waiver or the Apache Public License 2.0, at your option. The terms of
+ * these licenses can be found at:
+ *
+ * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+ * - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * You should have received a copy of both of these licenses along with this
+ * software. If not, they may be obtained at the above URLs.
+ */
+
+#ifndef BLAKE_ROUND_MKA_H
+#define BLAKE_ROUND_MKA_H
+
+#include "../../argon2/blake2/blake2-impl.h"
+
+/* designed by the Lyra PHC team */
+/*
+ * Scalar BlaMka mixing primitive:
+ *   result = x + y + 2 * (low32(x) * low32(y))   (all arithmetic mod 2^64)
+ */
+static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
+    const uint64_t low32_mask = UINT64_C(0xFFFFFFFF);
+    const uint64_t x_low = x & low32_mask;
+    const uint64_t y_low = y & low32_mask;
+
+    return x + y + 2 * (x_low * y_low);
+}
+
+/*
+ * G: the BlaMka quarter-round on four 64-bit words.  It is the BLAKE2b G
+ * shape with additions replaced by fBlaMka and no message words; the
+ * rotation amounts are 32, 24, 16 and 63.
+ * All four arguments are updated in place and are evaluated several times,
+ * so they must be side-effect-free lvalues.
+ */
+#define G(a, b, c, d)                                                          \
+    do {                                                                       \
+        a = fBlaMka(a, b);                                                     \
+        d = rotr64(d ^ a, 32);                                                 \
+        c = fBlaMka(c, d);                                                     \
+        b = rotr64(b ^ c, 24);                                                 \
+        a = fBlaMka(a, b);                                                     \
+        d = rotr64(d ^ a, 16);                                                 \
+        c = fBlaMka(c, d);                                                     \
+        b = rotr64(b ^ c, 63);                                                 \
+    } while ((void)0, 0)
+
+/*
+ * BLAKE2_ROUND_NOMSG: one message-less round over sixteen 64-bit words
+ * viewed as a 4x4 matrix (v0..v3 first row, v12..v15 last row).
+ * The first four G calls mix the columns, the last four mix the diagonals.
+ * All sixteen arguments are updated in place and must be lvalues.
+ */
+#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,   \
+                           v12, v13, v14, v15)                                 \
+    do {                                                                       \
+        /* columns */                                                          \
+        G(v0, v4, v8, v12);                                                    \
+        G(v1, v5, v9, v13);                                                    \
+        G(v2, v6, v10, v14);                                                   \
+        G(v3, v7, v11, v15);                                                   \
+        /* diagonals */                                                        \
+        G(v0, v5, v10, v15);                                                   \
+        G(v1, v6, v11, v12);                                                   \
+        G(v2, v7, v8, v13);                                                    \
+        G(v3, v4, v9, v14);                                                    \
+    } while ((void)0, 0)
+
+#endif
diff --git a/src/crypto/argon2_hasher/hash/cpu/argon2_opt/implementation.c b/src/crypto/argon2_hasher/hash/cpu/argon2_opt/implementation.c
new file mode 100755
index 00000000..c01261ab
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/argon2_opt/implementation.c
@@ -0,0 +1,448 @@
+//
+// Created by Haifa Bogdan Adnan on 06/08/2018.
+//
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "../../../common/DLLImport.h"
+#include "../../argon2/Defs.h"
+#include "../../../common/DLLExport.h"
+
+#if !defined(BUILD_REF) && (defined(__x86_64__) || defined(_WIN64) || defined(__NEON__))
+#include "blamka-round-opt.h"
+#else
+#include "blamka-round-ref.h"
+#endif
+
+/* Copy all ARGON2_QWORDS_IN_BLOCK 64-bit words of `src` into `dst`. */
+void copy_block(block *dst, const block *src) {
+    const size_t block_bytes = ARGON2_QWORDS_IN_BLOCK * sizeof(uint64_t);
+
+    memcpy(dst->v, src->v, block_bytes);
+}
+
+/* XOR every 64-bit word of `src` into the matching word of `dst`. */
+void xor_block(block *dst, const block *src) {
+    int word = 0;
+
+    while (word < ARGON2_QWORDS_IN_BLOCK) {
+        dst->v[word] = dst->v[word] ^ src->v[word];
+        word++;
+    }
+}
+
+#ifndef BUILD_REF
+
+#if defined(__AVX512F__)
+/*
+ * Argon2 compression step, AVX-512 variant.
+ *
+ * state      - running block held as ARGON2_512BIT_WORDS_IN_BLOCK __m512i
+ *              values; updated in place and left holding the new block.
+ * ref_block  - reference block XORed into the state before the rounds.
+ * next_block - destination block; additionally XORed into the feedback
+ *              value when with_xor is non-zero, and overwritten with the
+ *              result when keep is non-zero.
+ * with_xor   - non-zero: fold the previous contents of next_block into the
+ *              result.
+ * keep       - non-zero: store the result to next_block; zero: only update
+ *              state.
+ */
+static void fill_block(__m512i *state, const block *ref_block,
+                       block *next_block, int with_xor, int keep) {
+    /* Feedback value XORed back into the state after the rounds. */
+    __m512i block_XY[ARGON2_512BIT_WORDS_IN_BLOCK];
+    unsigned int i;
+
+    if (with_xor) {
+        for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
+            state[i] = _mm512_xor_si512(
+                state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
+            block_XY[i] = _mm512_xor_si512(
+                state[i], _mm512_loadu_si512((const __m512i *)next_block->v + i));
+        }
+    } else {
+        for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
+            block_XY[i] = state[i] = _mm512_xor_si512(
+                state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
+        }
+    }
+
+    /* First pass: two groups of eight consecutive registers. */
+    for (i = 0; i < 2; ++i) {
+        BLAKE2_ROUND_1(
+            state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3],
+            state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]);
+    }
+
+    /* Second pass: two groups of eight registers taken with stride 2. */
+    for (i = 0; i < 2; ++i) {
+        BLAKE2_ROUND_2(
+            state[2 * 0 + i], state[2 * 1 + i], state[2 * 2 + i], state[2 * 3 + i],
+            state[2 * 4 + i], state[2 * 5 + i], state[2 * 6 + i], state[2 * 7 + i]);
+    }
+
+    /* Feed-forward XOR; the store to next_block is skipped when !keep. */
+    if(keep) {
+        for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
+            state[i] = _mm512_xor_si512(state[i], block_XY[i]);
+            _mm512_storeu_si512((__m512i *)next_block->v + i, state[i]);
+        }
+    }
+    else {
+        for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
+            state[i] = _mm512_xor_si512(state[i], block_XY[i]);
+        }
+    }
+}
+#elif defined(__AVX2__)
+/*
+ * Argon2 compression step, AVX2 variant.
+ *
+ * state      - running block held as ARGON2_HWORDS_IN_BLOCK __m256i values;
+ *              updated in place and left holding the new block.
+ * ref_block  - reference block XORed into the state before the rounds.
+ * next_block - destination block; additionally XORed into the feedback
+ *              value when with_xor is non-zero, and overwritten with the
+ *              result when keep is non-zero.
+ * with_xor   - non-zero: fold the previous contents of next_block into the
+ *              result.
+ * keep       - non-zero: store the result to next_block; zero: only update
+ *              state.
+ *
+ * All accesses to ref_block/next_block use unaligned loads and stores: the
+ * block memory is addressed through plain pointers and nothing in this
+ * function guarantees 32-byte alignment.
+ */
+static void fill_block(__m256i *state, const block *ref_block,
+                       block *next_block, int with_xor, int keep) {
+    /* Feedback value XORed back into the state after the rounds. */
+    __m256i block_XY[ARGON2_HWORDS_IN_BLOCK];
+    unsigned int i;
+
+    if (with_xor) {
+        for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
+            state[i] = _mm256_xor_si256(
+                    state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
+            block_XY[i] = _mm256_xor_si256(
+                    state[i], _mm256_loadu_si256((const __m256i *)next_block->v + i));
+        }
+    } else {
+        for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
+            block_XY[i] = state[i] = _mm256_xor_si256(
+                    state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
+        }
+    }
+
+    /* First pass: four groups of eight consecutive registers. */
+    for (i = 0; i < 4; ++i) {
+        BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5],
+                       state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]);
+    }
+
+    /* Second pass: four groups of eight registers taken with stride 4. */
+    for (i = 0; i < 4; ++i) {
+        BLAKE2_ROUND_2(state[ 0 + i], state[ 4 + i], state[ 8 + i], state[12 + i],
+                       state[16 + i], state[20 + i], state[24 + i], state[28 + i]);
+    }
+
+    /* Feed-forward XOR; the store to next_block is skipped when !keep. */
+    for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
+        state[i] = _mm256_xor_si256(state[i], block_XY[i]);
+        if (keep) {
+            /* Unaligned store: the aligned form faults on memory that is
+             * not 32-byte aligned. */
+            _mm256_storeu_si256((__m256i *)next_block->v + i, state[i]);
+        }
+    }
+}
+#elif defined(__AVX__)
+
+#define I2D(x) _mm256_castsi256_pd(x)
+#define D2I(x) _mm256_castpd_si256(x)
+
+/*
+ * Argon2 compression step, AVX (no AVX2) variant.
+ *
+ * The BLAKE2 rounds run on 128-bit registers, but the bulk XOR passes
+ * before and after the rounds view the same memory as __m256i and use the
+ * floating-point XOR (_mm256_xor_pd) through the I2D/D2I casts.
+ *
+ * state      - running block held as ARGON2_OWORDS_IN_BLOCK __m128i values;
+ *              updated in place and left holding the new block.
+ * ref_block  - reference block XORed into the state before the rounds.
+ * next_block - destination block; additionally XORed into the feedback
+ *              value when with_xor is non-zero, and overwritten with the
+ *              result when keep is non-zero.
+ * with_xor   - non-zero: fold the previous contents of next_block into the
+ *              result.
+ * keep       - non-zero: store the result to next_block; zero: only update
+ *              state.
+ */
+static void fill_block(__m128i *state, const block *ref_block,
+                       block *next_block, int with_xor, int keep) {
+    /* Feedback value XORed back into the state after the rounds. */
+    __m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
+    unsigned int i;
+
+    __m256i t;
+    /* 256-bit views of state and block_XY, accessed only with unaligned
+     * loads/stores. */
+    __m256i *s256 = (__m256i *) state, *block256 = (__m256i *) block_XY;
+
+    /* The trailing backslashes below are plain line splices inside a
+     * function body, not macro continuations. */
+    if (with_xor) {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK / 2; i++) {
+            t = D2I(_mm256_xor_pd(I2D(_mm256_loadu_si256(s256 + i)), \
+                I2D(_mm256_loadu_si256((const __m256i *)ref_block->v + i))));
+            _mm256_storeu_si256(s256 + i, t);
+            t = D2I(_mm256_xor_pd(I2D(t), \
+                I2D(_mm256_loadu_si256((const __m256i *)next_block->v + i))));
+            _mm256_storeu_si256(block256 + i, t);
+        }
+    } else {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK / 2; i++) {
+            t = D2I(_mm256_xor_pd(I2D(_mm256_loadu_si256(s256 + i)), \
+                I2D(_mm256_loadu_si256((const __m256i *)ref_block->v + i))));
+            _mm256_storeu_si256(s256 + i, t);
+            _mm256_storeu_si256(block256 + i, t);
+        }
+    }
+
+    /* First pass: eight groups of eight consecutive registers. */
+    for (i = 0; i < 8; ++i) {
+        BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
+                     state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
+                     state[8 * i + 6], state[8 * i + 7]);
+    }
+
+    /* Second pass: eight groups of eight registers taken with stride 8. */
+    for (i = 0; i < 8; ++i) {
+        BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
+                     state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
+                     state[8 * 6 + i], state[8 * 7 + i]);
+    }
+
+    /* Feed-forward XOR; the store to next_block is skipped when !keep. */
+    if(keep) {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK / 2; i++) {
+            t = D2I(_mm256_xor_pd(I2D(_mm256_loadu_si256(s256 + i)), \
+                I2D(_mm256_loadu_si256(block256 + i))));
+
+            _mm256_storeu_si256(s256 + i, t);
+            _mm256_storeu_si256((__m256i *)next_block->v + i, t);
+        }
+    }
+    else {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK / 2; i++) {
+            t = D2I(_mm256_xor_pd(I2D(_mm256_loadu_si256(s256 + i)), \
+                I2D(_mm256_loadu_si256(block256 + i))));
+
+            _mm256_storeu_si256(s256 + i, t);
+        }
+    }
+
+}
+#elif defined(__NEON__)
+/*
+ * Argon2 compression step, ARM NEON variant.
+ *
+ * state      - running block held as ARGON2_OWORDS_IN_BLOCK uint64x2_t
+ *              values; updated in place and left holding the new block.
+ * ref_block  - reference block XORed into the state before the rounds.
+ * next_block - destination block; additionally XORed into the feedback
+ *              value when with_xor is non-zero, and overwritten with the
+ *              result when keep is non-zero.
+ * with_xor   - non-zero: fold the previous contents of next_block into the
+ *              result.
+ * keep       - non-zero: store the result to next_block; zero: only update
+ *              state.
+ */
+static void fill_block(uint64x2_t *state, const block *ref_block,
+                       block *next_block, int with_xor, int keep) {
+    /* Feedback value XORed back into the state after the rounds. */
+    uint64x2_t block_XY[ARGON2_OWORDS_IN_BLOCK];
+    /* Scratch temporaries referenced by name from the NEON
+     * DIAGONALIZE/UNDIAGONALIZE macros when those macros do not declare
+     * their own. */
+    uint64x2_t t0, t1;
+
+    unsigned int i;
+
+    /* Each uint64x2_t covers two 64-bit words, hence the i*2 offsets. */
+    if (with_xor) {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+            state[i] = veorq_u64(state[i], vld1q_u64(ref_block->v + i*2));
+            block_XY[i] = veorq_u64(state[i], vld1q_u64(next_block->v + i*2));
+        }
+    } else {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+            block_XY[i] = state[i] = veorq_u64(state[i], vld1q_u64(ref_block->v + i*2));
+        }
+    }
+
+    /* First pass: eight groups of eight consecutive registers. */
+    for (i = 0; i < 8; ++i) {
+        BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
+                     state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
+                     state[8 * i + 6], state[8 * i + 7]);
+    }
+
+    /* Second pass: eight groups of eight registers taken with stride 8. */
+    for (i = 0; i < 8; ++i) {
+        BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
+                     state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
+                     state[8 * 6 + i], state[8 * 7 + i]);
+    }
+
+    /* Feed-forward XOR; the store to next_block is skipped when !keep. */
+    if(keep) {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+            state[i] = veorq_u64(state[i], block_XY[i]);
+            vst1q_u64(next_block->v + i*2, state[i]);
+        }
+    }
+    else {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+            state[i] = veorq_u64(state[i], block_XY[i]);
+        }
+    }
+}
+#else
+/*
+ * Argon2 compression step, 128-bit SSE fallback (used when none of the
+ * AVX-512, AVX2, AVX or NEON branches above is selected).
+ *
+ * state      - running block held as ARGON2_OWORDS_IN_BLOCK __m128i values;
+ *              updated in place and left holding the new block.
+ * ref_block  - reference block XORed into the state before the rounds.
+ * next_block - destination block; additionally XORed into the feedback
+ *              value when with_xor is non-zero, and overwritten with the
+ *              result when keep is non-zero.
+ * with_xor   - non-zero: fold the previous contents of next_block into the
+ *              result.
+ * keep       - non-zero: store the result to next_block; zero: only update
+ *              state.
+ */
+static void fill_block(__m128i *state, const block *ref_block,
+                       block *next_block, int with_xor, int keep) {
+    /* Feedback value XORed back into the state after the rounds. */
+    __m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
+    unsigned int i;
+
+    if (with_xor) {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+            state[i] = _mm_xor_si128(
+                state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
+            block_XY[i] = _mm_xor_si128(
+                state[i], _mm_loadu_si128((const __m128i *)next_block->v + i));
+        }
+    } else {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+            block_XY[i] = state[i] = _mm_xor_si128(
+                state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
+        }
+    }
+
+    /* First pass: eight groups of eight consecutive registers. */
+    for (i = 0; i < 8; ++i) {
+        BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
+            state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
+            state[8 * i + 6], state[8 * i + 7]);
+    }
+
+    /* Second pass: eight groups of eight registers taken with stride 8. */
+    for (i = 0; i < 8; ++i) {
+        BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
+            state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
+            state[8 * 6 + i], state[8 * 7 + i]);
+    }
+
+    /* Feed-forward XOR; the store to next_block is skipped when !keep. */
+    if(keep) {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+            state[i] = _mm_xor_si128(state[i], block_XY[i]);
+            _mm_storeu_si128((__m128i *)next_block->v + i, state[i]);
+        }
+    }
+    else {
+        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
+            state[i] = _mm_xor_si128(state[i], block_XY[i]);
+        }
+    }
+}
+#endif
+
+#else
+/*
+ * Argon2 compression step, portable reference variant (BUILD_REF).
+ *
+ * prev_block - running block; XORed with ref_block, permuted by the BLAKE2
+ *              rounds and left holding the new block.
+ * ref_block  - reference block XORed into prev_block before the rounds.
+ * next_block - destination block; XORed into the feedback value when
+ *              with_xor is non-zero (and next_block is not NULL), and
+ *              overwritten with the result when keep is non-zero.
+ * with_xor   - non-zero: fold the previous contents of next_block into the
+ *              result.
+ * keep       - non-zero: copy the result to next_block; zero: only update
+ *              prev_block.
+ */
+static void fill_block(block *prev_block, const block *ref_block,
+                       block *next_block, int with_xor, int keep) {
+    /* Feedback value XORed back into prev_block after the rounds. */
+    block block_tmp;
+    unsigned i;
+
+    xor_block(prev_block, ref_block);
+    copy_block(&block_tmp, prev_block);
+
+    if (with_xor && next_block != NULL) {
+        xor_block(&block_tmp, next_block);
+    }
+
+    /* Apply Blake2 on columns of 64-bit words: (0,1,...,15) , then
+     (16,17,..31)... finally (112,113,...127) */
+    for (i = 0; i < 8; ++i) {
+        BLAKE2_ROUND_NOMSG(
+                           prev_block->v[16 * i], prev_block->v[16 * i + 1], prev_block->v[16 * i + 2],
+                           prev_block->v[16 * i + 3], prev_block->v[16 * i + 4], prev_block->v[16 * i + 5],
+                           prev_block->v[16 * i + 6], prev_block->v[16 * i + 7], prev_block->v[16 * i + 8],
+                           prev_block->v[16 * i + 9], prev_block->v[16 * i + 10], prev_block->v[16 * i + 11],
+                           prev_block->v[16 * i + 12], prev_block->v[16 * i + 13], prev_block->v[16 * i + 14],
+                           prev_block->v[16 * i + 15]);
+    }
+
+    /* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then
+     (2,3,18,19,...,114,115).. finally (14,15,30,31,...,126,127) */
+    for (i = 0; i < 8; i++) {
+        BLAKE2_ROUND_NOMSG(
+                           prev_block->v[2 * i], prev_block->v[2 * i + 1], prev_block->v[2 * i + 16],
+                           prev_block->v[2 * i + 17], prev_block->v[2 * i + 32], prev_block->v[2 * i + 33],
+                           prev_block->v[2 * i + 48], prev_block->v[2 * i + 49], prev_block->v[2 * i + 64],
+                           prev_block->v[2 * i + 65], prev_block->v[2 * i + 80], prev_block->v[2 * i + 81],
+                           prev_block->v[2 * i + 96], prev_block->v[2 * i + 97], prev_block->v[2 * i + 112],
+                           prev_block->v[2 * i + 113]);
+    }
+
+    /* Feed-forward XOR; note keep != 0 dereferences next_block without a
+     * NULL check, unlike the with_xor path above. */
+    xor_block(prev_block, &block_tmp);
+    if(keep)
+        copy_block(next_block, prev_block);
+}
+
+#endif
+
+/*
+ * Fill the Argon2 memory for `threads` independent hash computations, one
+ * after another, on the calling thread.
+ *
+ * threads   - number of hash instances laid out back to back in user_data;
+ *             instance `thr` starts at byte offset thr * profile->memSize.
+ * profile   - precomputed schedule:
+ *               segments   - segCount triples (cur_idx, prev_idx, seg_type)
+ *               blockRefs  - blockRefsSize triples (cur_idx, ref_idx, keep)
+ *                            consumed by segments whose seg_type is 0
+ *               segSize    - blocks per segment (a lane is 4 segments)
+ *               thrCost    - number of lanes
+ *               succesiveIdxs - non-zero when block indexes inside a
+ *                            segment are consecutive, so cur_idx is simply
+ *                            incremented instead of read from blockRefs
+ * user_data - block memory to work on.
+ *
+ * Returns user_data.  On return, block 0 of every instance holds the final
+ * block (either the last XOR target listed in the trailing blockRefs
+ * entries, or the running state if there is none).
+ */
+DLLEXPORT void *fill_memory_blocks(int threads, Argon2Profile *profile, void *user_data) {
+    void *memory = user_data;
+#ifndef  BUILD_REF
+#if defined(__AVX512F__)
+    __m512i state[ARGON2_512BIT_WORDS_IN_BLOCK];
+    uint64_t buff_512[8];
+#elif defined(__AVX2__)
+    __m256i state[ARGON2_HWORDS_IN_BLOCK];
+    uint64_t buff_256[4];
+#elif defined(__x86_64__) || defined(_WIN64)
+    __m128i state[ARGON2_OWORDS_IN_BLOCK];
+#elif defined(__NEON__)
+    uint64x2_t state[ARGON2_OWORDS_IN_BLOCK];
+#endif
+#else
+    block state_;
+    block *state = &state_;
+#endif
+    int lane_length = profile->segSize * 4;
+    int seg_length = profile->segSize;
+    int suc_idx = profile->succesiveIdxs;
+
+    for(int thr = 0; thr < threads;thr++) {
+        block *ref_block = NULL, *curr_block = NULL;
+
+        int32_t ref_idx = 0;
+        int32_t cur_idx = 0;
+        int32_t prev_idx = 0;
+        int32_t seg_type = 0;
+        int32_t idx = 0;
+        int32_t keep = 1;
+        int32_t with_xor = 0;
+
+        /* Compute the byte offset in size_t so that many instances of a
+         * large memSize cannot wrap a narrower integer type. */
+        block *blocks = (block *)((uint8_t*)memory + (size_t)thr * profile->memSize);
+
+        int32_t *address = profile->blockRefs;
+
+        for(uint32_t s = 0; s < profile->segCount; s++) {
+            cur_idx = profile->segments[s * 3];
+            prev_idx = profile->segments[s * 3 + 1];
+            seg_type = profile->segments[s * 3 + 2];
+            keep = 1;
+            /* Segments past the first pass (4 slices per lane) XOR into
+             * the existing block contents. */
+            with_xor = (s >= profile->thrCost * 4) ? 1 : 0;
+
+            /* The first segment of every lane skips its first two blocks. */
+            idx = (s < profile->thrCost) ? 2 : 0;
+
+            int32_t lane = s % profile->thrCost;
+            int32_t slice = (s / profile->thrCost) % 4;
+            int32_t pass = (s / profile->thrCost) / 4;
+
+            memcpy(state, (void *) (blocks + prev_idx), ARGON2_BLOCK_SIZE);
+
+            if(seg_type == 0) {
+                /* Locate this segment's entries in blockRefs; first-slice
+                 * segments contribute segSize - 2 entries each. */
+                if(s < profile->thrCost)
+                    address = &profile->blockRefs[(s * (profile->segSize - 2)) * 3];
+                else
+                    address = &profile->blockRefs[(profile->thrCost * (profile->segSize - 2) + (s - profile->thrCost) * profile->segSize) * 3];
+            }
+
+            for (int i = idx; i < seg_length; ++i, cur_idx ++) {
+                if (seg_type == 1) { // data dependent addressing
+                    /* The first 64-bit word of the running state is the
+                     * pseudo-random value that selects the reference. */
+#ifndef  BUILD_REF
+#if defined(__AVX512F__)
+                    _mm512_storeu_si512(buff_512, state[0]);
+                    uint64_t pseudo_rand = buff_512[0];
+#elif defined(__AVX2__)
+                    /* The intrinsic takes __m256i *, so cast the scratch
+                     * buffer explicitly. */
+                    _mm256_storeu_si256((__m256i *)buff_256, state[0]);
+                    uint64_t pseudo_rand = buff_256[0];
+#elif defined(__x86_64__) || defined(_WIN64)
+                    uint64_t pseudo_rand = _mm_cvtsi128_si64(state[0]);
+#elif defined(__NEON__)
+                    uint64_t pseudo_rand = 0;
+                    vst1q_lane_u64(&pseudo_rand, state[0], 0);
+#endif
+#else
+                    uint64_t pseudo_rand = state->v[0];
+#endif
+                    /* High 32 bits pick the lane, low 32 bits the position. */
+                    uint64_t ref_lane = ((pseudo_rand >> 32)) % profile->thrCost;
+                    uint32_t reference_area_size = 0;
+                    if(pass > 0) {
+                        if (lane == ref_lane) {
+                            reference_area_size = lane_length - seg_length + i - 1;
+                        } else {
+                            reference_area_size = lane_length - seg_length + ((i == 0) ? (-1) : 0);
+                        }
+                    }
+                    else {
+                        if (lane == ref_lane) {
+                            reference_area_size = slice * seg_length + i - 1;
+                        } else {
+                            reference_area_size = slice * seg_length + ((i == 0) ? (-1) : 0);
+                        }
+                    }
+                    /* Square-and-scale mapping of the low 32 bits onto the
+                     * reference area. */
+                    uint64_t relative_position = pseudo_rand & 0xFFFFFFFF;
+                    relative_position = relative_position * relative_position >> 32;
+
+                    relative_position = reference_area_size - 1 -
+                                        (reference_area_size * relative_position >> 32);
+
+                    ref_idx = ref_lane * lane_length + (((pass > 0 && slice < 3) ? ((slice + 1) * seg_length) : 0) + relative_position) % lane_length;
+                }
+                else {
+                    /* Precomputed addressing: one blockRefs triple per block. */
+                    ref_idx = address[1];
+                    if(suc_idx == 0)
+                        cur_idx = address[0];
+                    keep = address[2];
+
+                    address += 3;
+                }
+
+                ref_block = blocks + ref_idx;
+                curr_block = blocks + cur_idx;
+
+                fill_block(state, ref_block, curr_block, with_xor, keep);
+            }
+        }
+
+        /* Trailing blockRefs entries flagged with keep == -1 describe final
+         * XOR operations; remember the last target (-1 means none). */
+        int32_t dst = -1;
+        for(; address < (profile->blockRefs + profile->blockRefsSize * 3); address += 3) {
+            if (address[2] == -1) {
+                curr_block = blocks + address[0];
+                ref_block = blocks + address[1];
+                dst = address[0];
+                xor_block(curr_block, ref_block);
+            }
+        }
+        if(dst != -1)
+            copy_block(blocks, blocks + dst);
+        else
+            /* state is a SIMD register array in the optimised builds; cast
+             * it to the block type copy_block expects. */
+            copy_block(blocks, (const block *)state);
+    }
+
+    return memory;
+}
+
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/.clang-format b/src/crypto/argon2_hasher/hash/cpu/cpu_features/.clang-format
new file mode 100755
index 00000000..06ea346a
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/.clang-format
@@ -0,0 +1,4 @@
+---
+Language:        Cpp
+BasedOnStyle:  Google
+...
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/.gitignore b/src/crypto/argon2_hasher/hash/cpu/cpu_features/.gitignore
new file mode 100755
index 00000000..0690aa44
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/.gitignore
@@ -0,0 +1 @@
+cmake_build/
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/.travis.yml b/src/crypto/argon2_hasher/hash/cpu/cpu_features/.travis.yml
new file mode 100755
index 00000000..deafdfa7
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/.travis.yml
@@ -0,0 +1,91 @@
+language: c
+
+sudo: false
+
+cache:
+  directories:
+    - $HOME/cpu_features_archives
+
+matrix:
+  include:
+  - os: linux
+    compiler: gcc
+    env:
+      TOOLCHAIN=NATIVE
+      TARGET=native
+  - os: linux
+    compiler: clang
+    env:
+      TOOLCHAIN=NATIVE
+      TARGET=native
+  - os: osx
+    compiler: gcc
+    env:
+      TOOLCHAIN=NATIVE
+      TARGET=native
+  - os: osx
+    compiler: clang
+    env:
+      TOOLCHAIN=NATIVE
+      TARGET=native
+  - os: linux-ppc64le
+    compiler: gcc
+    env:
+      TOOLCHAIN=NATIVE
+      TARGET=native
+  - os: linux-ppc64le
+    compiler: clang
+    env:
+      TOOLCHAIN=NATIVE
+      TARGET=native
+  # Toolchains for little-endian, 64-bit ARMv8 for GNU/Linux systems
+  - os: linux
+    env:
+      TOOLCHAIN=LINARO
+      TARGET=aarch64-linux-gnu
+      QEMU_ARCH=aarch64
+  # Toolchains for little-endian, hard-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems
+  - os: linux
+    env:
+      TOOLCHAIN=LINARO
+      TARGET=arm-linux-gnueabihf
+      QEMU_ARCH=arm
+  # Toolchains for little-endian, 32-bit ARMv8 for GNU/Linux systems
+  - os: linux
+    env:
+      TOOLCHAIN=LINARO
+      TARGET=armv8l-linux-gnueabihf
+      QEMU_ARCH=arm
+  # Toolchains for little-endian, soft-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems
+  - os: linux
+    env:
+      TOOLCHAIN=LINARO
+      TARGET=arm-linux-gnueabi
+      QEMU_ARCH=arm
+  # Toolchains for big-endian, 64-bit ARMv8 for GNU/Linux systems
+  - os: linux
+    env:
+      TOOLCHAIN=LINARO
+      TARGET=aarch64_be-linux-gnu
+      QEMU_ARCH=DISABLED
+  # Toolchains for big-endian, hard-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems
+  - os: linux
+    env:
+      TOOLCHAIN=LINARO
+      TARGET=armeb-linux-gnueabihf
+      QEMU_ARCH=DISABLED
+  # Toolchains for big-endian, soft-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems
+  - os: linux
+    env:
+      TOOLCHAIN=LINARO
+      TARGET=armeb-linux-gnueabi
+      QEMU_ARCH=DISABLED
+  - os: linux
+    env:
+      TOOLCHAIN=CODESCAPE
+      TARGET=mips-mti-linux-gnu
+      QEMU_ARCH=DISABLED
+
+script:
+  - cmake --version
+  - bash -e -x ./scripts/run_integration.sh
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/CMakeLists.txt b/src/crypto/argon2_hasher/hash/cpu/cpu_features/CMakeLists.txt
new file mode 100755
index 00000000..591c1164
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/CMakeLists.txt
@@ -0,0 +1,177 @@
+cmake_minimum_required(VERSION 3.0)
+
+project(CpuFeatures VERSION 0.1.0)
+
+# Default to a Release build when no build type has been selected.
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE "Release" CACHE STRING
+      "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel."
+      FORCE)
+endif()
+
+# BUILD_TESTING is a standard CMake variable, but we declare it here to make it
+# prominent in the GUI.
+option(BUILD_TESTING "Enable test (depends on googletest)." OFF)
+# BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to make
+# it prominent in the GUI.
+option(BUILD_SHARED_LIBS "Build library as shared." OFF)
+
+#
+# library : cpu_features
+#
+
+# Public headers; installed with the library through its PUBLIC_HEADER property.
+set(_HDRS
+  include/cpuinfo_aarch64.h
+  include/cpuinfo_arm.h
+  include/cpuinfo_mips.h
+  include/cpuinfo_ppc.h
+  include/cpuinfo_x86.h
+  include/cpu_features_macros.h
+)
+
+# cpu_features_macros.h is already part of _HDRS, so it is not listed again.
+add_library(cpu_features
+  ${_HDRS}
+  include/internal/bit_utils.h
+  include/internal/linux_features_aggregator.h
+  include/internal/cpuid_x86.h
+  include/internal/filesystem.h
+  include/internal/hwcaps.h
+  include/internal/stack_line_reader.h
+  include/internal/string_view.h
+  src/linux_features_aggregator.c
+  src/cpuid_x86_clang_gcc.c
+  src/cpuid_x86_msvc.c
+  src/cpuinfo_aarch64.c
+  src/cpuinfo_arm.c
+  src/cpuinfo_mips.c
+  src/cpuinfo_ppc.c
+  src/cpuinfo_x86.c
+  src/filesystem.c
+  src/hwcaps.c
+  src/stack_line_reader.c
+  src/string_view.c
+)
+
+# Consumers see include/ in the build tree and include/cpu_features once
+# installed; include/internal is only visible while building the library.
+target_include_directories(cpu_features
+  PUBLIC
+  $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
+  $<INSTALL_INTERFACE:include/cpu_features>
+  PRIVATE
+  include/internal
+)
+set_target_properties(cpu_features PROPERTIES PUBLIC_HEADER "${_HDRS}")
+target_compile_definitions(cpu_features
+  PUBLIC STACK_LINE_READER_BUFFER_SIZE=1024)
+target_link_libraries(cpu_features PUBLIC ${CMAKE_DL_LIBS})
+
+# The use of shared libraries is discouraged.
+# For API / ABI compatibility reasons, it is recommended to build and use
+# cpu_features in a subdirectory of your project or as an embedded dependency.
+if(BUILD_SHARED_LIBS)
+  set_property(TARGET cpu_features PROPERTY POSITION_INDEPENDENT_CODE ON)
+endif()
+# CpuFeatures:: matches the NAMESPACE of install(EXPORT) below, so the same
+# target name works in-tree and from an installed package. The CpuFeature::
+# spelling is kept so existing references continue to resolve.
+add_library(CpuFeatures::cpu_features ALIAS cpu_features)
+add_library(CpuFeature::cpu_features ALIAS cpu_features)
+
+#
+# program : list_cpu_features
+#
+
+add_executable(list_cpu_features src/utils/list_cpu_features.c)
+target_link_libraries(list_cpu_features PRIVATE cpu_features)
+add_executable(CpuFeatures::list_cpu_features ALIAS list_cpu_features)
+add_executable(CpuFeature::list_cpu_features ALIAS list_cpu_features)
+
+#
+# tests
+#
+
+include(CTest)
+if(BUILD_TESTING)
+  # Automatically incorporate googletest into the CMake Project if target not
+  # found.
+  if(NOT TARGET gtest OR NOT TARGET gmock_main)
+    # Download and unpack googletest at configure time. The download project is
+    # generated under CMAKE_BINARY_DIR (not the current binary dir) because the
+    # execute_process() calls below run there; the two locations differ when
+    # this project is added with add_subdirectory().
+    configure_file(
+      cmake/googletest.CMakeLists.txt.in
+      ${CMAKE_BINARY_DIR}/googletest-download/CMakeLists.txt
+    )
+
+    execute_process(
+      COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
+      RESULT_VARIABLE result
+      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download)
+
+    if(result)
+      message(FATAL_ERROR "CMake step for googletest failed: ${result}")
+    endif()
+
+    execute_process(
+      COMMAND ${CMAKE_COMMAND} --build .
+      RESULT_VARIABLE result
+      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download)
+
+    if(result)
+      message(FATAL_ERROR "Build step for googletest failed: ${result}")
+    endif()
+
+    # Prevent overriding the parent project's compiler/linker settings on
+    # Windows.
+    set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+
+    # Add googletest directly to our build. This defines the gtest and
+    # gtest_main targets.
+    add_subdirectory(${CMAKE_BINARY_DIR}/googletest-src
+                     ${CMAKE_BINARY_DIR}/googletest-build
+                     EXCLUDE_FROM_ALL)
+  endif()
+
+  add_subdirectory(test)
+endif()
+
+#
+# Install
+#
+
+include(GNUInstallDirs)
+install(TARGETS cpu_features list_cpu_features
+  EXPORT CpuFeaturesTargets
+  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/cpu_features
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+)
+install(EXPORT CpuFeaturesTargets
+  NAMESPACE CpuFeatures::
+  DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/CpuFeatures
+  COMPONENT Devel
+)
+# Generate the package config and version files used by find_package().
+include(CMakePackageConfigHelpers)
+configure_package_config_file(cmake/CpuFeaturesConfig.cmake.in
+  "${PROJECT_BINARY_DIR}/CpuFeaturesConfig.cmake"
+  INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/CpuFeatures"
+  NO_SET_AND_CHECK_MACRO
+  NO_CHECK_REQUIRED_COMPONENTS_MACRO
+)
+write_basic_package_version_file(
+  "${PROJECT_BINARY_DIR}/CpuFeaturesConfigVersion.cmake"
+  COMPATIBILITY SameMajorVersion
+)
+install(
+  FILES
+  "${PROJECT_BINARY_DIR}/CpuFeaturesConfig.cmake"
+  "${PROJECT_BINARY_DIR}/CpuFeaturesConfigVersion.cmake"
+  DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/CpuFeatures"
+  COMPONENT Devel
+)
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/CONTRIBUTING.md b/src/crypto/argon2_hasher/hash/cpu/cpu_features/CONTRIBUTING.md
new file mode 100755
index 00000000..c980350f
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/CONTRIBUTING.md
@@ -0,0 +1,23 @@
+# How to Contribute
+
+We'd love to accept your patches and contributions to this project. There are
+just a few small guidelines you need to follow.
+
+## Contributor License Agreement
+
+Contributions to this project must be accompanied by a Contributor License
+Agreement. You (or your employer) retain the copyright to your contribution;
+this simply gives us permission to use and redistribute your contributions as
+part of the project. Head over to <https://cla.developers.google.com/> to see
+your current agreements on file or to sign a new one.
+
+You generally only need to submit a CLA once, so if you've already submitted one
+(even if it was for a different project), you probably don't need to do it
+again.
+
+## Code reviews
+
+All submissions, including submissions by project members, require review. We
+use GitHub pull requests for this purpose. Consult
+[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
+information on using pull requests.
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/LICENSE b/src/crypto/argon2_hasher/hash/cpu/cpu_features/LICENSE
new file mode 100755
index 00000000..7a4a3ea2
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/LICENSE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
\ No newline at end of file
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/README.md b/src/crypto/argon2_hasher/hash/cpu/cpu_features/README.md
new file mode 100755
index 00000000..039175b3
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/README.md
@@ -0,0 +1,165 @@
+# cpu_features [![Build Status](https://travis-ci.org/google/cpu_features.svg?branch=master)](https://travis-ci.org/google/cpu_features) [![Build status](https://ci.appveyor.com/api/projects/status/46d1owsj7n8dsylq/branch/master?svg=true)](https://ci.appveyor.com/project/gchatelet/cpu-features/branch/master)
+
+A cross-platform C library to retrieve CPU features (such as available
+instructions) at runtime.
+
+## Table of Contents
+
+- [Design Rationale](#rationale)
+- [Code samples](#codesample)
+- [Running sample code](#usagesample)
+- [What's supported](#support)
+- [License](#license)
+- [Build with cmake](#cmake)
+
+<a name="rationale"></a>
+## Design Rationale
+
+-   **Simple to use.** See the snippets below for examples.
+-   **Extensible.** Easy to add missing features or architectures.
+-   **Compatible with old compilers** and available on many architectures so it
+    can be used widely. To ensure that cpu_features works on as many platforms
+    as possible, we implemented it in a highly portable version of C: C99.
+-   **Sandbox-compatible.** The library uses a variety of strategies to cope
+    with sandboxed environments or when `cpuid` is unavailable. This is useful
+    when running integration tests in hermetic environments.
+-   **Thread safe, no memory allocation, and raises no exceptions.**
+    cpu_features is suitable for implementing fundamental libc functions like
+    `malloc`, `memcpy`, and `memcmp`.
+-   **Unit tested.**
+
+<a name="codesample"></a>
+### Checking features at runtime
+
+Here's a simple example that executes a codepath if the CPU supports both the
+AES and the SSE4.2 instruction sets:
+
+```c
+#include "cpuinfo_x86.h"
+
+static const X86Features features = GetX86Info().features;
+
+void Compute(void) {
+  if (features.aes && features.sse4_2) {
+    // Run optimized code.
+  } else {
+    // Run standard code.
+  }
+}
+```
+
+### Caching for faster evaluation of complex checks
+
+If you wish, you can read all the features at once into a global variable, and
+then query for the specific features you care about. Below, we store all the ARM
+features and then check whether AES and NEON are supported.
+
+```c
+#include <stdbool.h>
+#include "cpuinfo_arm.h"
+
+static const ArmFeatures features = GetArmInfo().features;
+static const bool has_aes_and_neon = features.aes && features.neon;
+
+// use has_aes_and_neon.
+```
+
+This is a good approach to take if you're checking for combinations of features
+when using a compiler that is slow to extract individual bits from bit-packed
+structures.
+
+### Checking compile time flags
+
+The following code determines whether the compiler was told to use the AVX
+instruction set (e.g., `g++ -mavx`) and sets `has_avx` accordingly.
+
+```c
+#include <stdbool.h>
+#include "cpuinfo_x86.h"
+
+static const X86Features features = GetX86Info().features;
+static const bool has_avx = CPU_FEATURES_COMPILED_X86_AVX || features.avx;
+
+// use has_avx.
+```
+
+`CPU_FEATURES_COMPILED_X86_AVX` is set to 1 if the compiler was instructed to
+use AVX and 0 otherwise, combining compile time and runtime knowledge.
+
+### Rejecting poor hardware implementations based on microarchitecture
+
+On x86, the first incarnation of a feature in a microarchitecture might not be
+the most efficient (e.g. AVX on Sandy Bridge). We provide a function to retrieve
+the underlying microarchitecture so you can decide whether to use it.
+
+Below, `has_fast_avx` is set to 1 if the CPU supports the AVX instruction
+set&mdash;but only if it's not Sandy Bridge.
+
+```c
+#include <stdbool.h>
+#include "cpuinfo_x86.h"
+
+static const X86Info info = GetX86Info();
+static const X86Microarchitecture uarch = GetX86Microarchitecture(&info);
+static const bool has_fast_avx = info.features.avx && uarch != INTEL_SNB;
+
+// use has_fast_avx.
+```
+
+This feature is currently available only for x86 microarchitectures.
+
+<a name="usagesample"></a>
+### Running sample code
+
+Building `cpu_features` also produces a small executable, `list_cpu_features`, that exercises the library.
+
+```shell
+% ./build/list_cpu_features
+arch            : x86
+brand           :        Intel(R) Xeon(R) CPU E5-1650 0 @ 3.20GHz
+family          :   6 (0x06)
+model           :  45 (0x2D)
+stepping        :   7 (0x07)
+uarch           : INTEL_SNB
+flags           : aes,avx,cx16,smx,sse4_1,sse4_2,ssse3
+```
+
+```shell
+% ./build/list_cpu_features --json
+{"arch":"x86","brand":"       Intel(R) Xeon(R) CPU E5-1650 0 @ 3.20GHz","family":6,"model":45,"stepping":7,"uarch":"INTEL_SNB","flags":["aes","avx","cx16","smx","sse4_1","sse4_2","ssse3"]}
+```
+
+<a name="support"></a>
+## What's supported
+
+|         | x86³ |   ARM   | AArch64 | MIPSel |  POWER  |
+|---------|:----:|:-------:|:-------:|:------:|:-------:|
+| Android | yes² |   yes¹  |   yes¹  |  yes¹  |   N/A   |
+| iOS     |  N/A | not yet | not yet |   N/A  |   N/A   |
+| Linux   | yes² |   yes¹  |   yes¹  |  yes¹  |   yes¹  |
+| macOS   | yes² |   N/A   | not yet |   N/A  |    no   |
+| Windows | yes² | not yet | not yet |   N/A  |   N/A   |
+
+1.  **Features revealed from Linux.** We gather data from several sources
+    depending on availability:
+    +   from glibc's
+        [getauxval](https://www.gnu.org/software/libc/manual/html_node/Auxiliary-Vector.html)
+    +   by parsing `/proc/self/auxv`
+    +   by parsing `/proc/cpuinfo`
+2.  **Features revealed from CPU.** Features are retrieved by using the `cpuid`
+    instruction.
+3.  **Microarchitecture detection.** On x86 some features are not always
+    implemented efficiently in hardware (e.g. AVX on Sandy Bridge). Exposing the
+    microarchitecture allows the client to reject particular microarchitectures.
+
+
+<a name="license"></a>
+## License
+
+The cpu_features library is licensed under the terms of the Apache license.
+See [LICENSE](LICENSE) for more information.
+
+<a name="cmake"></a>
+## Build with CMake
+
+Please check the [CMake build instructions](cmake/README.md).
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/WORKSPACE b/src/crypto/argon2_hasher/hash/cpu/cpu_features/WORKSPACE
new file mode 100755
index 00000000..8ea8a8b6
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/WORKSPACE
@@ -0,0 +1,7 @@
+# ===== googletest =====
+# Fetches googletest from GitHub at a fixed commit.
+git_repository(
+    name = "com_google_googletest",
+    remote = "https://github.com/google/googletest.git",
+    commit = "c3f65335b79f47b05629e79a54685d899bc53b93",
+)
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/appveyor.yml b/src/crypto/argon2_hasher/hash/cpu/cpu_features/appveyor.yml
new file mode 100755
index 00000000..f18635a3
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/appveyor.yml
@@ -0,0 +1,24 @@
+version: '{build}'
+shallow_clone: true
+# Only 64-bit builds are configured.
+platform: x64
+# One job per Visual Studio image; before_build passes CMAKE_GENERATOR to cmake -G.
+environment:
+  matrix:
+  - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
+    CMAKE_GENERATOR: "Visual Studio 15 2017 Win64"
+  - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
+    CMAKE_GENERATOR: "Visual Studio 14 2015 Win64"
+# NOTE(review): fast_finish presumably stops the whole build once a job fails; confirm in the AppVeyor reference.
+matrix:
+  fast_finish: true
+# Configure a Debug build with tests enabled into cmake_build/.
+before_build:
+  - cmake --version
+  - cmake -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTING=ON -H. -Bcmake_build -G "%CMAKE_GENERATOR%"
+# Build the ALL_BUILD target of the Debug configuration.
+build_script:
+  - cmake --build cmake_build --config Debug --target ALL_BUILD
+# Build the RUN_TESTS target of the Debug configuration to execute the tests.
+test_script:
+  - cmake --build cmake_build --config Debug --target RUN_TESTS
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/CpuFeaturesConfig.cmake.in b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/CpuFeaturesConfig.cmake.in
new file mode 100755
index 00000000..e0bf10e4
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/CpuFeaturesConfig.cmake.in
@@ -0,0 +1,3 @@
+# CpuFeatures CMake configuration file
+# Loads the targets exported by install(EXPORT CpuFeaturesTargets), which is installed next to this file.
+include("${CMAKE_CURRENT_LIST_DIR}/CpuFeaturesTargets.cmake")
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/README.md b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/README.md
new file mode 100755
index 00000000..b6baeaa2
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/README.md
@@ -0,0 +1,28 @@
+# CMake build instructions
+
+## Recommended usage : Incorporating cpu_features into a CMake project
+
+  For API / ABI compatibility reasons, it is recommended to build and use
+  cpu_features in a subdirectory of your project or as an embedded dependency.
+
+  This is similar to the recommended usage of the googletest framework
+  ( https://github.com/google/googletest/blob/master/googletest/README.md )
+
+  Build and use step-by-step
+
+
+  1- Download cpu_features and copy it into a sub-directory of your project,
+      or add cpu_features as a git submodule of your project.
+
+  2- You can then use the cmake command `add_subdirectory()` to include
+     cpu_features directly and use the `cpu_features` target in your project.
+
+  3- Add the `cpu_features` target to the `target_link_libraries()` section of
+     your executable or of your library.
+
+## Enabling tests
+
+  By default, cpu_features is configured as a Release build with tests
+  disabled. To enable testing, set the CMake `BUILD_TESTING` variable to `ON`;
+  [.travis.yml](../.travis.yml) and [appveyor.yml](../appveyor.yml) have
+  up-to-date examples.
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/googletest.CMakeLists.txt.in b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/googletest.CMakeLists.txt.in
new file mode 100755
index 00000000..d60a33e9
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/googletest.CMakeLists.txt.in
@@ -0,0 +1,22 @@
+cmake_minimum_required(VERSION 2.8.2)
+
+project(googletest-download NONE)
+
+# Template instantiated by configure_file() in the cpu_features CMakeLists.txt;
+# the CMAKE_BINARY_DIR references below are substituted at that point.
+include(ExternalProject)
+
+# Download-only project: the empty CONFIGURE/BUILD/INSTALL/TEST commands leave
+# just the checkout, which the parent build then adds with add_subdirectory().
+# GIT_TAG is pinned to the commit used by the Bazel WORKSPACE so test builds
+# are reproducible rather than tracking a moving branch.
+ExternalProject_Add(googletest
+  GIT_REPOSITORY    https://github.com/google/googletest.git
+  GIT_TAG           c3f65335b79f47b05629e79a54685d899bc53b93
+  SOURCE_DIR        "${CMAKE_BINARY_DIR}/googletest-src"
+  BINARY_DIR        "${CMAKE_BINARY_DIR}/googletest-build"
+  CONFIGURE_COMMAND ""
+  BUILD_COMMAND     ""
+  INSTALL_COMMAND   ""
+  TEST_COMMAND      ""
+)
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/mips32-linux-gcc.cmake b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/mips32-linux-gcc.cmake
new file mode 100755
index 00000000..dcfab7cf
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/mips32-linux-gcc.cmake
@@ -0,0 +1,34 @@
+set(CMAKE_SYSTEM_NAME "Linux")  # Target operating system.
+set(CMAKE_SYSTEM_PROCESSOR "mips32")  # Target processor.
+# ENABLE_DSPR2 and ENABLE_MSA select mutually exclusive flag sets; reject both.
+if (ENABLE_DSPR2 AND ENABLE_MSA)
+  message(FATAL_ERROR "ENABLE_DSPR2 and ENABLE_MSA cannot be combined.")
+endif ()
+# Record the selected extension in the cache and start the per-language flags.
+if (ENABLE_DSPR2)
+  set(HAVE_DSPR2            1 CACHE BOOL "" FORCE)
+  set(MIPS_CFLAGS           "-mdspr2")
+  set(MIPS_CXXFLAGS         "-mdspr2")
+elseif (ENABLE_MSA)
+  set(HAVE_MSA 1 CACHE BOOL "" FORCE)
+  set(MIPS_CFLAGS           "-mmsa")
+  set(MIPS_CXXFLAGS         "-mmsa")
+endif ()
+# Empty MIPS_CPU appends -mips32r2; "p5600" appends its own flags; other values append nothing.
+if ("${MIPS_CPU}" STREQUAL "")
+  set(MIPS_CFLAGS           "${MIPS_CFLAGS} -mips32r2")
+  set(MIPS_CXXFLAGS         "${MIPS_CXXFLAGS} -mips32r2")
+elseif ("${MIPS_CPU}" STREQUAL "p5600")
+  set(P56_FLAGS             "-mips32r5 -mload-store-pairs -msched-weight -mhard-float -mfp64")
+  set(MIPS_CFLAGS           "${MIPS_CFLAGS} ${P56_FLAGS}")
+  set(MIPS_CXXFLAGS         "${MIPS_CXXFLAGS} ${P56_FLAGS}")
+  set(CMAKE_EXE_LINKER_FLAGS  "-mfp64 ${CMAKE_EXE_LINKER_FLAGS}")
+endif ()
+# CROSS is not set in this file; presumably it holds the toolchain prefix (confirm against the invoking script).
+set(CMAKE_C_COMPILER        ${CROSS}gcc)
+set(CMAKE_CXX_COMPILER      ${CROSS}g++)
+set(AS_EXECUTABLE           ${CROSS}as)
+set(CMAKE_C_COMPILER_ARG1   "-EL ${MIPS_CFLAGS}")  # -EL: GCC MIPS option for little-endian code.
+set(CMAKE_CXX_COMPILER_ARG1 "-EL ${MIPS_CXXFLAGS}")
+# NOTE(review): presumably pre-seeds a FindThreads check result that cannot be run when cross-compiling; confirm.
+set(THREADS_PTHREAD_ARG "2" CACHE STRING "Forcibly set by CMakeLists.txt." FORCE)
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpu_features_macros.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpu_features_macros.h
new file mode 100755
index 00000000..f8220e1b
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpu_features_macros.h
@@ -0,0 +1,125 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef CPU_FEATURES_INCLUDE_CPU_FEATURES_MACROS_H_
+#define CPU_FEATURES_INCLUDE_CPU_FEATURES_MACROS_H_
+
+////////////////////////////////////////////////////////////////////////////////
+// Architectures
+////////////////////////////////////////////////////////////////////////////////
+
+#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
+     defined(__x86_64__)) &&                                     \
+    !(defined(__pnacl__) || defined(__CLR_VER))
+#define CPU_FEATURES_ARCH_X86  // 32- or 64-bit x86, unless __pnacl__ or __CLR_VER is defined
+#endif
+// 32-bit ARM, detected through either __arm__ or _M_ARM.
+#if defined(_M_ARM) || defined(__arm__)
+#define CPU_FEATURES_ARCH_ARM
+#endif
+// 64-bit ARM.
+#ifdef __aarch64__
+#define CPU_FEATURES_ARCH_AARCH64
+#endif
+// Convenience macro: set for both ARM flavours above.
+#if defined(CPU_FEATURES_ARCH_ARM) || defined(CPU_FEATURES_ARCH_AARCH64)
+#define CPU_FEATURES_ARCH_ANY_ARM
+#endif
+// MIPS.
+#ifdef __mips__
+#define CPU_FEATURES_ARCH_MIPS
+#endif
+// PowerPC.
+#ifdef __powerpc__
+#define CPU_FEATURES_ARCH_PPC
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Os
+////////////////////////////////////////////////////////////////////////////////
+
+#ifdef __linux__  // the macro name says "or Android": no separate Android test is made here
+#define CPU_FEATURES_OS_LINUX_OR_ANDROID
+#endif
+// Android only; tested independently of the Linux macro above.
+#ifdef __ANDROID__
+#define CPU_FEATURES_OS_ANDROID
+#endif
+// Windows, whether _WIN32 or _WIN64 is the macro that is defined.
+#if defined(_WIN32) || defined(_WIN64)
+#define CPU_FEATURES_OS_WINDOWS
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Compilers
+////////////////////////////////////////////////////////////////////////////////
+
+#ifdef __clang__
+#define CPU_FEATURES_COMPILER_CLANG
+#endif
+// GCC proper: __GNUC__ is only accepted when __clang__ is absent.
+#if !defined(__clang__) && defined(__GNUC__)
+#define CPU_FEATURES_COMPILER_GCC
+#endif
+// MSVC-compatible compilers; only _MSC_VER is tested, so this can coexist with CLANG.
+#ifdef _MSC_VER
+#define CPU_FEATURES_COMPILER_MSC
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Cpp
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef __cplusplus  // Plain C: both wrappers expand to nothing.
+#define CPU_FEATURES_START_CPP_NAMESPACE
+#define CPU_FEATURES_END_CPP_NAMESPACE
+#else  // C++: open/close `namespace cpu_features` around an extern "C" block.
+#define CPU_FEATURES_START_CPP_NAMESPACE \
+  namespace cpu_features {               \
+  extern "C" {
+#define CPU_FEATURES_END_CPP_NAMESPACE \
+  }                                    \
+  }
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Compiler flags
+////////////////////////////////////////////////////////////////////////////////
+
+// Compile-time feature checks for use in `#if`; see README.md for an example.
+#if defined(CPU_FEATURES_ARCH_X86)  // NOTE: each macro expands to `defined(...)`, so only evaluate it inside #if/#elif.
+#define CPU_FEATURES_COMPILED_X86_AES defined(__AES__)
+#define CPU_FEATURES_COMPILED_X86_F16C defined(__F16C__)
+#define CPU_FEATURES_COMPILED_X86_BMI defined(__BMI__)
+#define CPU_FEATURES_COMPILED_X86_BMI2 defined(__BMI2__)
+#define CPU_FEATURES_COMPILED_X86_SSE (defined(__SSE__) || (_M_IX86_FP >= 1))
+#define CPU_FEATURES_COMPILED_X86_SSE2 (defined(__SSE2__) || (_M_IX86_FP >= 2))
+#define CPU_FEATURES_COMPILED_X86_SSE3 defined(__SSE3__)
+#define CPU_FEATURES_COMPILED_X86_SSSE3 defined(__SSSE3__)
+#define CPU_FEATURES_COMPILED_X86_SSE4_1 defined(__SSE4_1__)
+#define CPU_FEATURES_COMPILED_X86_SSE4_2 defined(__SSE4_2__)
+#define CPU_FEATURES_COMPILED_X86_AVX defined(__AVX__)
+#define CPU_FEATURES_COMPILED_X86_AVX2 defined(__AVX2__)
+#define CPU_FEATURES_COMPILED_x86_AVX2 CPU_FEATURES_COMPILED_X86_AVX2  // historical misspelling, kept as an alias
+#endif
+
+#if defined(CPU_FEATURES_ARCH_ANY_ARM)
+#define CPU_FEATURES_COMPILED_ANY_ARM_NEON defined(__ARM_NEON__)
+#endif
+
+#if defined(CPU_FEATURES_ARCH_MIPS)
+#define CPU_FEATURES_COMPILED_MIPS_MSA defined(__mips_msa)
+#endif
+
+#endif  // CPU_FEATURES_INCLUDE_CPU_FEATURES_MACROS_H_
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_aarch64.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_aarch64.h
new file mode 100755
index 00000000..b8826ed4
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_aarch64.h
@@ -0,0 +1,65 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_
+#define CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_
+
+#include "cpu_features_macros.h"
+
+CPU_FEATURES_START_CPP_NAMESPACE
+
+typedef struct {  // One single-bit flag per AArch64 feature; a non-zero field means the feature is present.
+  int fp : 1;     // Floating-point.
+  int asimd : 1;  // Advanced SIMD.
+  int aes : 1;    // Hardware-accelerated Advanced Encryption Standard.
+  int pmull : 1;  // Polynomial multiply long.
+  int sha1 : 1;   // Hardware-accelerated SHA1.
+  int sha2 : 1;   // Hardware-accelerated SHA2-256.
+  int crc32 : 1;  // Hardware-accelerated CRC-32.
+  // NOTE(review): plain `int` bit-fields may be signed, so a set flag can read as -1; test for non-zero, not == 1.
+  // Make sure to update Aarch64FeaturesEnum below if you add a field here.
+} Aarch64Features;
+
+typedef struct {  // Feature flags plus four integers identifying the CPU.
+  Aarch64Features features;
+  int implementer;  // NOTE(review): presumably the "CPU implementer" value from /proc/cpuinfo - confirm in the .c file
+  int variant;
+  int part;
+  int revision;
+} Aarch64Info;
+
+Aarch64Info GetAarch64Info(void);  // Returns an Aarch64Info by value; the detection logic is not part of this header.
+
+////////////////////////////////////////////////////////////////////////////////
+// Introspection functions
+
+typedef enum {  // One enumerator per Aarch64Features field, in the same order as the struct.
+  AARCH64_FP,
+  AARCH64_ASIMD,
+  AARCH64_AES,
+  AARCH64_PMULL,
+  AARCH64_SHA1,
+  AARCH64_SHA2,
+  AARCH64_CRC32,
+  AARCH64_LAST_,  // Not a feature: its value equals the number of enumerators above.
+} Aarch64FeaturesEnum;
+
+int GetAarch64FeaturesEnumValue(const Aarch64Features* features,
+                                Aarch64FeaturesEnum value);  // presumably the flag of `features` selected by `value`
+// Presumably returns a printable name for the given enumerator - confirm in the implementation.
+const char* GetAarch64FeaturesEnumName(Aarch64FeaturesEnum);
+
+CPU_FEATURES_END_CPP_NAMESPACE
+
+#endif  // CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_arm.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_arm.h
new file mode 100755
index 00000000..7a94bb08
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_arm.h
@@ -0,0 +1,80 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef CPU_FEATURES_INCLUDE_CPUINFO_ARM_H_
+#define CPU_FEATURES_INCLUDE_CPUINFO_ARM_H_
+
+#include "cpu_features_macros.h"
+
+CPU_FEATURES_START_CPP_NAMESPACE
+
+typedef struct {  // One single-bit flag per 32-bit ARM feature; a non-zero field means the feature is present.
+  int vfp : 1;       // Vector Floating Point.
+  int iwmmxt : 1;    // Intel Wireless MMX Technology.
+  int neon : 1;      // Advanced SIMD.
+  int vfpv3 : 1;     // VFP version 3
+  int vfpv3d16 : 1;  // VFP version 3 with 16 D-registers
+  int vfpv4 : 1;     // VFP version 4 with fast context switching
+  int idiva : 1;     // SDIV and UDIV hardware division in ARM mode.
+  int idivt : 1;     // SDIV and UDIV hardware division in Thumb mode.
+  int aes : 1;       // Hardware-accelerated Advanced Encryption Standard.
+  int pmull : 1;     // Polynomial multiply long.
+  int sha1 : 1;      // Hardware-accelerated SHA1.
+  int sha2 : 1;      // Hardware-accelerated SHA2-256.
+  int crc32 : 1;     // Hardware-accelerated CRC-32.
+  // NOTE(review): plain `int` bit-fields may be signed, so a set flag can read as -1; test for non-zero, not == 1.
+  // Make sure to update ArmFeaturesEnum below if you add a field here.
+} ArmFeatures;
+
+typedef struct {  // Feature flags plus five integers identifying the CPU.
+  ArmFeatures features;
+  int implementer;  // NOTE(review): presumably the "CPU implementer" value from /proc/cpuinfo - confirm in the .c file
+  int architecture;
+  int variant;
+  int part;
+  int revision;
+} ArmInfo;
+
+// TODO(user): Add macros to know which features are present at compile
+// time.
+// Returns an ArmInfo by value; the detection logic is not part of this header.
+ArmInfo GetArmInfo(void);
+
+////////////////////////////////////////////////////////////////////////////////
+// Introspection functions
+
+typedef enum {  // One enumerator per ArmFeatures field, in the same order as the struct.
+  ARM_VFP,
+  ARM_IWMMXT,
+  ARM_NEON,
+  ARM_VFPV3,
+  ARM_VFPV3D16,
+  ARM_VFPV4,
+  ARM_IDIVA,
+  ARM_IDIVT,
+  ARM_AES,
+  ARM_PMULL,
+  ARM_SHA1,
+  ARM_SHA2,
+  ARM_CRC32,
+  ARM_LAST_,  // Not a feature: its value equals the number of enumerators above.
+} ArmFeaturesEnum;
+
+int GetArmFeaturesEnumValue(const ArmFeatures* features, ArmFeaturesEnum value);  // presumably the flag selected by `value`
+// Presumably returns a printable name for the given enumerator - confirm in the implementation.
+const char* GetArmFeaturesEnumName(ArmFeaturesEnum);
+
+CPU_FEATURES_END_CPP_NAMESPACE
+
+#endif  // CPU_FEATURES_INCLUDE_CPUINFO_ARM_H_
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_mips.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_mips.h
new file mode 100755
index 00000000..48c23a16
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_mips.h
@@ -0,0 +1,53 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef CPU_FEATURES_INCLUDE_CPUINFO_MIPS_H_
+#define CPU_FEATURES_INCLUDE_CPUINFO_MIPS_H_
+
+#include "cpu_features_macros.h"
+
+CPU_FEATURES_START_CPP_NAMESPACE
+
+typedef struct {  // One single-bit flag per MIPS feature; a non-zero field means the feature is present.
+  int msa : 1;  // MIPS SIMD Architecture
+                // https://www.mips.com/products/architectures/ase/simd/
+  int eva : 1;  // Enhanced Virtual Addressing
+                // https://www.mips.com/products/architectures/mips64/
+  // NOTE(review): plain `int` bit-fields may be signed, so a set flag can read as -1; test for non-zero, not == 1.
+  // Make sure to update MipsFeaturesEnum below if you add a field here.
+} MipsFeatures;
+
+typedef struct {  // Wrapper that currently carries only the feature flags.
+  MipsFeatures features;
+} MipsInfo;
+
+MipsInfo GetMipsInfo(void);  // Returns a MipsInfo by value; the detection logic is not part of this header.
+
+////////////////////////////////////////////////////////////////////////////////
+// Introspection functions
+
+typedef enum {  // One enumerator per MipsFeatures field, in the same order as the struct.
+  MIPS_MSA,
+  MIPS_EVA,
+  MIPS_LAST_,  // Not a feature: its value equals the number of enumerators above.
+} MipsFeaturesEnum;
+
+int GetMipsFeaturesEnumValue(const MipsFeatures* features,
+                             MipsFeaturesEnum value);  // presumably the flag of `features` selected by `value`
+// Presumably returns a printable name for the given enumerator - confirm in the implementation.
+const char* GetMipsFeaturesEnumName(MipsFeaturesEnum);
+
+CPU_FEATURES_END_CPP_NAMESPACE
+
+#endif  // CPU_FEATURES_INCLUDE_CPUINFO_MIPS_H_
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_ppc.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_ppc.h
new file mode 100755
index 00000000..654155da
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_ppc.h
@@ -0,0 +1,141 @@
+// Copyright 2018 IBM
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef CPU_FEATURES_INCLUDE_CPUINFO_PPC_H_
+#define CPU_FEATURES_INCLUDE_CPUINFO_PPC_H_
+
+#include "cpu_features_macros.h"
+#include "internal/hwcaps.h"
+
+CPU_FEATURES_START_CPP_NAMESPACE
+
+typedef struct {  // One single-bit flag per PowerPC feature; descriptions mirror PPCFeaturesEnum below.
+  int ppc32 : 1;         // 32 bit mode execution
+  int ppc64 : 1;         // 64 bit mode execution
+  int ppc601 : 1;        // Old POWER ISA
+  int altivec : 1;       // SIMD Unit
+  int fpu : 1;           // Floating Point Unit
+  int mmu : 1;           // Memory management unit
+  int mac_4xx : 1;
+  int unifiedcache : 1;  // Unified instruction and data cache
+  int spe : 1;           // Signal processing extension unit
+  int efpsingle : 1;     // SPE single precision fpu
+  int efpdouble : 1;     // SPE double precision fpu
+  int no_tb : 1;         // No timebase
+  int power4 : 1;
+  int power5 : 1;
+  int power5plus : 1;
+  int cell : 1;   // Cell broadband engine
+  int booke : 1;  // Embedded ISA
+  int smt : 1;    // Simultaneous multi-threading
+  int icachesnoop : 1;
+  int arch205 : 1;  // ISA 2.05 - POWER6
+  int pa6t : 1;     // PA Semi 6T core ISA
+  int dfp : 1;      // Decimal floating point unit
+  int power6ext : 1;
+  int arch206 : 1;  // ISA 2.06 - POWER7
+  int vsx : 1;      // Vector-scalar extension
+  int pseries_perfmon_compat : 1;  // Set of backwards compatible performance monitoring events
+  int truele : 1;
+  int ppcle : 1;
+  int arch207 : 1;   // ISA 2.07 - POWER8
+  int htm : 1;       // Hardware Transactional Memory
+  int dscr : 1;      // Data stream control register
+  int ebb : 1;       // Event base branching
+  int isel : 1;      // Integer select instructions
+  int tar : 1;       // Target address register
+  int vcrypto : 1;   // Vector cryptography instructions
+  int htm_nosc : 1;  // Transactions aborted when syscall made
+  int arch300 : 1;   // ISA 3.00 - POWER9
+  int ieee128 : 1;   // VSX IEEE Binary Float 128-bit
+  int darn : 1;      // Deliver a random number instruction
+  int scv : 1;       // scv syscall
+  int htm_no_suspend : 1;  // TM w/out suspended state
+  // NOTE(review): plain `int` bit-fields may be signed, so a set flag can read as -1; test for non-zero, not == 1.
+  // Make sure to update PPCFeaturesEnum below if you add a field here.
+} PPCFeatures;
+
+typedef struct {  // Wrapper that currently carries only the feature flags.
+  PPCFeatures features;
+} PPCInfo;
+
+// Returns a PPCInfo by value. This function is guaranteed to be malloc, memset and memcpy free.
+PPCInfo GetPPCInfo(void);
+
+typedef struct {  // Fixed-size text fields describing the machine, plus its PlatformType (from internal/hwcaps.h).
+  char platform[64];  // 0 terminated string
+  char model[64];     // 0 terminated string
+  char machine[64];   // 0 terminated string
+  char cpu[64];       // 0 terminated string
+  PlatformType type;  // platform / base_platform strings
+} PPCPlatformStrings;
+
+PPCPlatformStrings GetPPCPlatformStrings(void);  // Returns the strings by value; where they are read from is not shown in this header.
+
+////////////////////////////////////////////////////////////////////////////////
+// Introspection functions
+
+typedef enum {  // One enumerator per PPCFeatures field, in the same order as the struct.
+  PPC_32,          /* 32 bit mode execution */
+  PPC_64,          /* 64 bit mode execution */
+  PPC_601_INSTR,   /* Old POWER ISA */
+  PPC_HAS_ALTIVEC, /* SIMD Unit */
+  PPC_HAS_FPU,     /* Floating Point Unit */
+  PPC_HAS_MMU,     /* Memory management unit */
+  PPC_HAS_4xxMAC,
+  PPC_UNIFIED_CACHE,  /* Unified instruction and data cache */
+  PPC_HAS_SPE,        /* Signal processing extension unit */
+  PPC_HAS_EFP_SINGLE, /* SPE single precision fpu */
+  PPC_HAS_EFP_DOUBLE, /* SPE double precision fpu */
+  PPC_NO_TB,          /* No timebase */
+  PPC_POWER4,
+  PPC_POWER5,
+  PPC_POWER5_PLUS,
+  PPC_CELL,  /* Cell broadband engine */
+  PPC_BOOKE, /* Embedded ISA */
+  PPC_SMT,   /* Simultaneous multi-threading */
+  PPC_ICACHE_SNOOP,
+  PPC_ARCH_2_05, /* ISA 2.05 - POWER6 */
+  PPC_PA6T,      /* PA Semi 6T core ISA */
+  PPC_HAS_DFP,   /* Decimal floating point unit */
+  PPC_POWER6_EXT,
+  PPC_ARCH_2_06,              /* ISA 2.06 - POWER7 */
+  PPC_HAS_VSX,                /* Vector-scalar extension */
+  PPC_PSERIES_PERFMON_COMPAT, /* Set of backwards compatible performance
+                                 monitoring events */
+  PPC_TRUE_LE,
+  PPC_PPC_LE,
+  PPC_ARCH_2_07,      /* ISA 2.07 - POWER8 */
+  PPC_HTM,            /* Hardware Transactional Memory */
+  PPC_DSCR,           /* Data stream control register */
+  PPC_EBB,            /* Event base branching */
+  PPC_ISEL,           /* Integer select instructions */
+  PPC_TAR,            /* Target address register */
+  PPC_VEC_CRYPTO,     /* Vector cryptography instructions */
+  PPC_HTM_NOSC,       /* Transactions aborted when syscall made */
+  PPC_ARCH_3_00,      /* ISA 3.00 - POWER9 */
+  PPC_HAS_IEEE128,    /* VSX IEEE Binary Float 128-bit */
+  PPC_DARN,           /* Deliver a random number instruction */
+  PPC_SCV,            /* scv syscall */
+  PPC_HTM_NO_SUSPEND, /* TM w/out suspended state */
+  PPC_LAST_,          /* Not a feature: equals the number of enumerators above */
+} PPCFeaturesEnum;
+
+int GetPPCFeaturesEnumValue(const PPCFeatures* features, PPCFeaturesEnum value);  // presumably the flag selected by `value`
+// Presumably returns a printable name for the given enumerator - confirm in the implementation.
+const char* GetPPCFeaturesEnumName(PPCFeaturesEnum);
+
+CPU_FEATURES_END_CPP_NAMESPACE
+
+#endif  // CPU_FEATURES_INCLUDE_CPUINFO_PPC_H_
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_x86.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_x86.h
new file mode 100755
index 00000000..0123ddbe
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_x86.h
@@ -0,0 +1,154 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef CPU_FEATURES_INCLUDE_CPUINFO_X86_H_
+#define CPU_FEATURES_INCLUDE_CPUINFO_X86_H_
+
+#include "cpu_features_macros.h"
+
+CPU_FEATURES_START_CPP_NAMESPACE
+
+// See https://en.wikipedia.org/wiki/CPUID for a list of x86 cpu features.
+typedef struct {  // One single-bit flag per x86 feature; a non-zero field means the feature is present.
+  int aes : 1;
+  int erms : 1;
+  int f16c : 1;
+  int fma3 : 1;
+  int vpclmulqdq : 1;
+  int bmi1 : 1;
+  int bmi2 : 1;
+  // NOTE(review): plain `int` bit-fields may be signed, so a set flag can read as -1; test for non-zero, not == 1.
+  int ssse3 : 1;
+  int sse4_1 : 1;
+  int sse4_2 : 1;
+
+  int avx : 1;
+  int avx2 : 1;
+
+  int avx512f : 1;
+  int avx512cd : 1;
+  int avx512er : 1;
+  int avx512pf : 1;
+  int avx512bw : 1;
+  int avx512dq : 1;
+  int avx512vl : 1;
+  int avx512ifma : 1;
+  int avx512vbmi : 1;
+  int avx512vbmi2 : 1;
+  int avx512vnni : 1;
+  int avx512bitalg : 1;
+  int avx512vpopcntdq : 1;
+  int avx512_4vnniw : 1;
+  int avx512_4vbmi2 : 1;  // NOTE(review): no AVX-512 subset has this name; likely means AVX512_4FMAPS - confirm in the .c file
+
+  int smx : 1;
+  int sgx : 1;
+  int cx16 : 1;  // aka. CMPXCHG16B
+
+  // Make sure to update X86FeaturesEnum below if you add a field here.
+} X86Features;
+
+typedef struct {  // Feature flags plus the values GetX86Microarchitecture() looks at (vendor, family, model).
+  X86Features features;
+  int family;
+  int model;
+  int stepping;
+  char vendor[13];  // 0 terminated string
+} X86Info;
+
+// Calls cpuid and returns an initialized X86Info (by value).
+// This function is guaranteed to be malloc, memset and memcpy free.
+X86Info GetX86Info(void);
+
+typedef enum {  // Microarchitectures reported by GetX86Microarchitecture(); X86_UNKNOWN is the first value (0).
+  X86_UNKNOWN,
+  INTEL_CORE,      // CORE
+  INTEL_PNR,       // PENRYN
+  INTEL_NHM,       // NEHALEM
+  INTEL_ATOM_BNL,  // BONNELL
+  INTEL_WSM,       // WESTMERE
+  INTEL_SNB,       // SANDYBRIDGE
+  INTEL_IVB,       // IVYBRIDGE
+  INTEL_ATOM_SMT,  // SILVERMONT
+  INTEL_HSW,       // HASWELL
+  INTEL_BDW,       // BROADWELL
+  INTEL_SKL,       // SKYLAKE
+  INTEL_ATOM_GMT,  // GOLDMONT
+  INTEL_KBL,       // KABY LAKE
+  INTEL_CFL,       // COFFEE LAKE
+  INTEL_CNL,       // CANNON LAKE
+  AMD_HAMMER,      // K8
+  AMD_K10,         // K10
+  AMD_BOBCAT,      // K14
+  AMD_BULLDOZER,   // K15
+  AMD_JAGUAR,      // K16
+  AMD_ZEN,         // K17
+} X86Microarchitecture;
+
+// Returns the underlying microarchitecture by looking at X86Info's vendor,
+// family and model. `info` is read-only (pointer to const).
+X86Microarchitecture GetX86Microarchitecture(const X86Info* info);
+
+// Calls cpuid and fills the brand_string.
+// - brand_string *must* be of size 49 (beware of array decaying: the parameter is really a `char*`).
+// - brand_string will be zero terminated.
+// - This function calls memcpy.
+void FillX86BrandString(char brand_string[49]);
+
+////////////////////////////////////////////////////////////////////////////////
+// Introspection functions
+
+typedef enum {  // One enumerator per X86Features field, in the same order as the struct.
+  X86_AES,
+  X86_ERMS,
+  X86_F16C,
+  X86_FMA3,
+  X86_VPCLMULQDQ,
+  X86_BMI1,
+  X86_BMI2,
+  X86_SSSE3,
+  X86_SSE4_1,
+  X86_SSE4_2,
+  X86_AVX,
+  X86_AVX2,
+  X86_AVX512F,
+  X86_AVX512CD,
+  X86_AVX512ER,
+  X86_AVX512PF,
+  X86_AVX512BW,
+  X86_AVX512DQ,
+  X86_AVX512VL,
+  X86_AVX512IFMA,
+  X86_AVX512VBMI,
+  X86_AVX512VBMI2,
+  X86_AVX512VNNI,
+  X86_AVX512BITALG,
+  X86_AVX512VPOPCNTDQ,
+  X86_AVX512_4VNNIW,
+  X86_AVX512_4VBMI2,
+  X86_SMX,
+  X86_SGX,
+  X86_CX16,
+  X86_LAST_,  // Not a feature: its value equals the number of enumerators above.
+} X86FeaturesEnum;
+
+int GetX86FeaturesEnumValue(const X86Features* features, X86FeaturesEnum value);  // presumably the flag selected by `value`
+// Presumably returns a printable name for the given feature enumerator - confirm in the implementation.
+const char* GetX86FeaturesEnumName(X86FeaturesEnum);
+// Presumably returns a printable name for the given microarchitecture - confirm in the implementation.
+const char* GetX86MicroarchitectureName(X86Microarchitecture);
+
+CPU_FEATURES_END_CPP_NAMESPACE
+
+#endif  // CPU_FEATURES_INCLUDE_CPUINFO_X86_H_
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/bit_utils.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/bit_utils.h
new file mode 100755
index 00000000..75f0cdd5
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/bit_utils.h
@@ -0,0 +1,39 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_
+#define CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include "cpu_features_macros.h"
+
+CPU_FEATURES_START_CPP_NAMESPACE
+
+inline static bool IsBitSet(uint32_t reg, uint32_t bit) {  // true when bit number `bit` of `reg` is 1
+  return ((reg >> bit) & 1u) != 0u;
+}
+
+inline static uint32_t ExtractBitRange(uint32_t reg, uint32_t msb,
+                                       uint32_t lsb) {  // returns bits [msb:lsb] of reg, shifted down to bit 0
+  assert(msb >= lsb);  // checked first: an inverted range can push the shift count below past 63 (undefined)
+  const uint64_t bits = msb - lsb + 1;
+  const uint64_t mask = (1ULL << bits) - 1ULL;  // 64-bit so that a full 32-bit range (bits == 32) is representable
+  return (reg >> lsb) & mask;
+}
+
+CPU_FEATURES_END_CPP_NAMESPACE
+
+#endif  // CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/cpuid_x86.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/cpuid_x86.h
new file mode 100755
index 00000000..9dcee0de
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/cpuid_x86.h
@@ -0,0 +1,37 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef CPU_FEATURES_INCLUDE_INTERNAL_CPUID_X86_H_
+#define CPU_FEATURES_INCLUDE_INTERNAL_CPUID_X86_H_
+
+#include <stdint.h>
+
+#include "cpu_features_macros.h"
+
+CPU_FEATURES_START_CPP_NAMESPACE
+
+// A struct to hold the result of a call to cpuid: one 32-bit value per output register.
+typedef struct {
+  uint32_t eax, ebx, ecx, edx;
+} Leaf;
+
+// Retrieves the leaf (the four register values) for the cpuid leaf number `leaf_id`.
+Leaf CpuId(uint32_t leaf_id);
+
+// Returns the eax value (32 bits) of the XCR0 register.
+uint32_t GetXCR0Eax(void);
+
+CPU_FEATURES_END_CPP_NAMESPACE
+
+#endif  // CPU_FEATURES_INCLUDE_INTERNAL_CPUID_X86_H_
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/filesystem.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/filesystem.h
new file mode 100755
index 00000000..33788813
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/filesystem.h
@@ -0,0 +1,38 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// An interface for the filesystem that allows mocking the filesystem in
+// unittests.
+#ifndef CPU_FEATURES_INCLUDE_INTERNAL_FILESYSTEM_H_
+#define CPU_FEATURES_INCLUDE_INTERNAL_FILESYSTEM_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include "cpu_features_macros.h"
+
+CPU_FEATURES_START_CPP_NAMESPACE
+
+// Same as linux "open(filename, O_RDONLY)" (the int result is the descriptor), retries automatically on EINTR.
+int CpuFeatures_OpenFile(const char* filename);
+
+// Same as linux "read(file_descriptor, buffer, buffer_size)", retries
+// automatically on EINTR. Note the result is an `int`, not read()'s ssize_t.
+int CpuFeatures_ReadFile(int file_descriptor, void* buffer, size_t buffer_size);
+
+// Same as linux "close(file_descriptor)", except that no status is returned.
+void CpuFeatures_CloseFile(int file_descriptor);
+
+CPU_FEATURES_END_CPP_NAMESPACE
+
+#endif  // CPU_FEATURES_INCLUDE_INTERNAL_FILESYSTEM_H_
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/hwcaps.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/hwcaps.h
new file mode 100755
index 00000000..830cde31
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/hwcaps.h
@@ -0,0 +1,131 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Interface to retrieve hardware capabilities. It relies on Linux's getauxval
+// or `/proc/self/auxv` under the hood.
+#ifndef CPU_FEATURES_INCLUDE_INTERNAL_HWCAPS_H_
+#define CPU_FEATURES_INCLUDE_INTERNAL_HWCAPS_H_
+
+#include <stdint.h>
+#include "cpu_features_macros.h"
+
+CPU_FEATURES_START_CPP_NAMESPACE
+
+// To avoid depending on the linux kernel we reproduce the architecture specific
+// constants here.
+
+// http://elixir.free-electrons.com/linux/latest/source/arch/arm64/include/uapi/asm/hwcap.h
+#define AARCH64_HWCAP_FP (1UL << 0)  // AArch64 masks, one bit each; bit 2 is not used here
+#define AARCH64_HWCAP_ASIMD (1UL << 1)
+#define AARCH64_HWCAP_AES (1UL << 3)
+#define AARCH64_HWCAP_PMULL (1UL << 4)
+#define AARCH64_HWCAP_SHA1 (1UL << 5)
+#define AARCH64_HWCAP_SHA2 (1UL << 6)
+#define AARCH64_HWCAP_CRC32 (1UL << 7)
+
+// http://elixir.free-electrons.com/linux/latest/source/arch/arm/include/uapi/asm/hwcap.h
+#define ARM_HWCAP_VFP (1UL << 6)  // ARM_HWCAP_* and ARM_HWCAP2_* are two separate bit spaces (both reuse low bits)
+#define ARM_HWCAP_IWMMXT (1UL << 9)
+#define ARM_HWCAP_NEON (1UL << 12)
+#define ARM_HWCAP_VFPV3 (1UL << 13)
+#define ARM_HWCAP_VFPV3D16 (1UL << 14)
+#define ARM_HWCAP_VFPV4 (1UL << 16)
+#define ARM_HWCAP_IDIVA (1UL << 17)
+#define ARM_HWCAP_IDIVT (1UL << 18)
+#define ARM_HWCAP2_AES (1UL << 0)  // presumably matched against HardwareCapabilities.hwcaps2 - confirm
+#define ARM_HWCAP2_PMULL (1UL << 1)
+#define ARM_HWCAP2_SHA1 (1UL << 2)
+#define ARM_HWCAP2_SHA2 (1UL << 3)
+#define ARM_HWCAP2_CRC32 (1UL << 4)
+
+// http://elixir.free-electrons.com/linux/latest/source/arch/mips/include/uapi/asm/hwcap.h
+#define MIPS_HWCAP_VZ (1UL << 0)  // NOTE(review): the cited Linux header appears to define HWCAP_MIPS_R6 as (1 << 0) ...
+#define MIPS_HWCAP_EVA (1UL << 1)  // ... and HWCAP_MIPS_MSA as (1 << 1); these positions look out of sync - verify.
+#define MIPS_HWCAP_HTW (1UL << 2)
+#define MIPS_HWCAP_FPU (1UL << 3)
+#define MIPS_HWCAP_MIPS32R2 (1UL << 4)
+#define MIPS_HWCAP_MIPS32R5 (1UL << 5)
+#define MIPS_HWCAP_MIPS64R6 (1UL << 6)
+#define MIPS_HWCAP_DSPR1 (1UL << 7)
+#define MIPS_HWCAP_DSPR2 (1UL << 8)
+#define MIPS_HWCAP_MSA (1UL << 9)
+
+// http://elixir.free-electrons.com/linux/latest/source/arch/powerpc/include/uapi/asm/cputable.h
+#ifndef _UAPI__ASM_POWERPC_CPUTABLE_H  // presumably the kernel header's include guard: skip if it already defined these
+/* in AT_HWCAP */
+#define PPC_FEATURE_32 0x80000000
+#define PPC_FEATURE_64 0x40000000
+#define PPC_FEATURE_601_INSTR 0x20000000
+#define PPC_FEATURE_HAS_ALTIVEC 0x10000000
+#define PPC_FEATURE_HAS_FPU 0x08000000
+#define PPC_FEATURE_HAS_MMU 0x04000000
+#define PPC_FEATURE_HAS_4xxMAC 0x02000000
+#define PPC_FEATURE_UNIFIED_CACHE 0x01000000
+#define PPC_FEATURE_HAS_SPE 0x00800000
+#define PPC_FEATURE_HAS_EFP_SINGLE 0x00400000
+#define PPC_FEATURE_HAS_EFP_DOUBLE 0x00200000
+#define PPC_FEATURE_NO_TB 0x00100000
+#define PPC_FEATURE_POWER4 0x00080000
+#define PPC_FEATURE_POWER5 0x00040000
+#define PPC_FEATURE_POWER5_PLUS 0x00020000
+#define PPC_FEATURE_CELL 0x00010000
+#define PPC_FEATURE_BOOKE 0x00008000
+#define PPC_FEATURE_SMT 0x00004000
+#define PPC_FEATURE_ICACHE_SNOOP 0x00002000
+#define PPC_FEATURE_ARCH_2_05 0x00001000
+#define PPC_FEATURE_PA6T 0x00000800
+#define PPC_FEATURE_HAS_DFP 0x00000400
+#define PPC_FEATURE_POWER6_EXT 0x00000200
+#define PPC_FEATURE_ARCH_2_06 0x00000100
+#define PPC_FEATURE_HAS_VSX 0x00000080
+
+#define PPC_FEATURE_PSERIES_PERFMON_COMPAT 0x00000040
+
+/* Reserved - do not use                0x00000004 */
+#define PPC_FEATURE_TRUE_LE 0x00000002
+#define PPC_FEATURE_PPC_LE 0x00000001
+
+/* in AT_HWCAP2 */
+#define PPC_FEATURE2_ARCH_2_07 0x80000000
+#define PPC_FEATURE2_HTM 0x40000000
+#define PPC_FEATURE2_DSCR 0x20000000
+#define PPC_FEATURE2_EBB 0x10000000
+#define PPC_FEATURE2_ISEL 0x08000000
+#define PPC_FEATURE2_TAR 0x04000000
+#define PPC_FEATURE2_VEC_CRYPTO 0x02000000
+#define PPC_FEATURE2_HTM_NOSC 0x01000000
+#define PPC_FEATURE2_ARCH_3_00 0x00800000
+#define PPC_FEATURE2_HAS_IEEE128 0x00400000
+#define PPC_FEATURE2_DARN 0x00200000
+#define PPC_FEATURE2_SCV 0x00100000
+#define PPC_FEATURE2_HTM_NO_SUSPEND 0x00080000
+#endif  // _UAPI__ASM_POWERPC_CPUTABLE_H
+
+typedef struct {  // Two capability words; the *_HWCAP_* / *_HWCAP2_* masks above are meant to be tested against them.
+  unsigned long hwcaps;   // presumably the AT_HWCAP value - confirm in hwcaps.c
+  unsigned long hwcaps2;  // presumably the AT_HWCAP2 value - confirm in hwcaps.c
+} HardwareCapabilities;
+
+HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void);  // Returns both capability words by value.
+
+typedef struct {  // Two fixed-size platform name strings (used by PPCPlatformStrings in cpuinfo_ppc.h).
+  char platform[64];       // 0 terminated string
+  char base_platform[64];  // 0 terminated string
+} PlatformType;
+
+PlatformType CpuFeatures_GetPlatformType(void);  // Returns both platform strings by value.
+
+CPU_FEATURES_END_CPP_NAMESPACE
+
+#endif  // CPU_FEATURES_INCLUDE_INTERNAL_HWCAPS_H_
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/linux_features_aggregator.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/linux_features_aggregator.h
new file mode 100755
index 00000000..77661d4c
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/linux_features_aggregator.h
@@ -0,0 +1,60 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// CapabilityConfig provides a way to map cpu features to hardware caps and
+// /proc/cpuinfo flags. We then provide functions to update capabilities from
+// either source.
+#ifndef CPU_FEATURES_INCLUDE_INTERNAL_LINUX_FEATURES_AGGREGATOR_H_
+#define CPU_FEATURES_INCLUDE_INTERNAL_LINUX_FEATURES_AGGREGATOR_H_
+
+#include <ctype.h>
+#include <stdint.h>
+#include "cpu_features_macros.h"
+#include "internal/hwcaps.h"
+#include "internal/string_view.h"
+
+CPU_FEATURES_START_CPP_NAMESPACE
+
+// Use the following macro to declare setter functions to be used in CapabilityConfig. It expands to
+// `static void set_<FeatureName>(void*, bool)`, which casts the pointer to FeatureType* and assigns the field.
+#define DECLARE_SETTER(FeatureType, FeatureName)                    \
+  static void set_##FeatureName(void* const features, bool value) { \
+    ((FeatureType*)features)->FeatureName = value;                  \
+  }
+
+// Describes the relationship between hardware caps and /proc/cpuinfo flags.
+typedef struct {
+  const HardwareCapabilities hwcaps_mask;  // hwcaps / hwcaps2 bits associated with the feature
+  const char* const proc_cpuinfo_flag;     // flag name associated with the feature in /proc/cpuinfo
+  void (*set_bit)(void* const, bool);  // setter for the corresponding bit.
+} CapabilityConfig;
+
+// For every config, looks into flags_line for the presence of the
+// corresponding proc_cpuinfo_flag, calls `set_bit` accordingly.
+// Note: features is a pointer to the underlying Feature struct.
+void CpuFeatures_SetFromFlags(const size_t configs_size,  // presumably the number of entries in `configs`
+                              const CapabilityConfig* configs,
+                              const StringView flags_line,
+                              void* const features);
+
+// For every config, looks into hwcaps for the presence of the feature. Calls
+// `set_bit` with true if the hardware capability is found.
+// Note: features is a pointer to the underlying Feature struct.
+void CpuFeatures_OverrideFromHwCaps(const size_t configs_size,  // presumably the number of entries in `configs`
+                                    const CapabilityConfig* configs,
+                                    const HardwareCapabilities hwcaps,
+                                    void* const features);
+
+CPU_FEATURES_END_CPP_NAMESPACE
+#endif  // CPU_FEATURES_INCLUDE_INTERNAL_LINUX_FEATURES_AGGREGATOR_H_
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/stack_line_reader.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/stack_line_reader.h
new file mode 100755
index 00000000..c540f6b2
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/stack_line_reader.h
@@ -0,0 +1,49 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Reads a file line by line and stores the data on the stack. This allows
+// parsing files in one go without allocating.
+#ifndef CPU_FEATURES_INCLUDE_INTERNAL_STACK_LINE_READER_H_
+#define CPU_FEATURES_INCLUDE_INTERNAL_STACK_LINE_READER_H_
+
+#include <stdbool.h>
+
+#include "cpu_features_macros.h"
+#include "internal/string_view.h"
+
+CPU_FEATURES_START_CPP_NAMESPACE
+
+typedef struct {
+  char buffer[STACK_LINE_READER_BUFFER_SIZE];  // fixed-size storage, no heap.
+  StringView view;  // presumably the not yet consumed part of buffer - TODO confirm.
+  int fd;  // presumably the descriptor given to StackLineReader_Initialize.
+  bool skip_mode;  // NOTE(review): looks related to over-long lines - confirm.
+} StackLineReader;
+
+// Initializes a StackLineReader.
+void StackLineReader_Initialize(StackLineReader* reader, int fd);
+
+typedef struct {
+  StringView line;  // A view of the line (not 0 terminated, like any StringView).
+  bool eof;         // Nothing more to read, we reached EOF.
+  bool full_line;   // If false the line was truncated to
+                    // STACK_LINE_READER_BUFFER_SIZE.
+} LineResult;  // Returned by value from StackLineReader_NextLine.
+
+// Reads the file pointed to by fd and tries to read a full line.
+LineResult StackLineReader_NextLine(StackLineReader* reader);
+
+CPU_FEATURES_END_CPP_NAMESPACE
+
+#endif  // CPU_FEATURES_INCLUDE_INTERNAL_STACK_LINE_READER_H_
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/string_view.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/string_view.h
new file mode 100755
index 00000000..aa3779c4
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/string_view.h
@@ -0,0 +1,108 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// A view over a piece of string. The view is not 0 terminated.
+#ifndef CPU_FEATURES_INCLUDE_INTERNAL_STRING_VIEW_H_
+#define CPU_FEATURES_INCLUDE_INTERNAL_STRING_VIEW_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+#include "cpu_features_macros.h"
+
+CPU_FEATURES_START_CPP_NAMESPACE
+
+typedef struct {
+  const char* ptr;  // first character; the data is not 0 terminated.
+  size_t size;      // number of characters in the view.
+} StringView;
+// The empty view {NULL, 0}; in C the static object is zero-initialized.
+#ifdef __cplusplus
+static const StringView kEmptyStringView = {NULL, 0};
+#else
+static const StringView kEmptyStringView;
+#endif
+
+// Builds a StringView over the `size` characters starting at `str`; nothing
+// is copied. `str` may be NULL only when `size` is 0.
+static inline StringView view(const char* str, const size_t size) {
+  const StringView result = {str, size};
+  return result;
+}
+
+// Builds a StringView over a 0-terminated string; the length is taken with
+// strlen(), so `str` must not be NULL.
+static inline StringView str(const char* str) { return view(str, strlen(str)); }
+
+// Returns the index of the first occurrence of c in view or -1 if not found.
+int CpuFeatures_StringView_IndexOfChar(const StringView view, char c);
+
+// Returns the index of the first occurrence of sub_view in view or -1 if not
+// found.
+int CpuFeatures_StringView_IndexOf(const StringView view,
+                                   const StringView sub_view);
+
+// Returns whether a is equal to b (same content).
+bool CpuFeatures_StringView_IsEquals(const StringView a, const StringView b);
+
+// Returns whether a starts with b.
+bool CpuFeatures_StringView_StartsWith(const StringView a, const StringView b);
+
+// Removes count characters from the beginning of view, or returns
+// kEmptyStringView if count is greater than view.size.
+StringView CpuFeatures_StringView_PopFront(const StringView str_view,
+                                           size_t count);
+
+// Removes count characters from the end of view, or returns kEmptyStringView
+// if count is greater than view.size.
+StringView CpuFeatures_StringView_PopBack(const StringView str_view,
+                                          size_t count);
+
+// Keeps the first count characters of view, or returns view unchanged if
+// count is greater than view.size.
+StringView CpuFeatures_StringView_KeepFront(const StringView str_view,
+                                            size_t count);
+
+// Retrieves the first character of view. If view is empty the behavior is
+// undefined.
+char CpuFeatures_StringView_Front(const StringView view);
+
+// Retrieves the last character of view. If view is empty the behavior is
+// undefined.
+char CpuFeatures_StringView_Back(const StringView view);
+
+// Removes leading and trailing space characters.
+StringView CpuFeatures_StringView_TrimWhitespace(StringView view);
+
+// Convert StringView to positive integer. e.g. "42", "0x2a".
+// Returns -1 on error.
+int CpuFeatures_StringView_ParsePositiveNumber(const StringView view);
+
+// Copies src StringView to dst buffer.
+void CpuFeatures_StringView_CopyString(const StringView src, char* dst,
+                                       size_t dst_size);
+
+// Checks if line contains the specified whitespace separated word.
+bool CpuFeatures_StringView_HasWord(const StringView line,
+                                    const char* const word);
+
+// Get key/value from line. key and value are separated by ": ".
+// key and value are cleaned up from leading and trailing whitespaces.
+bool CpuFeatures_StringView_GetAttributeKeyValue(const StringView line,
+                                                 StringView* key,
+                                                 StringView* value);
+
+CPU_FEATURES_END_CPP_NAMESPACE
+
+#endif  // CPU_FEATURES_INCLUDE_INTERNAL_STRING_VIEW_H_
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/run_integration.sh b/src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/run_integration.sh
new file mode 100755
index 00000000..a1de0d1e
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/run_integration.sh
@@ -0,0 +1,173 @@
+#!/bin/bash
+
+readonly SCRIPT_FOLDER=$(cd -P -- "$(dirname -- "$0")" && pwd -P)  # physical directory of the invoked script
+readonly PROJECT_FOLDER="${SCRIPT_FOLDER}/.."  # parent of the scripts folder, used as cmake source dir
+readonly ARCHIVE_FOLDER=~/cpu_features_archives  # download/extract cache for toolchains and qemu
+readonly QEMU_INSTALL=${ARCHIVE_FOLDER}/qemu  # qemu --prefix, see installqemuifneeded
+readonly DEFAULT_CMAKE_ARGS=" -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTING=ON"  # always passed to cmake by integrate
+
+# Unpacks tarball $1 in the current directory; exits 1 on unknown extension.
+function extract() {
+  local -r archive=$1
+  case "${archive}" in
+    *.tar.gz)  tar xzf "${archive}" ;;
+    *.tar.xz)  tar xJf "${archive}" ;;
+    *.tar.bz2) tar xjf "${archive}" ;;
+    *) echo "don't know how to extract '$1'..."; exit 1 ;;
+  esac
+}
+
+# Downloads URL $1 into ARCHIVE_FOLDER and extracts it there, unless the
+# folder $2 (relative to ARCHIVE_FOLDER) already exists. Leaves the cwd changed.
+function unpackifnotexists() {
+  local URL=$1 RELATIVE_FOLDER=$2
+  mkdir -p "${ARCHIVE_FOLDER}"
+  cd "${ARCHIVE_FOLDER}" || exit
+  if [[ ! -d "${ARCHIVE_FOLDER}/${RELATIVE_FOLDER}" ]]; then
+    local ARCHIVE_NAME=${URL##*/}  # text after the last '/', no subshell needed
+    test -f "${ARCHIVE_NAME}" || wget -q "${URL}" || exit  # reuse tarball, stop on failed download
+    extract "${ARCHIVE_NAME}"
+  fi
+}
+
+# Builds a static user-mode qemu into QEMU_INSTALL unless its .build stamp matches.
+function installqemuifneeded() {
+  local VERSION=${QEMU_VERSION:=2.11.1}
+  local ARCHES=${QEMU_ARCHES:=arm aarch64 i386 x86_64 mips mipsel}
+  local TARGETS=${QEMU_TARGETS:=$(echo "$ARCHES" | sed 's#$# #;s#\([^ ]*\) #\1-linux-user #g')}
+  if echo "${VERSION} ${TARGETS}" | cmp --silent "${QEMU_INSTALL}/.build" -; then
+    echo "qemu ${VERSION} up to date!"
+    return 0
+  fi
+
+  echo "VERSION: ${VERSION}"
+  echo "TARGETS: ${TARGETS}"
+
+  rm -rf "${QEMU_INSTALL}"
+
+  # Checking for a tarball before downloading makes testing easier :-)
+  local QEMU_URL="http://wiki.qemu-project.org/download/qemu-${VERSION}.tar.xz"
+  local QEMU_FOLDER="qemu-${VERSION}"
+  unpackifnotexists "${QEMU_URL}" "${QEMU_FOLDER}"
+  cd "${QEMU_FOLDER}" || exit
+
+  ./configure \
+    --prefix="${QEMU_INSTALL}" \
+    --target-list="${TARGETS}" \
+    --disable-docs \
+    --disable-sdl \
+    --disable-gtk \
+    --disable-gnutls \
+    --disable-gcrypt \
+    --disable-nettle \
+    --disable-curses \
+    --static || exit
+  # Stop on build errors so the stamp is only written after a good install.
+  make -j4 || exit
+  make install || exit
+
+  echo "$VERSION $TARGETS" > "${QEMU_INSTALL}/.build"
+}
+
+# Exits with status 1 unless the variable whose name is passed as $1 is set.
+function assert_defined(){
+  [[ -n "${!1+x}" ]] || { echo "${1} needs to be defined" >&2; exit 1; }
+}
+
+function integrate() {  # configures, builds, then runs the tests and the demo
+  cd "${PROJECT_FOLDER}" || exit
+  cmake -H. -B"${BUILD_DIR}" ${DEFAULT_CMAKE_ARGS} ${CMAKE_ADDITIONAL_ARGS}  # unquoted: each option becomes one argument
+  cmake --build "${BUILD_DIR}" --target all
+  # QEMU is the command prefix placed in front of every binary run below.
+  if [[ -n "${QEMU_ARCH}" ]]; then
+    if [[ "${QEMU_ARCH}" == "DISABLED" ]]; then
+      QEMU="true || "  # expands to `true` plus ignored arguments: nothing is executed
+    else
+      installqemuifneeded
+      QEMU="${QEMU_INSTALL}/bin/qemu-${QEMU_ARCH} ${QEMU_ARGS}"
+    fi
+  else
+    QEMU=""  # empty prefix: the binaries are run directly
+  fi
+  # Run tests (every *_test file in the build tree's test folder)
+  for test_binary in ${BUILD_DIR}/test/*_test; do ${QEMU} ${test_binary}; done
+  # Run demo program
+  ${QEMU} "${BUILD_DIR}/list_cpu_features"
+}
+
+# Fetches the Linaro gcc + sysroot for ${TARGET}; extends the cmake/qemu args.
+function expand_linaro_config() {
+  assert_defined TARGET
+  local LINARO_ROOT_URL=https://releases.linaro.org/components/toolchain/binaries/7.2-2017.11
+  local GCC_RELATIVE_FOLDER="gcc-linaro-7.2.1-2017.11-x86_64_${TARGET}"
+  local SYSROOT_RELATIVE_FOLDER=sysroot-glibc-linaro-2.25-2017.11-${TARGET}
+  local GCC_URL=${LINARO_ROOT_URL}/${TARGET}/gcc-linaro-7.2.1-2017.11-x86_64_${TARGET}.tar.xz
+  local SYSROOT_URL=${LINARO_ROOT_URL}/${TARGET}/sysroot-glibc-linaro-2.25-2017.11-${TARGET}.tar.xz
+  local GCC_FOLDER=${ARCHIVE_FOLDER}/${GCC_RELATIVE_FOLDER}
+  local SYSROOT_FOLDER=${ARCHIVE_FOLDER}/${SYSROOT_RELATIVE_FOLDER}
+
+  unpackifnotexists "${GCC_URL}" "${GCC_RELATIVE_FOLDER}"
+  unpackifnotexists "${SYSROOT_URL}" "${SYSROOT_RELATIVE_FOLDER}"
+
+  local OPTION
+  for OPTION in "-DCMAKE_SYSROOT=${SYSROOT_FOLDER}" \
+      "-DCMAKE_C_COMPILER=${GCC_FOLDER}/bin/${TARGET}-gcc" \
+      "-DCMAKE_CXX_COMPILER=${GCC_FOLDER}/bin/${TARGET}-g++" \
+      "-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER" \
+      "-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=ONLY" \
+      "-DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=ONLY"; do
+    CMAKE_ADDITIONAL_ARGS+=" ${OPTION}"
+  done
+  QEMU_ARGS+=" -L ${SYSROOT_FOLDER}"
+  QEMU_ARGS+=" -E LD_LIBRARY_PATH=/lib"
+}
+
+# Fetches the Codescape MIPS toolchain; extends the cmake/qemu argument lists.
+function expand_codescape_config() {
+  assert_defined TARGET
+  local FLAVOUR=${QEMU_ARCH}-r2-hard DATE=2016.05-03
+  local CODESCAPE_URL=http://codescape-mips-sdk.imgtec.com/components/toolchain/${DATE}/Codescape.GNU.Tools.Package.${DATE}.for.MIPS.MTI.Linux.CentOS-5.x86_64.tar.gz
+  local GCC_URL=${CODESCAPE_URL}
+  local GCC_RELATIVE_FOLDER=${TARGET}/${DATE}
+  unpackifnotexists "${GCC_URL}" "${GCC_RELATIVE_FOLDER}"
+
+  # The sysroot comes from the same archive and sits inside the toolchain
+  # folder, so that folder is the one to test. SYSROOT_RELATIVE_FOLDER has to
+  # be set here: the one in expand_linaro_config is local to that function.
+  local SYSROOT_URL=${CODESCAPE_URL}
+  local SYSROOT_RELATIVE_FOLDER=${GCC_RELATIVE_FOLDER}
+  local SYSROOT_FOLDER=${ARCHIVE_FOLDER}/${GCC_RELATIVE_FOLDER}/sysroot/${FLAVOUR}
+  unpackifnotexists "${SYSROOT_URL}" "${SYSROOT_RELATIVE_FOLDER}"
+  CMAKE_ADDITIONAL_ARGS+=" -DENABLE_MSA=1"
+  CMAKE_ADDITIONAL_ARGS+=" -DMIPS_CPU=p5600"
+  CMAKE_ADDITIONAL_ARGS+=" -DCMAKE_TOOLCHAIN_FILE=cmake/mips32-linux-gcc.cmake"
+  CMAKE_ADDITIONAL_ARGS+=" -DCROSS=${TARGET}-"
+  CMAKE_ADDITIONAL_ARGS+=" -DCMAKE_FIND_ROOT_PATH=${ARCHIVE_FOLDER}/${GCC_RELATIVE_FOLDER}"
+  QEMU_ARGS+=" -L ${SYSROOT_FOLDER} -E LD_LIBRARY_PATH=/lib -cpu P5600"
+}
+
+# Resets the per-target globals (CMAKE_ADDITIONAL_ARGS, QEMU_ARGS, BUILD_DIR),
+# applies the setup selected by ${TOOLCHAIN} and then runs integrate.
+# Exits 1 when the toolchain is unknown.
+function expand_environment_and_integrate() {
+  assert_defined PROJECT_FOLDER
+  assert_defined TARGET
+
+  CMAKE_ADDITIONAL_ARGS=""
+  QEMU_ARGS=""
+  BUILD_DIR="${PROJECT_FOLDER}/cmake_build/${TARGET}"
+  mkdir -p "${BUILD_DIR}"
+
+  case "${TOOLCHAIN}" in
+    NATIVE)    QEMU_ARCH="" ;;
+    LINARO)    expand_linaro_config ;;
+    CODESCAPE) expand_codescape_config ;;
+    *) echo "Unknown toolchain '${TOOLCHAIN}'..."; exit 1 ;;
+  esac
+  integrate
+}
+
+if [ "${CONTINUOUS_INTEGRATION}" = "true" ]; then  # otherwise this file only defines functions
+  QEMU_ARCHES=${QEMU_ARCH}  # read by installqemuifneeded: build qemu for this architecture only
+  expand_environment_and_integrate
+fi
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/test_integration.sh b/src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/test_integration.sh
new file mode 100755
index 00000000..53d1d3b8
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/test_integration.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+source "$(dirname -- "$0")"/run_integration.sh
+
+# Toolchains for little-endian, 64-bit ARMv8 for GNU/Linux systems
+function set_aarch64-linux-gnu() {
+  TOOLCHAIN=LINARO
+  TARGET=aarch64-linux-gnu
+  QEMU_ARCH=aarch64
+}
+
+# Toolchains for little-endian, hard-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems
+function set_arm-linux-gnueabihf() {
+  TOOLCHAIN=LINARO
+  TARGET=arm-linux-gnueabihf
+  QEMU_ARCH=arm
+}
+
+# Toolchains for little-endian, 32-bit ARMv8 for GNU/Linux systems
+function set_armv8l-linux-gnueabihf() {
+  TOOLCHAIN=LINARO
+  TARGET=armv8l-linux-gnueabihf
+  QEMU_ARCH=arm
+}
+
+# Toolchains for little-endian, soft-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems
+function set_arm-linux-gnueabi() {
+  TOOLCHAIN=LINARO
+  TARGET=arm-linux-gnueabi
+  QEMU_ARCH=arm
+}
+
+# Toolchains for big-endian, 64-bit ARMv8 for GNU/Linux systems
+function set_aarch64_be-linux-gnu() {
+  TOOLCHAIN=LINARO
+  TARGET=aarch64_be-linux-gnu
+  QEMU_ARCH="DISABLED"  # built only: integrate does not execute the binaries
+}
+
+# Toolchains for big-endian, hard-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems
+function set_armeb-linux-gnueabihf() {
+  TOOLCHAIN=LINARO
+  TARGET=armeb-linux-gnueabihf
+  QEMU_ARCH="DISABLED"  # built only: integrate does not execute the binaries
+}
+
+# Toolchains for big-endian, soft-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems
+function set_armeb-linux-gnueabi() {
+  TOOLCHAIN=LINARO
+  TARGET=armeb-linux-gnueabi
+  QEMU_ARCH="DISABLED"  # built only: integrate does not execute the binaries
+}
+
+function set_mips() {  # MIPS build through the Codescape toolchain
+  TOOLCHAIN=CODESCAPE
+  TARGET=mips-mti-linux-gnu
+  QEMU_ARCH="DISABLED"  # built only: integrate does not execute the binaries
+}
+
+function set_native() {  # host toolchain, binaries are run without qemu
+  TOOLCHAIN=NATIVE
+  TARGET=native
+  QEMU_ARCH=""
+}
+
+ENVIRONMENTS="
+  set_aarch64-linux-gnu
+  set_arm-linux-gnueabihf
+  set_armv8l-linux-gnueabihf
+  set_arm-linux-gnueabi
+  set_aarch64_be-linux-gnu
+  set_armeb-linux-gnueabihf
+  set_armeb-linux-gnueabi
+  set_native
+  set_mips
+"
+
+for SET_ENVIRONMENT in ${ENVIRONMENTS}; do  # each setter overwrites TOOLCHAIN, TARGET and QEMU_ARCH
+  ${SET_ENVIRONMENT}
+  expand_environment_and_integrate
+done
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_clang_gcc.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_clang_gcc.c
new file mode 100755
index 00000000..472e7125
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_clang_gcc.c
@@ -0,0 +1,36 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "internal/cpuid_x86.h"
+
+#if defined(CPU_FEATURES_ARCH_X86)
+#if defined(CPU_FEATURES_COMPILER_CLANG) || defined(CPU_FEATURES_COMPILER_GCC)
+
+#include <cpuid.h>
+
+Leaf CpuId(uint32_t leaf_id) {  // runs CPUID for leaf_id and returns its four registers
+  Leaf leaf;
+  __cpuid_count(leaf_id, 0, leaf.eax, leaf.ebx, leaf.ecx, leaf.edx);  // sub-leaf fixed to 0
+  return leaf;
+}
+
+uint32_t GetXCR0Eax(void) {  // low 32 bits of extended control register 0
+  uint32_t eax, edx;
+  __asm("XGETBV" : "=a"(eax), "=d"(edx) : "c"(0));  // ecx = 0 selects XCR0
+  return eax;  // edx receives the upper half and is discarded
+}
+
+#endif  // defined(CPU_FEATURES_COMPILER_CLANG) ||
+        // defined(CPU_FEATURES_COMPILER_GCC)
+#endif  // defined(CPU_FEATURES_ARCH_X86)
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_msvc.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_msvc.c
new file mode 100755
index 00000000..cd8f19f2
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_msvc.c
@@ -0,0 +1,34 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "internal/cpuid_x86.h"
+
+#if defined(CPU_FEATURES_ARCH_X86) && defined(CPU_FEATURES_COMPILER_MSC)
+#include <immintrin.h>
+#include <intrin.h>  // For __cpuidex()
+
+// Runs CPUID for `leaf_id` through the MSVC __cpuid intrinsic and copies the
+// four values it fills in, in order, to the eax/ebx/ecx/edx fields of a Leaf.
+Leaf CpuId(uint32_t leaf_id) {
+  int regs[4];
+  Leaf result;
+  __cpuid(regs, leaf_id);
+  result.eax = regs[0];  result.ebx = regs[1];
+  result.ecx = regs[2];  result.edx = regs[3];
+  return result;
+}
+
+uint32_t GetXCR0Eax(void) { return (uint32_t)_xgetbv(0); }  // XCR0, low 32 bits
+
+#endif  // defined(CPU_FEATURES_ARCH_X86) && defined(CPU_FEATURES_COMPILER_MSC)
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_aarch64.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_aarch64.c
new file mode 100755
index 00000000..0d111ff9
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_aarch64.c
@@ -0,0 +1,141 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "cpuinfo_aarch64.h"
+
+#include "internal/filesystem.h"
+#include "internal/hwcaps.h"
+#include "internal/linux_features_aggregator.h"
+#include "internal/stack_line_reader.h"
+#include "internal/string_view.h"
+
+#include <ctype.h>
+
+DECLARE_SETTER(Aarch64Features, fp)  // defines set_fp(), see DECLARE_SETTER
+DECLARE_SETTER(Aarch64Features, asimd)
+DECLARE_SETTER(Aarch64Features, aes)
+DECLARE_SETTER(Aarch64Features, pmull)
+DECLARE_SETTER(Aarch64Features, sha1)
+DECLARE_SETTER(Aarch64Features, sha2)
+DECLARE_SETTER(Aarch64Features, crc32)
+// One row per feature: hwcaps mask, /proc/cpuinfo flag name, field setter.
+static const CapabilityConfig kConfigs[] = {
+    {{AARCH64_HWCAP_FP, 0}, "fp", &set_fp},           //
+    {{AARCH64_HWCAP_ASIMD, 0}, "asimd", &set_asimd},  //
+    {{AARCH64_HWCAP_AES, 0}, "aes", &set_aes},        //
+    {{AARCH64_HWCAP_PMULL, 0}, "pmull", &set_pmull},  //
+    {{AARCH64_HWCAP_SHA1, 0}, "sha1", &set_sha1},     //
+    {{AARCH64_HWCAP_SHA2, 0}, "sha2", &set_sha2},     //
+    {{AARCH64_HWCAP_CRC32, 0}, "crc32", &set_crc32},  //
+};
+
+static const size_t kConfigsSize = sizeof(kConfigs) / sizeof(CapabilityConfig);
+
+// Handles one /proc/cpuinfo line; returns false once the reader reports EOF.
+static bool HandleAarch64Line(const LineResult result,
+                              Aarch64Info* const info) {
+  StringView tag, text;
+  if (CpuFeatures_StringView_GetAttributeKeyValue(result.line, &tag, &text)) {
+    if (CpuFeatures_StringView_IsEquals(tag, str("Features"))) {
+      CpuFeatures_SetFromFlags(kConfigsSize, kConfigs, text, &info->features);
+    } else if (CpuFeatures_StringView_IsEquals(tag, str("CPU implementer"))) {
+      info->implementer = CpuFeatures_StringView_ParsePositiveNumber(text);
+    } else if (CpuFeatures_StringView_IsEquals(tag, str("CPU variant"))) {
+      info->variant = CpuFeatures_StringView_ParsePositiveNumber(text);
+    } else if (CpuFeatures_StringView_IsEquals(tag, str("CPU part"))) {
+      info->part = CpuFeatures_StringView_ParsePositiveNumber(text);
+    } else if (CpuFeatures_StringView_IsEquals(tag, str("CPU revision"))) {
+      info->revision = CpuFeatures_StringView_ParsePositiveNumber(text);
+    }
+  }
+  return !result.eof;
+}
+
+// Runs HandleAarch64Line over /proc/cpuinfo line by line; does nothing when
+// the file cannot be opened.
+static void FillProcCpuInfoData(Aarch64Info* const info) {
+  StackLineReader reader;
+  const int fd = CpuFeatures_OpenFile("/proc/cpuinfo");
+  if (fd < 0) return;
+
+  StackLineReader_Initialize(&reader, fd);
+  // The handler returns false for the line that carries EOF.
+  while (HandleAarch64Line(StackLineReader_NextLine(&reader), info)) {
+  }
+  CpuFeatures_CloseFile(fd);
+}
+
+// All-zero record (static storage) that GetAarch64Info starts from.
+static const Aarch64Info kEmptyAarch64Info;
+
+// Collects CPU data from /proc/cpuinfo and from the hardware capabilities, so
+// something is still reported when /proc/cpuinfo is unreadable (sandbox).
+Aarch64Info GetAarch64Info(void) {
+  Aarch64Info info = kEmptyAarch64Info;
+  HardwareCapabilities hwcaps;
+  FillProcCpuInfoData(&info);
+  // A capability found in hwcaps switches its feature on afterwards.
+  hwcaps = CpuFeatures_GetHardwareCapabilities();
+  CpuFeatures_OverrideFromHwCaps(kConfigsSize, kConfigs, hwcaps,
+                                 &info.features);
+  return info;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Introspection functions
+
+// Returns the field of `features` that corresponds to `value`.
+// AARCH64_LAST_ and values that match no enumerator yield false (0).
+// `features` is dereferenced, so it must not be NULL.
+int GetAarch64FeaturesEnumValue(const Aarch64Features* features,
+                                Aarch64FeaturesEnum value) {
+  // The switch has no `default:` label, so compilers can warn (-Wswitch)
+  // when an enumerator is not handled here.
+  switch (value) {
+    case AARCH64_FP:    return features->fp;
+    case AARCH64_ASIMD: return features->asimd;
+    case AARCH64_AES:   return features->aes;
+    case AARCH64_PMULL: return features->pmull;
+    case AARCH64_SHA1:  return features->sha1;
+    case AARCH64_SHA2:  return features->sha2;
+    case AARCH64_CRC32: return features->crc32;
+    case AARCH64_LAST_:
+      break;
+  }
+
+  // Only reached for AARCH64_LAST_ or a value outside the enumeration.
+  return false;
+}
+
+// Returns the lower-case name of the feature selected by `value` (the same
+// spelling as the flag names in kConfigs), or "unknown feature" for
+// AARCH64_LAST_ and values that match no enumerator.
+const char* GetAarch64FeaturesEnumName(Aarch64FeaturesEnum value) {
+  // The switch has no `default:` label, so compilers can warn (-Wswitch)
+  // when an enumerator is not handled here.
+  switch (value) {
+    case AARCH64_FP:    return "fp";
+    case AARCH64_ASIMD: return "asimd";
+    case AARCH64_AES:   return "aes";
+    case AARCH64_PMULL: return "pmull";
+    case AARCH64_SHA1:  return "sha1";
+    case AARCH64_SHA2:  return "sha2";
+    case AARCH64_CRC32: return "crc32";
+    case AARCH64_LAST_:
+      break;
+  }
+
+  // Only reached for AARCH64_LAST_ or a value outside the enumeration.
+  return "unknown feature";
+}
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_arm.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_arm.c
new file mode 100755
index 00000000..3ea06419
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_arm.c
@@ -0,0 +1,259 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "cpuinfo_arm.h"
+
+#include "internal/bit_utils.h"
+#include "internal/filesystem.h"
+#include "internal/hwcaps.h"
+#include "internal/linux_features_aggregator.h"
+#include "internal/stack_line_reader.h"
+#include "internal/string_view.h"
+
+#include <ctype.h>
+
+DECLARE_SETTER(ArmFeatures, vfp)  // defines set_vfp(), see DECLARE_SETTER
+DECLARE_SETTER(ArmFeatures, iwmmxt)
+DECLARE_SETTER(ArmFeatures, neon)
+DECLARE_SETTER(ArmFeatures, vfpv3)
+DECLARE_SETTER(ArmFeatures, vfpv3d16)
+DECLARE_SETTER(ArmFeatures, vfpv4)
+DECLARE_SETTER(ArmFeatures, idiva)
+DECLARE_SETTER(ArmFeatures, idivt)
+DECLARE_SETTER(ArmFeatures, aes)
+DECLARE_SETTER(ArmFeatures, pmull)
+DECLARE_SETTER(ArmFeatures, sha1)
+DECLARE_SETTER(ArmFeatures, sha2)
+DECLARE_SETTER(ArmFeatures, crc32)
+// One row per feature: hwcaps mask, /proc/cpuinfo flag name, field setter.
+static const CapabilityConfig kConfigs[] = {
+    {{ARM_HWCAP_VFP, 0}, "vfp", &set_vfp},                 // first mask slot
+    {{ARM_HWCAP_IWMMXT, 0}, "iwmmxt", &set_iwmmxt},        //
+    {{ARM_HWCAP_NEON, 0}, "neon", &set_neon},              //
+    {{ARM_HWCAP_VFPV3, 0}, "vfpv3", &set_vfpv3},           //
+    {{ARM_HWCAP_VFPV3D16, 0}, "vfpv3d16", &set_vfpv3d16},  //
+    {{ARM_HWCAP_VFPV4, 0}, "vfpv4", &set_vfpv4},           //
+    {{ARM_HWCAP_IDIVA, 0}, "idiva", &set_idiva},           //
+    {{ARM_HWCAP_IDIVT, 0}, "idivt", &set_idivt},           //
+    {{0, ARM_HWCAP2_AES}, "aes", &set_aes},                // second mask slot
+    {{0, ARM_HWCAP2_PMULL}, "pmull", &set_pmull},          //
+    {{0, ARM_HWCAP2_SHA1}, "sha1", &set_sha1},             //
+    {{0, ARM_HWCAP2_SHA2}, "sha2", &set_sha2},             //
+    {{0, ARM_HWCAP2_CRC32}, "crc32", &set_crc32},          //
+};
+
+static const size_t kConfigsSize = sizeof(kConfigs) / sizeof(CapabilityConfig);
+
+typedef struct {
+  bool processor_reports_armv6;    // "Processor" value contains "(v6l)".
+  bool hardware_reports_goldfish;  // "Hardware" value equals "Goldfish".
+} ProcCpuInfoData;
+
+// Counts leading digits; the cast keeps isdigit() defined for negative chars.
+static int IndexOfNonDigit(StringView str) {
+  size_t index = 0;
+  while (index < str.size && isdigit((unsigned char)str.ptr[index])) {
+    ++index;
+  }
+  return (int)index;
+}
+
+// Handles one /proc/cpuinfo line; returns false once the reader reports EOF.
+static bool HandleArmLine(const LineResult result, ArmInfo* const info,
+                          ProcCpuInfoData* const proc_info) {
+  StringView tag, text;
+  if (CpuFeatures_StringView_GetAttributeKeyValue(result.line, &tag, &text)) {
+    if (CpuFeatures_StringView_IsEquals(tag, str("Features"))) {
+      CpuFeatures_SetFromFlags(kConfigsSize, kConfigs, text, &info->features);
+    } else if (CpuFeatures_StringView_IsEquals(tag, str("CPU implementer"))) {
+      info->implementer = CpuFeatures_StringView_ParsePositiveNumber(text);
+    } else if (CpuFeatures_StringView_IsEquals(tag, str("CPU variant"))) {
+      info->variant = CpuFeatures_StringView_ParsePositiveNumber(text);
+    } else if (CpuFeatures_StringView_IsEquals(tag, str("CPU part"))) {
+      info->part = CpuFeatures_StringView_ParsePositiveNumber(text);
+    } else if (CpuFeatures_StringView_IsEquals(tag, str("CPU revision"))) {
+      info->revision = CpuFeatures_StringView_ParsePositiveNumber(text);
+    } else if (CpuFeatures_StringView_IsEquals(tag, str("CPU architecture"))) {
+      // The value is a number that may be followed by letters, e.g. "6TEJ"
+      // or "7": only the leading digits are parsed.
+      const StringView number =
+          CpuFeatures_StringView_KeepFront(text, IndexOfNonDigit(text));
+      info->architecture = CpuFeatures_StringView_ParsePositiveNumber(number);
+    } else if (CpuFeatures_StringView_IsEquals(tag, str("Processor"))) {
+      proc_info->processor_reports_armv6 =
+          CpuFeatures_StringView_IndexOf(text, str("(v6l)")) >= 0;
+    } else if (CpuFeatures_StringView_IsEquals(tag, str("Hardware"))) {
+      proc_info->hardware_reports_goldfish =
+          CpuFeatures_StringView_IsEquals(text, str("Goldfish"));
+    }
+  }
+  return !result.eof;
+}
+
+static uint32_t GetCpuId(const ArmInfo* const info) {  // packed id that FixErrors switches on
+  return (ExtractBitRange(info->implementer, 7, 0) << 24) |  // shifted to bit 24; presumably args are (value, msb, lsb)
+         (ExtractBitRange(info->variant, 3, 0) << 20) |  // shifted to bit 20
+         (ExtractBitRange(info->part, 11, 0) << 4) |  // shifted to bit 4
+         (ExtractBitRange(info->revision, 3, 0) << 0);  // stays at bit 0
+}
+
+// Corrects values that some kernels are known to misreport, then switches on
+// the features implied by the ones that were detected.
+static void FixErrors(ArmInfo* const info,
+                      ProcCpuInfoData* const proc_cpu_info_data) {
+  const uint32_t cpu_id = GetCpuId(info);
+
+  // Fixing Samsung kernel reporting invalid cpu architecture.
+  // http://code.google.com/p/android/issues/detail?id=10812
+  if (info->architecture >= 7 && proc_cpu_info_data->processor_reports_armv6) {
+    info->architecture = 6;
+  }
+
+  // Handle kernel configuration bugs that prevent the correct reporting of CPU
+  // features.
+  if (cpu_id == 0x4100C080) {
+    // Special case: The emulator-specific Android 4.2 kernel fails to report
+    // support for the 32-bit ARM IDIV instruction. Technically, this is a
+    // feature of the virtual CPU implemented by the emulator. Note that it
+    // could also support Thumb IDIV in the future, and this will have to be
+    // slightly updated.
+    if (proc_cpu_info_data->hardware_reports_goldfish &&
+        info->architecture >= 7) {
+      info->features.idiva = true;
+    }
+  } else if (cpu_id == 0x511004D0) {
+    // https://crbug.com/341598.
+    info->features.neon = false;
+  } else if (cpu_id == 0x510006F2 || cpu_id == 0x510006F3) {
+    // The Nexus 4 (Qualcomm Krait) kernel configuration forgets to report
+    // IDIV support.
+    info->features.idiva = true;
+    info->features.idivt = true;
+  }
+
+  // Propagate cpu features: vfpv4 or neon imply vfpv3, vfpv3 implies vfp.
+  if (info->features.vfpv4 || info->features.neon) {
+    info->features.vfpv3 = true;
+  }
+  if (info->features.vfpv3) info->features.vfp = true;
+}
+
+// Runs HandleArmLine over /proc/cpuinfo line by line; does nothing when the
+// file cannot be opened.
+static void FillProcCpuInfoData(ArmInfo* const info,
+                                ProcCpuInfoData* proc_cpu_info_data) {
+  StackLineReader reader;
+  const int fd = CpuFeatures_OpenFile("/proc/cpuinfo");
+  if (fd < 0) return;
+
+  StackLineReader_Initialize(&reader, fd);
+  // The handler returns false for the line that carries EOF.
+  while (HandleArmLine(StackLineReader_NextLine(&reader), info,
+                       proc_cpu_info_data)) {
+  }
+  CpuFeatures_CloseFile(fd);
+}
+
+// All-zero records (static storage) that GetArmInfo starts from.
+static const ArmInfo kEmptyArmInfo;
+static const ProcCpuInfoData kEmptyProcCpuInfoData;
+
+// Collects CPU data from /proc/cpuinfo and from the hardware capabilities, so
+// something is still reported when /proc/cpuinfo is unreadable (sandbox),
+// then applies the FixErrors corrections.
+ArmInfo GetArmInfo(void) {
+  ArmInfo info = kEmptyArmInfo;
+  ProcCpuInfoData proc_cpu_info_data = kEmptyProcCpuInfoData;
+  HardwareCapabilities hwcaps;
+
+  FillProcCpuInfoData(&info, &proc_cpu_info_data);
+  // A capability found in hwcaps switches its feature on afterwards.
+  hwcaps = CpuFeatures_GetHardwareCapabilities();
+  CpuFeatures_OverrideFromHwCaps(kConfigsSize, kConfigs, hwcaps,
+                                 &info.features);
+  FixErrors(&info, &proc_cpu_info_data);
+  return info;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Introspection functions
+
+// Returns the field of `features` that corresponds to `value`.
+// ARM_LAST_ and values that match no enumerator yield false (0).
+// `features` is dereferenced, so it must not be NULL.
+// The cases are grouped like the rows of kConfigs.
+int GetArmFeaturesEnumValue(const ArmFeatures* features,
+                            ArmFeaturesEnum value) {
+  // The switch has no `default:` label, so compilers can warn (-Wswitch)
+  // when an enumerator is not handled here.
+  switch (value) {
+    // Features whose kConfigs row uses the first hwcaps slot (ARM_HWCAP_*).
+    case ARM_VFP:      return features->vfp;
+    case ARM_IWMMXT:   return features->iwmmxt;
+    case ARM_NEON:     return features->neon;
+    case ARM_VFPV3:    return features->vfpv3;
+    case ARM_VFPV3D16: return features->vfpv3d16;
+    case ARM_VFPV4:    return features->vfpv4;
+    case ARM_IDIVA:    return features->idiva;
+    case ARM_IDIVT:    return features->idivt;
+
+    // Features whose kConfigs row uses the second hwcaps slot (ARM_HWCAP2_*).
+    case ARM_AES:      return features->aes;
+    case ARM_PMULL:    return features->pmull;
+    case ARM_SHA1:     return features->sha1;
+    case ARM_SHA2:     return features->sha2;
+    case ARM_CRC32:    return features->crc32;
+
+    // Not backed by a field of ArmFeatures.
+    case ARM_LAST_:
+      break;
+  }
+
+  // Only reached for ARM_LAST_ or a value outside the enumeration.
+  return false;
+}
+
+// Returns the lower-case name of the feature selected by `value` (the same
+// spelling as the flag names in kConfigs), or "unknown feature" for
+// ARM_LAST_ and values that match no enumerator.
+// The cases are grouped like the rows of kConfigs.
+const char* GetArmFeaturesEnumName(ArmFeaturesEnum value) {
+  // The switch has no `default:` label, so compilers can warn (-Wswitch)
+  // when an enumerator is not handled here.
+  switch (value) {
+    // Features whose kConfigs row uses the first hwcaps slot (ARM_HWCAP_*).
+    case ARM_VFP:      return "vfp";
+    case ARM_IWMMXT:   return "iwmmxt";
+    case ARM_NEON:     return "neon";
+    case ARM_VFPV3:    return "vfpv3";
+    case ARM_VFPV3D16: return "vfpv3d16";
+    case ARM_VFPV4:    return "vfpv4";
+    case ARM_IDIVA:    return "idiva";
+    case ARM_IDIVT:    return "idivt";
+
+    // Features whose kConfigs row uses the second hwcaps slot (ARM_HWCAP2_*).
+    case ARM_AES:      return "aes";
+    case ARM_PMULL:    return "pmull";
+    case ARM_SHA1:     return "sha1";
+    case ARM_SHA2:     return "sha2";
+    case ARM_CRC32:    return "crc32";
+
+    // Has no name of its own.
+    case ARM_LAST_:
+      break;
+  }
+
+  // Only reached for ARM_LAST_ or a value outside the enumeration.
+  return "unknown feature";
+}
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_mips.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_mips.c
new file mode 100755
index 00000000..a61cdd81
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_mips.c
@@ -0,0 +1,98 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "cpuinfo_mips.h"
+
+#include "internal/filesystem.h"
+#include "internal/linux_features_aggregator.h"
+#include "internal/stack_line_reader.h"
+#include "internal/string_view.h"
+
+DECLARE_SETTER(MipsFeatures, msa)
+DECLARE_SETTER(MipsFeatures, eva)
+
+static const CapabilityConfig kConfigs[] = {
+    {{MIPS_HWCAP_MSA, 0}, "msa", &set_msa},  //
+    {{MIPS_HWCAP_EVA, 0}, "eva", &set_eva},  //
+};
+static const size_t kConfigsSize = sizeof(kConfigs) / sizeof(CapabilityConfig);
+
+// Applies the "ASEs implemented" flags of one cpuinfo line (see tests).
+static bool HandleMipsLine(const LineResult result,
+                           MipsFeatures* const features) {
+  StringView key, value;
+  const bool ok =
+      CpuFeatures_StringView_GetAttributeKeyValue(result.line, &key, &value);
+  if (ok && CpuFeatures_StringView_IsEquals(key, str("ASEs implemented"))) {
+    CpuFeatures_SetFromFlags(kConfigsSize, kConfigs, value, features);
+  }
+  return !result.eof;
+}
+
+// Feeds /proc/cpuinfo to HandleMipsLine line by line; no-op if open fails.
+static void FillProcCpuInfoData(MipsFeatures* const features) {
+  StackLineReader reader;
+  const int fd = CpuFeatures_OpenFile("/proc/cpuinfo");
+  if (fd < 0) {
+    return;
+  }
+  StackLineReader_Initialize(&reader, fd);
+  while (HandleMipsLine(StackLineReader_NextLine(&reader), features)) {
+    // The handler does all the work; loop until it returns false.
+  }
+  CpuFeatures_CloseFile(fd);
+}
+
+static const MipsInfo kEmptyMipsInfo;
+
+MipsInfo GetMipsInfo(void) {
+  // capabilities are fetched from both getauxval and /proc/cpuinfo so we can
+  // have some information if the executable is sandboxed (aka no access to
+  // /proc/cpuinfo).
+  MipsInfo info = kEmptyMipsInfo;
+
+  FillProcCpuInfoData(&info.features);
+  CpuFeatures_OverrideFromHwCaps(kConfigsSize, kConfigs,
+                                 CpuFeatures_GetHardwareCapabilities(),
+                                 &info.features);
+  return info;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Introspection functions
+
+int GetMipsFeaturesEnumValue(const MipsFeatures* features,
+                             MipsFeaturesEnum value) {
+  switch (value) {
+    case MIPS_MSA:
+      return features->msa;
+    case MIPS_EVA:
+      return features->eva;
+    case MIPS_LAST_:
+      break;
+  }
+  return false;
+}
+
+const char* GetMipsFeaturesEnumName(MipsFeaturesEnum value) {
+  switch (value) {
+    case MIPS_MSA:
+      return "msa";
+    case MIPS_EVA:
+      return "eva";
+    case MIPS_LAST_:
+      break;
+  }
+  return "unknown feature";
+}
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_ppc.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_ppc.c
new file mode 100755
index 00000000..59b9ecca
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_ppc.c
@@ -0,0 +1,358 @@
+// Copyright 2018 IBM.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdbool.h>
+#include <string.h>
+
+#include "cpuinfo_ppc.h"
+#include "internal/bit_utils.h"
+#include "internal/filesystem.h"
+#include "internal/linux_features_aggregator.h"
+#include "internal/stack_line_reader.h"
+#include "internal/string_view.h"
+
+DECLARE_SETTER(PPCFeatures, ppc32)
+DECLARE_SETTER(PPCFeatures, ppc64)
+DECLARE_SETTER(PPCFeatures, ppc601)
+DECLARE_SETTER(PPCFeatures, altivec)
+DECLARE_SETTER(PPCFeatures, fpu)
+DECLARE_SETTER(PPCFeatures, mmu)
+DECLARE_SETTER(PPCFeatures, mac_4xx)
+DECLARE_SETTER(PPCFeatures, unifiedcache)
+DECLARE_SETTER(PPCFeatures, spe)
+DECLARE_SETTER(PPCFeatures, efpsingle)
+DECLARE_SETTER(PPCFeatures, efpdouble)
+DECLARE_SETTER(PPCFeatures, no_tb)
+DECLARE_SETTER(PPCFeatures, power4)
+DECLARE_SETTER(PPCFeatures, power5)
+DECLARE_SETTER(PPCFeatures, power5plus)
+DECLARE_SETTER(PPCFeatures, cell)
+DECLARE_SETTER(PPCFeatures, booke)
+DECLARE_SETTER(PPCFeatures, smt)
+DECLARE_SETTER(PPCFeatures, icachesnoop)
+DECLARE_SETTER(PPCFeatures, arch205)
+DECLARE_SETTER(PPCFeatures, pa6t)
+DECLARE_SETTER(PPCFeatures, dfp)
+DECLARE_SETTER(PPCFeatures, power6ext)
+DECLARE_SETTER(PPCFeatures, arch206)
+DECLARE_SETTER(PPCFeatures, vsx)
+DECLARE_SETTER(PPCFeatures, pseries_perfmon_compat)
+DECLARE_SETTER(PPCFeatures, truele)
+DECLARE_SETTER(PPCFeatures, ppcle)
+DECLARE_SETTER(PPCFeatures, arch207)
+DECLARE_SETTER(PPCFeatures, htm)
+DECLARE_SETTER(PPCFeatures, dscr)
+DECLARE_SETTER(PPCFeatures, ebb)
+DECLARE_SETTER(PPCFeatures, isel)
+DECLARE_SETTER(PPCFeatures, tar)
+DECLARE_SETTER(PPCFeatures, vcrypto)
+DECLARE_SETTER(PPCFeatures, htm_nosc)
+DECLARE_SETTER(PPCFeatures, arch300)
+DECLARE_SETTER(PPCFeatures, ieee128)
+DECLARE_SETTER(PPCFeatures, darn)
+DECLARE_SETTER(PPCFeatures, scv)
+DECLARE_SETTER(PPCFeatures, htm_no_suspend)
+
+static const CapabilityConfig kConfigs[] = {
+    {{PPC_FEATURE_32, 0}, "ppc32", &set_ppc32},
+    {{PPC_FEATURE_64, 0}, "ppc64", &set_ppc64},
+    {{PPC_FEATURE_601_INSTR, 0}, "ppc601", &set_ppc601},
+    {{PPC_FEATURE_HAS_ALTIVEC, 0}, "altivec", &set_altivec},
+    {{PPC_FEATURE_HAS_FPU, 0}, "fpu", &set_fpu},
+    {{PPC_FEATURE_HAS_MMU, 0}, "mmu", &set_mmu},
+    {{PPC_FEATURE_HAS_4xxMAC, 0}, "4xxmac", &set_mac_4xx},
+    {{PPC_FEATURE_UNIFIED_CACHE, 0}, "ucache", &set_unifiedcache},
+    {{PPC_FEATURE_HAS_SPE, 0}, "spe", &set_spe},
+    {{PPC_FEATURE_HAS_EFP_SINGLE, 0}, "efpsingle", &set_efpsingle},
+    {{PPC_FEATURE_HAS_EFP_DOUBLE, 0}, "efpdouble", &set_efpdouble},
+    {{PPC_FEATURE_NO_TB, 0}, "notb", &set_no_tb},
+    {{PPC_FEATURE_POWER4, 0}, "power4", &set_power4},
+    {{PPC_FEATURE_POWER5, 0}, "power5", &set_power5},
+    {{PPC_FEATURE_POWER5_PLUS, 0}, "power5+", &set_power5plus},
+    {{PPC_FEATURE_CELL, 0}, "cellbe", &set_cell},
+    {{PPC_FEATURE_BOOKE, 0}, "booke", &set_booke},
+    {{PPC_FEATURE_SMT, 0}, "smt", &set_smt},
+    {{PPC_FEATURE_ICACHE_SNOOP, 0}, "ic_snoop", &set_icachesnoop},
+    {{PPC_FEATURE_ARCH_2_05, 0}, "arch_2_05", &set_arch205},
+    {{PPC_FEATURE_PA6T, 0}, "pa6t", &set_pa6t},
+    {{PPC_FEATURE_HAS_DFP, 0}, "dfp", &set_dfp},
+    {{PPC_FEATURE_POWER6_EXT, 0}, "power6x", &set_power6ext},
+    {{PPC_FEATURE_ARCH_2_06, 0}, "arch_2_06", &set_arch206},
+    {{PPC_FEATURE_HAS_VSX, 0}, "vsx", &set_vsx},
+    {{PPC_FEATURE_PSERIES_PERFMON_COMPAT, 0},
+     "archpmu",
+     &set_pseries_perfmon_compat},
+    {{PPC_FEATURE_TRUE_LE, 0}, "true_le", &set_truele},
+    {{PPC_FEATURE_PPC_LE, 0}, "ppcle", &set_ppcle},
+    {{0, PPC_FEATURE2_ARCH_2_07}, "arch_2_07", &set_arch207},
+    {{0, PPC_FEATURE2_HTM}, "htm", &set_htm},
+    {{0, PPC_FEATURE2_DSCR}, "dscr", &set_dscr},
+    {{0, PPC_FEATURE2_EBB}, "ebb", &set_ebb},
+    {{0, PPC_FEATURE2_ISEL}, "isel", &set_isel},
+    {{0, PPC_FEATURE2_TAR}, "tar", &set_tar},
+    {{0, PPC_FEATURE2_VEC_CRYPTO}, "vcrypto", &set_vcrypto},
+    {{0, PPC_FEATURE2_HTM_NOSC}, "htm-nosc", &set_htm_nosc},
+    {{0, PPC_FEATURE2_ARCH_3_00}, "arch_3_00", &set_arch300},
+    {{0, PPC_FEATURE2_HAS_IEEE128}, "ieee128", &set_ieee128},
+    {{0, PPC_FEATURE2_DARN}, "darn", &set_darn},
+    {{0, PPC_FEATURE2_SCV}, "scv", &set_scv},
+    {{0, PPC_FEATURE2_HTM_NO_SUSPEND}, "htm-no-suspend", &set_htm_no_suspend},
+};
+static const size_t kConfigsSize = sizeof(kConfigs) / sizeof(CapabilityConfig);
+
+// Stores the platform/model/machine/cpu value found on one cpuinfo line.
+static bool HandlePPCLine(const LineResult result,
+                          PPCPlatformStrings* const strings) {
+  StringView key, value;
+  if (CpuFeatures_StringView_GetAttributeKeyValue(result.line, &key, &value)) {
+    if (CpuFeatures_StringView_HasWord(key, "platform")) {
+      CpuFeatures_StringView_CopyString(value, strings->platform,
+                                        sizeof(strings->platform));
+    } else if (CpuFeatures_StringView_IsEquals(key, str("model"))) {
+      CpuFeatures_StringView_CopyString(value, strings->model,
+                                        sizeof(strings->model));  // own size
+    } else if (CpuFeatures_StringView_IsEquals(key, str("machine"))) {
+      CpuFeatures_StringView_CopyString(value, strings->machine,
+                                        sizeof(strings->machine));
+    } else if (CpuFeatures_StringView_IsEquals(key, str("cpu"))) {
+      CpuFeatures_StringView_CopyString(value, strings->cpu,
+                                        sizeof(strings->cpu));
+    }
+  }
+  return !result.eof;
+}
+
+// Feeds /proc/cpuinfo to HandlePPCLine line by line; no-op if open fails.
+static void FillProcCpuInfoData(PPCPlatformStrings* const strings) {
+  StackLineReader reader;
+  const int fd = CpuFeatures_OpenFile("/proc/cpuinfo");
+  if (fd < 0) {
+    return;
+  }
+  StackLineReader_Initialize(&reader, fd);
+  while (HandlePPCLine(StackLineReader_NextLine(&reader), strings)) {
+    // The handler does all the work; loop until it returns false.
+  }
+  CpuFeatures_CloseFile(fd);
+}
+
+static const PPCInfo kEmptyPPCInfo;
+
+PPCInfo GetPPCInfo(void) {
+  /*
+   * On Power feature flags aren't currently in cpuinfo so we only look at
+   * the auxiliary vector.
+   */
+  PPCInfo info = kEmptyPPCInfo;
+
+  CpuFeatures_OverrideFromHwCaps(kConfigsSize, kConfigs,
+                                 CpuFeatures_GetHardwareCapabilities(),
+                                 &info.features);
+  return info;
+}
+
+static const PPCPlatformStrings kEmptyPPCPlatformStrings;
+
+PPCPlatformStrings GetPPCPlatformStrings(void) {
+  PPCPlatformStrings strings = kEmptyPPCPlatformStrings;
+
+  FillProcCpuInfoData(&strings);
+  strings.type = CpuFeatures_GetPlatformType();
+  return strings;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Introspection functions
+
+int GetPPCFeaturesEnumValue(const PPCFeatures* features,
+                            PPCFeaturesEnum value) {
+  switch (value) {
+    case PPC_32:
+      return features->ppc32;
+    case PPC_64:
+      return features->ppc64;
+    case PPC_601_INSTR:
+      return features->ppc601;
+    case PPC_HAS_ALTIVEC:
+      return features->altivec;
+    case PPC_HAS_FPU:
+      return features->fpu;
+    case PPC_HAS_MMU:
+      return features->mmu;
+    case PPC_HAS_4xxMAC:
+      return features->mac_4xx;
+    case PPC_UNIFIED_CACHE:
+      return features->unifiedcache;
+    case PPC_HAS_SPE:
+      return features->spe;
+    case PPC_HAS_EFP_SINGLE:
+      return features->efpsingle;
+    case PPC_HAS_EFP_DOUBLE:
+      return features->efpdouble;
+    case PPC_NO_TB:
+      return features->no_tb;
+    case PPC_POWER4:
+      return features->power4;
+    case PPC_POWER5:
+      return features->power5;
+    case PPC_POWER5_PLUS:
+      return features->power5plus;
+    case PPC_CELL:
+      return features->cell;
+    case PPC_BOOKE:
+      return features->booke;
+    case PPC_SMT:
+      return features->smt;
+    case PPC_ICACHE_SNOOP:
+      return features->icachesnoop;
+    case PPC_ARCH_2_05:
+      return features->arch205;
+    case PPC_PA6T:
+      return features->pa6t;
+    case PPC_HAS_DFP:
+      return features->dfp;
+    case PPC_POWER6_EXT:
+      return features->power6ext;
+    case PPC_ARCH_2_06:
+      return features->arch206;
+    case PPC_HAS_VSX:
+      return features->vsx;
+    case PPC_PSERIES_PERFMON_COMPAT:
+      return features->pseries_perfmon_compat;
+    case PPC_TRUE_LE:
+      return features->truele;
+    case PPC_PPC_LE:
+      return features->ppcle;
+    case PPC_ARCH_2_07:
+      return features->arch207;
+    case PPC_HTM:
+      return features->htm;
+    case PPC_DSCR:
+      return features->dscr;
+    case PPC_EBB:
+      return features->ebb;
+    case PPC_ISEL:
+      return features->isel;
+    case PPC_TAR:
+      return features->tar;
+    case PPC_VEC_CRYPTO:
+      return features->vcrypto;
+    case PPC_HTM_NOSC:
+      return features->htm_nosc;
+    case PPC_ARCH_3_00:
+      return features->arch300;
+    case PPC_HAS_IEEE128:
+      return features->ieee128;
+    case PPC_DARN:
+      return features->darn;
+    case PPC_SCV:
+      return features->scv;
+    case PPC_HTM_NO_SUSPEND:
+      return features->htm_no_suspend;
+    case PPC_LAST_:
+      break;
+  }
+  return false;
+}
+
+/* Have used the same names as glibc  */
+const char* GetPPCFeaturesEnumName(PPCFeaturesEnum value) {
+  switch (value) {
+    case PPC_32:
+      return "ppc32";
+    case PPC_64:
+      return "ppc64";
+    case PPC_601_INSTR:
+      return "ppc601";
+    case PPC_HAS_ALTIVEC:
+      return "altivec";
+    case PPC_HAS_FPU:
+      return "fpu";
+    case PPC_HAS_MMU:
+      return "mmu";
+    case PPC_HAS_4xxMAC:
+      return "4xxmac";
+    case PPC_UNIFIED_CACHE:
+      return "ucache";
+    case PPC_HAS_SPE:
+      return "spe";
+    case PPC_HAS_EFP_SINGLE:
+      return "efpsingle";
+    case PPC_HAS_EFP_DOUBLE:
+      return "efpdouble";
+    case PPC_NO_TB:
+      return "notb";
+    case PPC_POWER4:
+      return "power4";
+    case PPC_POWER5:
+      return "power5";
+    case PPC_POWER5_PLUS:
+      return "power5+";
+    case PPC_CELL:
+      return "cellbe";
+    case PPC_BOOKE:
+      return "booke";
+    case PPC_SMT:
+      return "smt";
+    case PPC_ICACHE_SNOOP:
+      return "ic_snoop";
+    case PPC_ARCH_2_05:
+      return "arch_2_05";
+    case PPC_PA6T:
+      return "pa6t";
+    case PPC_HAS_DFP:
+      return "dfp";
+    case PPC_POWER6_EXT:
+      return "power6x";
+    case PPC_ARCH_2_06:
+      return "arch_2_06";
+    case PPC_HAS_VSX:
+      return "vsx";
+    case PPC_PSERIES_PERFMON_COMPAT:
+      return "archpmu";
+    case PPC_TRUE_LE:
+      return "true_le";
+    case PPC_PPC_LE:
+      return "ppcle";
+    case PPC_ARCH_2_07:
+      return "arch_2_07";
+    case PPC_HTM:
+      return "htm";
+    case PPC_DSCR:
+      return "dscr";
+    case PPC_EBB:
+      return "ebb";
+    case PPC_ISEL:
+      return "isel";
+    case PPC_TAR:
+      return "tar";
+    case PPC_VEC_CRYPTO:
+      return "vcrypto";
+    case PPC_HTM_NOSC:
+      return "htm-nosc";
+    case PPC_ARCH_3_00:
+      return "arch_3_00";
+    case PPC_HAS_IEEE128:
+      return "ieee128";
+    case PPC_DARN:
+      return "darn";
+    case PPC_SCV:
+      return "scv";
+    case PPC_HTM_NO_SUSPEND:
+      return "htm-no-suspend";
+    case PPC_LAST_:
+      break;
+  }
+  return "unknown_feature";
+}
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_x86.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_x86.c
new file mode 100755
index 00000000..390e8c92
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_x86.c
@@ -0,0 +1,447 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "cpuinfo_x86.h"
+#include "internal/bit_utils.h"
+#include "internal/cpuid_x86.h"
+
+#include <stdbool.h>
+#include <string.h>
+
+static const Leaf kEmptyLeaf;
+
+// Queries CPUID only for supported leaves; otherwise yields the empty leaf.
+static Leaf SafeCpuId(uint32_t max_cpuid_leaf, uint32_t leaf_id) {
+  if (leaf_id > max_cpuid_leaf) {
+    return kEmptyLeaf;
+  }
+  return CpuId(leaf_id);
+}
+
+#define MASK_XMM 0x2
+#define MASK_YMM 0x4
+#define MASK_MASKREG 0x20
+#define MASK_ZMM0_15 0x40
+#define MASK_ZMM16_31 0x80
+
+static bool HasMask(uint32_t value, uint32_t mask) {
+  return (value & mask) == mask;
+}
+
+// Checks that operating system saves and restores xmm registers during context
+// switches.
+static bool HasXmmOsXSave(uint32_t xcr0_eax) {
+  return HasMask(xcr0_eax, MASK_XMM);
+}
+
+// Checks that operating system saves and restores ymm registers during context
+// switches.
+static bool HasYmmOsXSave(uint32_t xcr0_eax) {
+  return HasMask(xcr0_eax, MASK_XMM | MASK_YMM);
+}
+
+// Checks that operating system saves and restores zmm registers during context
+// switches.
+static bool HasZmmOsXSave(uint32_t xcr0_eax) {
+  return HasMask(xcr0_eax, MASK_XMM | MASK_YMM | MASK_MASKREG | MASK_ZMM0_15 |
+                               MASK_ZMM16_31);
+}
+
+// Writes ebx, edx, ecx as 12 bytes into `vendor` and NUL-terminates it.
+static void SetVendor(const Leaf leaf, char* const vendor) {
+  const uint32_t regs[3] = {leaf.ebx, leaf.edx, leaf.ecx};
+  memcpy(vendor, regs, sizeof(regs));  // no type-punned, unaligned stores
+  vendor[12] = '\0';
+}
+
+// Compares the leaf's ebx, edx, ecx (in that order) with the 12 bytes of
+// `name`; memcmp avoids reading the string through a uint32_t pointer.
+static int IsVendor(const Leaf leaf, const char* const name) {
+  const uint32_t regs[3] = {leaf.ebx, leaf.edx, leaf.ecx};
+  return memcmp(regs, name, sizeof(regs)) == 0;
+}
+
+// Decodes CPUID leaves 1/7 into info. See https://en.wikipedia.org/wiki/CPUID.
+static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info) {
+  const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1);
+  const Leaf leaf_7 = SafeCpuId(max_cpuid_leaf, 7);
+
+  const bool have_xsave = IsBitSet(leaf_1.ecx, 26);
+  const bool have_osxsave = IsBitSet(leaf_1.ecx, 27);
+  const uint32_t xcr0_eax = (have_xsave && have_osxsave) ? GetXCR0Eax() : 0;
+  const bool have_sse_os_support = HasXmmOsXSave(xcr0_eax);
+  const bool have_avx_os_support = HasYmmOsXSave(xcr0_eax);
+  const bool have_avx512_os_support = HasZmmOsXSave(xcr0_eax);
+
+  const uint32_t family = ExtractBitRange(leaf_1.eax, 11, 8);
+  const uint32_t extended_family = ExtractBitRange(leaf_1.eax, 27, 20);
+  const uint32_t model = ExtractBitRange(leaf_1.eax, 7, 4);
+  const uint32_t extended_model = ExtractBitRange(leaf_1.eax, 19, 16);
+
+  X86Features* const features = &info->features;
+
+  info->family = extended_family + family;
+  info->model = (extended_model << 4) + model;
+  info->stepping = ExtractBitRange(leaf_1.eax, 3, 0);
+
+  features->smx = IsBitSet(leaf_1.ecx, 6);
+  features->cx16 = IsBitSet(leaf_1.ecx, 13);
+  features->aes = IsBitSet(leaf_1.ecx, 25);
+  features->f16c = IsBitSet(leaf_1.ecx, 29);
+  features->sgx = IsBitSet(leaf_7.ebx, 2);
+  features->bmi1 = IsBitSet(leaf_7.ebx, 3);
+  features->bmi2 = IsBitSet(leaf_7.ebx, 8);
+  features->erms = IsBitSet(leaf_7.ebx, 9);
+  features->vpclmulqdq = IsBitSet(leaf_7.ecx, 10);
+
+  if (have_sse_os_support) {  // NOTE(review): false when XSAVE/OSXSAVE is absent, so SSE bits stay unset — confirm intended
+    features->ssse3 = IsBitSet(leaf_1.ecx, 9);
+    features->sse4_1 = IsBitSet(leaf_1.ecx, 19);
+    features->sse4_2 = IsBitSet(leaf_1.ecx, 20);
+  }
+
+  if (have_avx_os_support) {
+    features->fma3 = IsBitSet(leaf_1.ecx, 12);
+    features->avx = IsBitSet(leaf_1.ecx, 28);
+    features->avx2 = IsBitSet(leaf_7.ebx, 5);
+  }
+
+  if (have_avx512_os_support) {
+    features->avx512f = IsBitSet(leaf_7.ebx, 16);
+    features->avx512cd = IsBitSet(leaf_7.ebx, 28);
+    features->avx512er = IsBitSet(leaf_7.ebx, 27);
+    features->avx512pf = IsBitSet(leaf_7.ebx, 26);
+    features->avx512bw = IsBitSet(leaf_7.ebx, 30);
+    features->avx512dq = IsBitSet(leaf_7.ebx, 17);
+    features->avx512vl = IsBitSet(leaf_7.ebx, 31);
+    features->avx512ifma = IsBitSet(leaf_7.ebx, 21);
+    features->avx512vbmi = IsBitSet(leaf_7.ecx, 1);
+    features->avx512vbmi2 = IsBitSet(leaf_7.ecx, 6);
+    features->avx512vnni = IsBitSet(leaf_7.ecx, 11);
+    features->avx512bitalg = IsBitSet(leaf_7.ecx, 12);
+    features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14);
+    features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2);
+    features->avx512_4vbmi2 = IsBitSet(leaf_7.edx, 3);  // NOTE(review): Intel appears to list CPUID.7:EDX[3] as AVX512_4FMAPS — confirm
+  }
+}
+
+static const X86Info kEmptyX86Info;
+
+X86Info GetX86Info(void) {
+  X86Info info = kEmptyX86Info;
+  const Leaf leaf_0 = CpuId(0);
+  const uint32_t max_cpuid_leaf = leaf_0.eax;
+  SetVendor(leaf_0, info.vendor);
+  if (IsVendor(leaf_0, "GenuineIntel") || IsVendor(leaf_0, "AuthenticAMD")) {
+    ParseCpuId(max_cpuid_leaf, &info);
+  }
+  return info;
+}
+
+#define CPUID(FAMILY, MODEL) (((FAMILY & 0xFF) << 8) | (MODEL & 0xFF))
+
+X86Microarchitecture GetX86Microarchitecture(const X86Info* info) {
+  if (memcmp(info->vendor, "GenuineIntel", sizeof(info->vendor)) == 0) {
+    switch (CPUID(info->family, info->model)) {
+      case CPUID(0x06, 0x35):
+      case CPUID(0x06, 0x36):
+        // https://en.wikipedia.org/wiki/Bonnell_(microarchitecture)
+        return INTEL_ATOM_BNL;
+      case CPUID(0x06, 0x37):
+      case CPUID(0x06, 0x4C):
+        // https://en.wikipedia.org/wiki/Silvermont
+        return INTEL_ATOM_SMT;
+      case CPUID(0x06, 0x5C):
+        // https://en.wikipedia.org/wiki/Goldmont
+        return INTEL_ATOM_GMT;
+      case CPUID(0x06, 0x0F):
+      case CPUID(0x06, 0x16):
+        // https://en.wikipedia.org/wiki/Intel_Core_(microarchitecture)
+        return INTEL_CORE;
+      case CPUID(0x06, 0x17):
+      case CPUID(0x06, 0x1D):
+        // https://en.wikipedia.org/wiki/Penryn_(microarchitecture)
+        return INTEL_PNR;
+      case CPUID(0x06, 0x1A):
+      case CPUID(0x06, 0x1E):
+      case CPUID(0x06, 0x1F):
+      case CPUID(0x06, 0x2E):
+        // https://en.wikipedia.org/wiki/Nehalem_(microarchitecture)
+        return INTEL_NHM;
+      case CPUID(0x06, 0x25):
+      case CPUID(0x06, 0x2C):
+      case CPUID(0x06, 0x2F):
+        // https://en.wikipedia.org/wiki/Westmere_(microarchitecture)
+        return INTEL_WSM;
+      case CPUID(0x06, 0x2A):
+      case CPUID(0x06, 0x2D):
+        // https://en.wikipedia.org/wiki/Sandy_Bridge#Models_and_steppings
+        return INTEL_SNB;
+      case CPUID(0x06, 0x3A):
+      case CPUID(0x06, 0x3E):
+        // https://en.wikipedia.org/wiki/Ivy_Bridge_(microarchitecture)#Models_and_steppings
+        return INTEL_IVB;
+      case CPUID(0x06, 0x3C):
+      case CPUID(0x06, 0x3F):
+      case CPUID(0x06, 0x45):
+      case CPUID(0x06, 0x46):
+        // https://en.wikipedia.org/wiki/Haswell_(microarchitecture)
+        return INTEL_HSW;
+      case CPUID(0x06, 0x3D):
+      case CPUID(0x06, 0x47):
+      case CPUID(0x06, 0x4F):
+      case CPUID(0x06, 0x56):
+        // https://en.wikipedia.org/wiki/Broadwell_(microarchitecture)
+        return INTEL_BDW;
+      case CPUID(0x06, 0x4E):
+      case CPUID(0x06, 0x55):
+      case CPUID(0x06, 0x5E):
+        // https://en.wikipedia.org/wiki/Skylake_(microarchitecture)
+        return INTEL_SKL;
+      case CPUID(0x06, 0x8E):
+      case CPUID(0x06, 0x9E):
+        // https://en.wikipedia.org/wiki/Kaby_Lake
+        return INTEL_KBL;
+      default:
+        return X86_UNKNOWN;
+    }
+  }
+  if (memcmp(info->vendor, "AuthenticAMD", sizeof(info->vendor)) == 0) {
+    switch (info->family) {
+        // https://en.wikipedia.org/wiki/List_of_AMD_CPU_microarchitectures
+      case 0x0F:
+        return AMD_HAMMER;
+      case 0x10:
+        return AMD_K10;
+      case 0x14:
+        return AMD_BOBCAT;
+      case 0x15:
+        return AMD_BULLDOZER;
+      case 0x16:
+        return AMD_JAGUAR;
+      case 0x17:
+        return AMD_ZEN;
+      default:
+        return X86_UNKNOWN;
+    }
+  }
+  return X86_UNKNOWN;
+}
+
+static void SetString(const uint32_t max_cpuid_ext_leaf, const uint32_t leaf_id,
+                      char* buffer) {
+  const Leaf leaf = SafeCpuId(max_cpuid_ext_leaf, leaf_id);
+  // We allow calling memcpy from SetString which is only called when requesting
+  // X86BrandString.
+  memcpy(buffer, &leaf, sizeof(Leaf));
+}
+
+void FillX86BrandString(char brand_string[49]) {
+  const Leaf leaf_ext_0 = CpuId(0x80000000);
+  const uint32_t max_cpuid_leaf_ext = leaf_ext_0.eax;
+  SetString(max_cpuid_leaf_ext, 0x80000002, brand_string);
+  SetString(max_cpuid_leaf_ext, 0x80000003, brand_string + 16);
+  SetString(max_cpuid_leaf_ext, 0x80000004, brand_string + 32);
+  brand_string[48] = '\0';
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Introspection functions
+
+int GetX86FeaturesEnumValue(const X86Features* features,
+                            X86FeaturesEnum value) {
+  switch (value) {
+    case X86_AES:
+      return features->aes;
+    case X86_ERMS:
+      return features->erms;
+    case X86_F16C:
+      return features->f16c;
+    case X86_FMA3:
+      return features->fma3;
+    case X86_VPCLMULQDQ:
+      return features->vpclmulqdq;
+    case X86_BMI1:
+      return features->bmi1;
+    case X86_BMI2:
+      return features->bmi2;
+    case X86_SSSE3:
+      return features->ssse3;
+    case X86_SSE4_1:
+      return features->sse4_1;
+    case X86_SSE4_2:
+      return features->sse4_2;
+    case X86_AVX:
+      return features->avx;
+    case X86_AVX2:
+      return features->avx2;
+    case X86_AVX512F:
+      return features->avx512f;
+    case X86_AVX512CD:
+      return features->avx512cd;
+    case X86_AVX512ER:
+      return features->avx512er;
+    case X86_AVX512PF:
+      return features->avx512pf;
+    case X86_AVX512BW:
+      return features->avx512bw;
+    case X86_AVX512DQ:
+      return features->avx512dq;
+    case X86_AVX512VL:
+      return features->avx512vl;
+    case X86_AVX512IFMA:
+      return features->avx512ifma;
+    case X86_AVX512VBMI:
+      return features->avx512vbmi;
+    case X86_AVX512VBMI2:
+      return features->avx512vbmi2;
+    case X86_AVX512VNNI:
+      return features->avx512vnni;
+    case X86_AVX512BITALG:
+      return features->avx512bitalg;
+    case X86_AVX512VPOPCNTDQ:
+      return features->avx512vpopcntdq;
+    case X86_AVX512_4VNNIW:
+      return features->avx512_4vnniw;
+    case X86_AVX512_4VBMI2:
+      return features->avx512_4vbmi2;
+    case X86_SMX:
+      return features->smx;
+    case X86_SGX:
+      return features->sgx;
+    case X86_CX16:
+      return features->cx16;
+    case X86_LAST_:
+      break;
+  }
+  return false;
+}
+
+const char* GetX86FeaturesEnumName(X86FeaturesEnum value) {
+  switch (value) {
+    case X86_AES:
+      return "aes";
+    case X86_ERMS:
+      return "erms";
+    case X86_F16C:
+      return "f16c";
+    case X86_FMA3:
+      return "fma3";
+    case X86_VPCLMULQDQ:
+      return "vpclmulqdq";
+    case X86_BMI1:
+      return "bmi1";
+    case X86_BMI2:
+      return "bmi2";
+    case X86_SSSE3:
+      return "ssse3";
+    case X86_SSE4_1:
+      return "sse4_1";
+    case X86_SSE4_2:
+      return "sse4_2";
+    case X86_AVX:
+      return "avx";
+    case X86_AVX2:
+      return "avx2";
+    case X86_AVX512F:
+      return "avx512f";
+    case X86_AVX512CD:
+      return "avx512cd";
+    case X86_AVX512ER:
+      return "avx512er";
+    case X86_AVX512PF:
+      return "avx512pf";
+    case X86_AVX512BW:
+      return "avx512bw";
+    case X86_AVX512DQ:
+      return "avx512dq";
+    case X86_AVX512VL:
+      return "avx512vl";
+    case X86_AVX512IFMA:
+      return "avx512ifma";
+    case X86_AVX512VBMI:
+      return "avx512vbmi";
+    case X86_AVX512VBMI2:
+      return "avx512vbmi2";
+    case X86_AVX512VNNI:
+      return "avx512vnni";
+    case X86_AVX512BITALG:
+      return "avx512bitalg";
+    case X86_AVX512VPOPCNTDQ:
+      return "avx512vpopcntdq";
+    case X86_AVX512_4VNNIW:
+      return "avx512_4vnniw";
+    case X86_AVX512_4VBMI2:
+      return "avx512_4vbmi2";
+    case X86_SMX:
+      return "smx";
+    case X86_SGX:
+      return "sgx";
+    case X86_CX16:
+      return "cx16";
+    case X86_LAST_:
+      break;
+  }
+  return "unknown_feature";
+}
+
+const char* GetX86MicroarchitectureName(X86Microarchitecture uarch) {
+  switch (uarch) {
+    case X86_UNKNOWN:
+      return "X86_UNKNOWN";
+    case INTEL_CORE:
+      return "INTEL_CORE";
+    case INTEL_PNR:
+      return "INTEL_PNR";
+    case INTEL_NHM:
+      return "INTEL_NHM";
+    case INTEL_ATOM_BNL:
+      return "INTEL_ATOM_BNL";
+    case INTEL_WSM:
+      return "INTEL_WSM";
+    case INTEL_SNB:
+      return "INTEL_SNB";
+    case INTEL_IVB:
+      return "INTEL_IVB";
+    case INTEL_ATOM_SMT:
+      return "INTEL_ATOM_SMT";
+    case INTEL_HSW:
+      return "INTEL_HSW";
+    case INTEL_BDW:
+      return "INTEL_BDW";
+    case INTEL_SKL:
+      return "INTEL_SKL";
+    case INTEL_ATOM_GMT:
+      return "INTEL_ATOM_GMT";
+    case INTEL_KBL:
+      return "INTEL_KBL";
+    case INTEL_CFL:
+      return "INTEL_CFL";
+    case INTEL_CNL:
+      return "INTEL_CNL";
+    case AMD_HAMMER:
+      return "AMD_HAMMER";
+    case AMD_K10:
+      return "AMD_K10";
+    case AMD_BOBCAT:
+      return "AMD_BOBCAT";
+    case AMD_BULLDOZER:
+      return "AMD_BULLDOZER";
+    case AMD_JAGUAR:
+      return "AMD_JAGUAR";
+    case AMD_ZEN:
+      return "AMD_ZEN";
+  }
+  return "unknown microarchitecture";
+}
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/filesystem.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/filesystem.c
new file mode 100755
index 00000000..286a9ccb
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/filesystem.c
@@ -0,0 +1,57 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "internal/filesystem.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#if defined(_MSC_VER)
+#include <io.h>
+int CpuFeatures_OpenFile(const char* filename) {
+  return _open(filename, _O_RDONLY);
+}
+
+void CpuFeatures_CloseFile(int file_descriptor) { _close(file_descriptor); }
+
+int CpuFeatures_ReadFile(int file_descriptor, void* buffer,
+                         size_t buffer_size) {
+  return _read(file_descriptor, buffer, buffer_size);
+}
+
+#else
+#include <unistd.h>
+
+// Opens filename read-only, retrying while open() fails with EINTR.
+int CpuFeatures_OpenFile(const char* filename) {
+  for (;;) {
+    const int fd = open(filename, O_RDONLY);
+    if (fd != -1 || errno != EINTR) return fd;
+  }
+}
+
+void CpuFeatures_CloseFile(int file_descriptor) { close(file_descriptor); }
+
+// Reads up to buffer_size bytes, retrying while read() fails with EINTR.
+int CpuFeatures_ReadFile(int file_descriptor, void* buffer,
+                         size_t buffer_size) {
+  for (;;) {
+    const int bytes = read(file_descriptor, buffer, buffer_size);
+    if (bytes != -1 || errno != EINTR) return bytes;
+  }
+}
+
+#endif
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/hwcaps.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/hwcaps.c
new file mode 100755
index 00000000..99ea74b5
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/hwcaps.c
@@ -0,0 +1,194 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "cpu_features_macros.h"
+#include "internal/filesystem.h"
+#include "internal/hwcaps.h"
+#include "internal/string_view.h"
+
+#if defined(NDEBUG)
+#define D(...)
+#else
+#include <stdio.h>
+#define D(...)           \
+  do {                   \
+    printf(__VA_ARGS__); \
+    fflush(stdout);      \
+  } while (0)
+#endif
+
+#if defined(CPU_FEATURES_ARCH_MIPS) || defined(CPU_FEATURES_ARCH_ANY_ARM)
+#define HWCAPS_ANDROID_MIPS_OR_ARM
+#endif
+
+#if defined(CPU_FEATURES_OS_LINUX_OR_ANDROID) && \
+    !defined(HWCAPS_ANDROID_MIPS_OR_ARM)
+#define HWCAPS_REGULAR_LINUX
+#endif
+
+#if defined(HWCAPS_ANDROID_MIPS_OR_ARM) || defined(HWCAPS_REGULAR_LINUX)
+#define HWCAPS_SUPPORTED
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Implementation of GetElfHwcapFromGetauxval
+////////////////////////////////////////////////////////////////////////////////
+
+// On Linux we simply use getauxval.
+#if defined(HWCAPS_REGULAR_LINUX)
+#include <dlfcn.h>
+#include <sys/auxv.h>
+static unsigned long GetElfHwcapFromGetauxval(uint32_t hwcap_type) {
+  return getauxval(hwcap_type);  // Forwards directly to libc's getauxval().
+}
+#endif  // defined(HWCAPS_REGULAR_LINUX)
+
+// On Android we probe the system's C library for a 'getauxval' function and
+// call it if it exists, or return 0 for failure. This function is available
+// since API level 20.
+//
+// This code does *NOT* check for '__ANDROID_API__ >= 20' to support the edge
+// case where some NDK developers use headers for a platform that is newer than
+// the one really targeted by their application. This is typically done to use
+// newer native APIs only when running on more recent Android versions, and
+// requires careful symbol management.
+//
+// Note that getauxval() can't really be re-implemented here, because its
+// implementation does not parse /proc/self/auxv. Instead it depends on values
+// that are passed by the kernel at process-init time to the C runtime
+// initialization layer.
+#if defined(HWCAPS_ANDROID_MIPS_OR_ARM)
+#include <dlfcn.h>
+#define AT_HWCAP 16
+#define AT_HWCAP2 26
+#define AT_PLATFORM 15
+#define AT_BASE_PLATFORM 24
+
+typedef unsigned long getauxval_func_t(unsigned long);
+
+static unsigned long GetElfHwcapFromGetauxval(uint32_t hwcap_type) {
+  // The full unsigned long is returned, as in the regular Linux variant:
+  // AT_PLATFORM values are pointers that a uint32_t truncates on 64-bit ABIs.
+  unsigned long ret = 0;
+  void* libc_handle = NULL;
+  getauxval_func_t* func = NULL;
+  dlerror();  // Cleaning error state before calling dlopen.
+  libc_handle = dlopen("libc.so", RTLD_NOW);
+  if (!libc_handle) {
+    D("Could not dlopen() C library: %s\n", dlerror());
+    return 0;
+  }
+  func = (getauxval_func_t*)dlsym(libc_handle, "getauxval");
+  if (!func) {
+    D("Could not find getauxval() in C library\n");
+  } else {
+    ret = (*func)(hwcap_type);  // getauxval() returns 0 on failure.
+  }
+  dlclose(libc_handle);
+  return ret;
+}
+#endif  // defined(HWCAPS_ANDROID_MIPS_OR_ARM)
+
+#if defined(HWCAPS_SUPPORTED)
+////////////////////////////////////////////////////////////////////////////////
+// Implementation of GetHardwareCapabilities for Android and Linux
+////////////////////////////////////////////////////////////////////////////////
+
+// Fallback when getauxval is not available, retrieves hwcaps from
+// "/proc/self/auxv", whose entries are pairs of native unsigned long words.
+static unsigned long GetElfHwcapFromProcSelfAuxv(uint32_t hwcap_type) {
+  struct {
+    unsigned long tag;
+    unsigned long value;
+  } entry;
+  unsigned long result = 0;
+  const char filepath[] = "/proc/self/auxv";
+  const int fd = CpuFeatures_OpenFile(filepath);
+  if (fd < 0) {
+    D("Could not open %s\n", filepath);
+    return 0;
+  }
+  for (;;) {
+    const int ret = CpuFeatures_ReadFile(fd, (char*)&entry, sizeof entry);
+    if (ret < 0) {
+      D("Error while reading %s\n", filepath);
+      break;
+    }
+    // Detect end of list; a short read means no complete entry is left.
+    if (ret != (int)sizeof entry || (entry.tag == 0 && entry.value == 0)) {
+      break;
+    }
+    if (entry.tag == hwcap_type) {
+      result = entry.value;
+      break;
+    }
+  }
+  CpuFeatures_CloseFile(fd);
+  return result;
+}
+
+// Looks a value up through getauxval first; when that yields 0 the lookup is
+// retried by parsing "/proc/self/auxv".
+static unsigned long GetHardwareCapabilitiesFor(uint32_t type) {
+  const unsigned long from_getauxval = GetElfHwcapFromGetauxval(type);
+  if (from_getauxval) {
+    return from_getauxval;
+  }
+  D("Parsing /proc/self/auxv to extract ELF hwcaps!\n");
+  return GetElfHwcapFromProcSelfAuxv(type);
+}
+
+HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void) {
+  HardwareCapabilities capabilities;  // Only the two fields below are set.
+  capabilities.hwcaps = GetHardwareCapabilitiesFor(AT_HWCAP);
+  capabilities.hwcaps2 = GetHardwareCapabilitiesFor(AT_HWCAP2);
+  return capabilities;
+}
+
+PlatformType kEmptyPlatformType;
+
+PlatformType CpuFeatures_GetPlatformType(void) {
+  PlatformType type = kEmptyPlatformType;  // Start from the empty value.
+  char *platform = (char *)GetHardwareCapabilitiesFor(AT_PLATFORM);
+  char *base_platform = (char *)GetHardwareCapabilitiesFor(AT_BASE_PLATFORM);
+
+  if (platform != NULL)  // A 0 lookup result leaves type.platform untouched.
+    CpuFeatures_StringView_CopyString(str(platform), type.platform,
+                                      sizeof(type.platform));
+  if (base_platform != NULL)  // Same for type.base_platform.
+    CpuFeatures_StringView_CopyString(str(base_platform), type.base_platform,
+                                      sizeof(type.base_platform));
+  return type;
+}
+#else  // (defined(HWCAPS_SUPPORTED)
+
+PlatformType kEmptyPlatformType;
+
+PlatformType CpuFeatures_GetPlatformType(void) {
+	PlatformType type = kEmptyPlatformType;  // No hwcaps support: empty value.
+	return type;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Implementation of GetHardwareCapabilities for unsupported platforms.
+////////////////////////////////////////////////////////////////////////////////
+
+const HardwareCapabilities kEmptyHardwareCapabilities;  // Zero-initialized.
+HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void) {
+  return kEmptyHardwareCapabilities;  // Nothing can be detected here.
+}
+#endif
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/linux_features_aggregator.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/linux_features_aggregator.c
new file mode 100755
index 00000000..b7f8f3d9
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/linux_features_aggregator.c
@@ -0,0 +1,51 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "internal/linux_features_aggregator.h"
+#include "internal/string_view.h"
+
+void CpuFeatures_SetFromFlags(const size_t configs_size,
+                              const CapabilityConfig* configs,
+                              const StringView flags_line,
+                              void* const features) {
+  size_t remaining = configs_size;  // Walk configs front to back.
+  for (; remaining > 0; --remaining, ++configs) {
+    const bool present = CpuFeatures_StringView_HasWord(
+        flags_line, configs->proc_cpuinfo_flag);
+    configs->set_bit(features, present);
+  }
+}
+
+static bool IsSet(const uint32_t mask, const uint32_t value) {
+  return (mask & ~value) == 0;  // True when no bit of mask is missing in value.
+}
+
+static bool IsHwCapsSet(const HardwareCapabilities hwcaps_mask,
+                        const HardwareCapabilities hwcaps) {
+  if (!IsSet(hwcaps_mask.hwcaps, hwcaps.hwcaps)) return false;
+  return IsSet(hwcaps_mask.hwcaps2, hwcaps.hwcaps2);  // Both words must match.
+}
+
+void CpuFeatures_OverrideFromHwCaps(const size_t configs_size,
+                                    const CapabilityConfig* configs,
+                                    const HardwareCapabilities hwcaps,
+                                    void* const features) {
+  // Only ever turns bits on: entries whose mask is not fully present in hwcaps
+  // leave the corresponding feature untouched.
+  size_t index;
+  for (index = 0; index != configs_size; ++index) {
+    if (!IsHwCapsSet(configs[index].hwcaps_mask, hwcaps)) continue;
+    configs[index].set_bit(features, true);
+  }
+}
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/stack_line_reader.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/stack_line_reader.c
new file mode 100755
index 00000000..b2c48ba6
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/stack_line_reader.c
@@ -0,0 +1,131 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "internal/stack_line_reader.h"
+#include "internal/filesystem.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+
+void StackLineReader_Initialize(StackLineReader* reader, int fd) {
+  reader->fd = fd;
+  reader->skip_mode = false;
+  reader->view.size = 0;  // Empty view anchored at the start of the buffer.
+  reader->view.ptr = reader->buffer;
+}
+
+// Replaces the content of buffer with bytes from the file.
+static int LoadFullBuffer(StackLineReader* reader) {
+  int read = CpuFeatures_ReadFile(reader->fd, reader->buffer,
+                                  STACK_LINE_READER_BUFFER_SIZE);
+  if (read < 0) read = 0;  // A failed read is handled like end of file.
+  reader->view.ptr = reader->buffer;
+  reader->view.size = read;
+  return read;
+}
+
+// Appends bytes from the file to buffer, filling the remaining space.
+static int LoadMore(StackLineReader* reader) {
+  char* const ptr = reader->buffer + reader->view.size;
+  const size_t size_to_read = STACK_LINE_READER_BUFFER_SIZE - reader->view.size;
+  int read = CpuFeatures_ReadFile(reader->fd, ptr, size_to_read);
+  if (read < 0) read = 0;  // A failed read is handled like end of file.
+  assert(read <= (int)size_to_read);
+  reader->view.size += read;
+  return read;
+}
+
+static int IndexOfEol(StackLineReader* reader) {
+  return CpuFeatures_StringView_IndexOfChar(reader->view, '\n');  // -1: none.
+}
+
+// Moves the unread bytes to the start of the buffer, then tops the buffer up
+// with more bytes from the file.
+static int BringToFrontAndLoadMore(StackLineReader* reader) {
+  const bool needs_move =
+      reader->view.size != 0 && reader->view.ptr != reader->buffer;
+  if (needs_move) memmove(reader->buffer, reader->view.ptr, reader->view.size);
+  reader->view.ptr = reader->buffer;
+  return LoadMore(reader);
+}
+
+// Discards the remainder of the current line. The buffer is reloaded one full
+// chunk at a time until a chunk contains a newline character or the file has
+// no more bytes to offer.
+static void SkipToNextLine(StackLineReader* reader) {
+  while (LoadFullBuffer(reader) != 0) {
+    const int eol_index = IndexOfEol(reader);
+    if (eol_index < 0) {
+      // No newline in this chunk: the overlong line continues.
+      continue;
+    }
+    // Leave the view on the first byte after the newline.
+    reader->view =
+        CpuFeatures_StringView_PopFront(reader->view, eol_index + 1);
+    return;
+  }
+  // Reaching this point means the file ended before any newline showed up.
+}
+
+static LineResult CreateLineResult(bool eof, bool full_line, StringView view) {
+  LineResult result;  // Plain aggregate of the three arguments.
+  result.line = view;
+  result.full_line = full_line;
+  result.eof = eof;
+  return result;
+}
+
+// Helper functions that give StackLineReader_NextLine's results clear names.
+static LineResult CreateEOFLineResult(StringView view) {
+  return CreateLineResult(true, true, view);  // eof = true, full_line = true.
+}
+
+static LineResult CreateTruncatedLineResult(StringView view) {
+  return CreateLineResult(false, false, view);  // Not eof, line is partial.
+}
+
+static LineResult CreateValidLineResult(StringView view) {
+  return CreateLineResult(false, true, view);  // Not eof, line is complete.
+}
+
+LineResult StackLineReader_NextLine(StackLineReader* reader) {
+  bool can_load_more;
+  int eol_index;
+  StringView line;
+
+  // A previous call returned a truncated line: drop the rest of it first.
+  if (reader->skip_mode) {
+    SkipToNextLine(reader);
+    reader->skip_mode = false;
+  }
+
+  can_load_more = reader->view.size < STACK_LINE_READER_BUFFER_SIZE;
+  eol_index = IndexOfEol(reader);
+  if (eol_index < 0 && can_load_more) {
+    // No complete line buffered yet; try to fetch more bytes.
+    if (BringToFrontAndLoadMore(reader) == 0) {
+      return CreateEOFLineResult(reader->view);
+    }
+    eol_index = IndexOfEol(reader);
+  }
+  if (eol_index < 0) {
+    // Still no newline: hand out what is buffered and skip the rest next time.
+    reader->skip_mode = true;
+    return CreateTruncatedLineResult(reader->view);
+  }
+  line = CpuFeatures_StringView_KeepFront(reader->view, eol_index);
+  reader->view = CpuFeatures_StringView_PopFront(reader->view, eol_index + 1);
+  return CreateValidLineResult(line);
+}
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/string_view.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/string_view.c
new file mode 100755
index 00000000..4f27cbdb
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/string_view.c
@@ -0,0 +1,182 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "internal/string_view.h"
+
+#include <assert.h>
+#include <ctype.h>
+#include <string.h>
+
+int CpuFeatures_StringView_IndexOfChar(const StringView view, char c) {
+  const char* found;
+  // Empty or null views contain no characters at all.
+  if (!view.ptr || !view.size) return -1;
+  found = (const char*)memchr(view.ptr, c, view.size);
+  if (!found) return -1;
+  // Offset of the first occurrence, counted from the start of the view.
+  return found - view.ptr;
+}
+
+int CpuFeatures_StringView_IndexOf(const StringView view,
+                                   const StringView sub_view) {
+  StringView remainder = view;
+  if (!sub_view.size) return -1;  // An empty needle is never found.
+  while (remainder.size >= sub_view.size) {
+    // Jump to the next occurrence of the needle's first character.
+    const int found_index =
+        CpuFeatures_StringView_IndexOfChar(remainder, sub_view.ptr[0]);
+    if (found_index < 0) return -1;
+    remainder = CpuFeatures_StringView_PopFront(remainder, found_index);
+    if (CpuFeatures_StringView_StartsWith(remainder, sub_view)) {
+      return remainder.ptr - view.ptr;
+    }
+    remainder = CpuFeatures_StringView_PopFront(remainder, 1);
+  }
+  return -1;
+}
+
+bool CpuFeatures_StringView_IsEquals(const StringView a, const StringView b) {
+  // Views of different length can never match.
+  if (a.size != b.size) return false;
+  if (a.ptr == b.ptr) return true;  // Same storage, same length.
+  return memcmp(a.ptr, b.ptr, b.size) == 0;
+}
+
+bool CpuFeatures_StringView_StartsWith(const StringView a, const StringView b) {
+  // Null views, an empty prefix, or a prefix longer than `a` never match.
+  if (!a.ptr || !b.ptr || !b.size || a.size < b.size) return false;
+  return memcmp(a.ptr, b.ptr, b.size) == 0;
+}
+
+StringView CpuFeatures_StringView_PopFront(const StringView str_view,
+                                           size_t count) {
+  // Dropping more characters than available yields the empty view.
+  return count > str_view.size
+             ? kEmptyStringView
+             : view(str_view.ptr + count, str_view.size - count);
+}
+
+StringView CpuFeatures_StringView_PopBack(const StringView str_view,
+                                          size_t count) {
+  // Dropping more characters than available yields the empty view.
+  return count > str_view.size
+             ? kEmptyStringView
+             : view(str_view.ptr, str_view.size - count);
+}
+
+StringView CpuFeatures_StringView_KeepFront(const StringView str_view,
+                                            size_t count) {  // First `count`.
+  return count > str_view.size ? str_view : view(str_view.ptr, count);
+}
+
+char CpuFeatures_StringView_Front(const StringView view) {
+  assert(view.size);  // Precondition: the view is not empty.
+  assert(view.ptr);
+  return view.ptr[0];  // First character of the view.
+}
+
+char CpuFeatures_StringView_Back(const StringView view) {
+  assert(view.size);  // Precondition: the view is not empty.
+  return view.ptr[view.size - 1];  // Last character of the view.
+}
+
+StringView CpuFeatures_StringView_TrimWhitespace(StringView view) {
+  while (view.size && isspace((unsigned char)CpuFeatures_StringView_Front(view)))
+    view = CpuFeatures_StringView_PopFront(view, 1);
+  while (view.size && isspace((unsigned char)CpuFeatures_StringView_Back(view)))
+    view = CpuFeatures_StringView_PopBack(view, 1);
+  return view;  // The unsigned char casts keep isspace() defined for bytes >= 0x80.
+}
+
+static int HexValue(const char c) {
+  if ('0' <= c && c <= '9') return c - '0';
+  if ('a' <= c && c <= 'f') return 10 + (c - 'a');
+  if ('A' <= c && c <= 'F') return 10 + (c - 'A');
+  return -1;  // Not a hexadecimal digit.
+}
+
+// Returns -1 if view contains a character that is not a digit of `base`.
+static int ParsePositiveNumberWithBase(const StringView view, int base) {
+  StringView remainder = view;
+  int result = 0;
+  while (remainder.size) {
+    const int digit = HexValue(CpuFeatures_StringView_Front(remainder));
+    if (digit < 0 || digit >= base) return -1;
+    result = result * base + digit;
+    remainder = CpuFeatures_StringView_PopFront(remainder, 1);
+  }
+  return result;
+}
+
+int CpuFeatures_StringView_ParsePositiveNumber(const StringView view) {
+  // Accepts decimal digits, or hexadecimal digits after a "0x" prefix; the
+  // result is -1 for an empty view or when a character is not a valid digit.
+  const StringView hex_prefix = str("0x");
+  if (!view.size) return -1;  // Nothing to parse.
+  if (!CpuFeatures_StringView_StartsWith(view, hex_prefix)) {
+    return ParsePositiveNumberWithBase(view, 10);
+  }
+  // Hexadecimal: parse whatever follows the "0x" prefix.
+  return ParsePositiveNumberWithBase(
+      CpuFeatures_StringView_PopFront(view, hex_prefix.size), 16);
+}
+
+void CpuFeatures_StringView_CopyString(const StringView src, char* dst,
+                                       size_t dst_size) {
+  size_t copy_size;
+  if (dst_size == 0) return;  // No room, not even for the terminator.
+  // Copy at most dst_size - 1 characters so that the terminating NUL always
+  // fits into the destination.
+  copy_size = src.size < dst_size ? src.size : dst_size - 1;
+  memcpy(dst, src.ptr, copy_size);
+  dst[copy_size] = '\0';
+}
+
+bool CpuFeatures_StringView_HasWord(const StringView line,
+                                    const char* const word_str) {
+  // True when `word_str` occurs in `line` delimited by spaces or line ends.
+  const StringView word = str(word_str);
+  StringView remainder = line;
+  for (;;) {
+    const int index_of_word = CpuFeatures_StringView_IndexOf(remainder, word);
+    if (index_of_word < 0) {
+      return false;
+    } else {
+      // IndexOf is relative to `remainder`; rebase the index onto `line`.
+      const size_t start = (size_t)(remainder.ptr - line.ptr) + index_of_word;
+      const StringView before = CpuFeatures_StringView_KeepFront(line, start);
+      const StringView after =
+          CpuFeatures_StringView_PopFront(line, start + word.size);
+      const bool valid_before =
+          before.size == 0 || CpuFeatures_StringView_Back(before) == ' ';
+      const bool valid_after =
+          after.size == 0 || CpuFeatures_StringView_Front(after) == ' ';
+      if (valid_before && valid_after) return true;
+      remainder = after;  // Keep searching behind this occurrence.
+    }
+  }
+}
+
+bool CpuFeatures_StringView_GetAttributeKeyValue(const StringView line,
+                                                 StringView* key,
+                                                 StringView* value) {
+  const StringView sep = str(": ");  // Splits at the first ": " in line.
+  const int index_of_separator = CpuFeatures_StringView_IndexOf(line, sep);
+  if (index_of_separator < 0) return false;  // key/value are left untouched.
+  *value = CpuFeatures_StringView_TrimWhitespace(
+      CpuFeatures_StringView_PopFront(line, index_of_separator + sep.size));
+  *key = CpuFeatures_StringView_TrimWhitespace(
+      CpuFeatures_StringView_KeepFront(line, index_of_separator));
+  return true;
+}
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/utils/list_cpu_features.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/utils/list_cpu_features.c
new file mode 100755
index 00000000..a5f7f8ce
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/utils/list_cpu_features.c
@@ -0,0 +1,237 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cpu_features_macros.h"
+#include "cpuinfo_aarch64.h"
+#include "cpuinfo_arm.h"
+#include "cpuinfo_mips.h"
+#include "cpuinfo_ppc.h"
+#include "cpuinfo_x86.h"
+
+static void PrintEscapedAscii(const char* str) {
+  putchar('"');
+  for (; str && *str; ++str) {
+    char escaped = *str;  // Character emitted after the backslash, if any.
+    switch (*str) {
+      case '\b': escaped = 'b'; break;
+      case '\f': escaped = 'f'; break;
+      case '\n': escaped = 'n'; break;
+      case '\r': escaped = 'r'; break;
+      case '\t': escaped = 't'; break;
+      case '\"': case '\\': case '/': break;
+      default: putchar(*str); continue;
+    }
+    putchar('\\');
+    putchar(escaped);
+  }
+  putchar('"');
+}
+
+static void PrintVoid(void) {}  // No-op for Printer hooks that emit nothing.
+static void PrintComma(void) { putchar(','); }
+static void PrintLineFeed(void) { putchar('\n'); }
+static void PrintOpenBrace(void) { putchar('{'); }
+static void PrintCloseBrace(void) { putchar('}'); }
+static void PrintOpenBracket(void) { putchar('['); }
+static void PrintCloseBracket(void) { putchar(']'); }
+static void PrintString(const char* field) { printf("%s", field); }  // Raw.
+static void PrintAlignedHeader(const char* field) { printf("%-15s : ", field); }
+static void PrintIntValue(int value) { printf("%d", value); }  // Decimal only.
+static void PrintDecHexValue(int value) {
+  printf("%3d (0x%02X)", value, value);  // Decimal, then hex in parentheses.
+}
+static void PrintJsonHeader(const char* field) {
+  PrintEscapedAscii(field);  // Quoted and escaped field name.
+  putchar(':');
+}
+
+typedef struct {  // Output hooks; filled by getJsonPrinter / getTextPrinter.
+  void (*Start)(void);           // Called once before any field.
+  void (*ArrayStart)(void);      // Opens the flags list.
+  void (*ArraySeparator)(void);  // Emitted between two list items.
+  void (*ArrayEnd)(void);        // Closes the flags list.
+  void (*PrintString)(const char* value);
+  void (*PrintValue)(int value);
+  void (*EndField)(void);                 // Called after a field's value.
+  void (*StartField)(const char* field);  // Called before a field's value.
+  void (*End)(void);                      // Called once after all fields.
+} Printer;
+
+static Printer getJsonPrinter(void) {
+  Printer json;  // Every hook is assigned below.
+  json.Start = &PrintOpenBrace;
+  json.ArrayStart = &PrintOpenBracket;
+  json.ArraySeparator = &PrintComma;
+  json.ArrayEnd = &PrintCloseBracket;
+  json.PrintString = &PrintEscapedAscii;
+  json.PrintValue = &PrintIntValue;
+  json.EndField = &PrintComma;
+  json.StartField = &PrintJsonHeader;
+  json.End = &PrintCloseBrace;
+  return json;
+}
+
+static Printer getTextPrinter(void) {
+  Printer text;  // Every hook is assigned below.
+  text.Start = &PrintVoid;
+  text.ArrayStart = &PrintVoid;
+  text.ArraySeparator = &PrintComma;
+  text.ArrayEnd = &PrintVoid;
+  text.PrintString = &PrintString;
+  text.PrintValue = &PrintDecHexValue;
+  text.EndField = &PrintLineFeed;
+  text.StartField = &PrintAlignedHeader;
+  text.End = &PrintVoid;
+  return text;
+}
+
+// Prints a named numeric value, formatted by the printer's PrintValue hook.
+static void PrintN(const Printer p, const char* field, int value) {
+  p.StartField(field);
+  p.PrintValue(value);
+  p.EndField();
+}
+
+// Prints a named string, formatted by the printer's PrintString hook.
+static void PrintS(const Printer p, const char* field, const char* value) {
+  p.StartField(field);
+  p.PrintString(value);
+  p.EndField();
+}
+
+static int cmp(const void* p1, const void* p2) {  // qsort comparator.
+  return strcmp(*(const char* const*)p1, *(const char* const*)p2);
+}
+
+#define DEFINE_PRINT_FLAGS(HasFeature, FeatureName, FeatureType, LastEnum) \
+  static void PrintFlags(const Printer p, const FeatureType* features) {   \
+    size_t i;                                                              \
+    const char* ptrs[LastEnum] = {0}; /* names of the features found */    \
+    size_t count = 0;                                                      \
+    for (i = 0; i < LastEnum; ++i) {                                       \
+      if (HasFeature(features, i)) {                                       \
+        ptrs[count] = FeatureName(i);                                      \
+        ++count;                                                           \
+      }                                                                    \
+    }                                                                      \
+    qsort(ptrs, count, sizeof(char*), cmp); /* order by name (strcmp) */   \
+    p.StartField("flags");                                                 \
+    p.ArrayStart();                                                        \
+    for (i = 0; i < count; ++i) {                                          \
+      if (i > 0) p.ArraySeparator();                                       \
+      p.PrintString(ptrs[i]);                                              \
+    }                                                                      \
+    p.ArrayEnd(); /* EndField is not called after the flags list */        \
+  }
+
+#if defined(CPU_FEATURES_ARCH_X86)
+DEFINE_PRINT_FLAGS(GetX86FeaturesEnumValue, GetX86FeaturesEnumName, X86Features,
+                   X86_LAST_)
+#elif defined(CPU_FEATURES_ARCH_ARM)
+DEFINE_PRINT_FLAGS(GetArmFeaturesEnumValue, GetArmFeaturesEnumName, ArmFeatures,
+                   ARM_LAST_)
+#elif defined(CPU_FEATURES_ARCH_AARCH64)
+DEFINE_PRINT_FLAGS(GetAarch64FeaturesEnumValue, GetAarch64FeaturesEnumName,
+                   Aarch64Features, AARCH64_LAST_)
+#elif defined(CPU_FEATURES_ARCH_MIPS)
+DEFINE_PRINT_FLAGS(GetMipsFeaturesEnumValue, GetMipsFeaturesEnumName,
+                   MipsFeatures, MIPS_LAST_)
+#elif defined(CPU_FEATURES_ARCH_PPC)
+DEFINE_PRINT_FLAGS(GetPPCFeaturesEnumValue, GetPPCFeaturesEnumName, PPCFeatures,
+                   PPC_LAST_)
+#endif
+
+static void PrintFeatures(const Printer printer) {  // One branch per arch.
+#if defined(CPU_FEATURES_ARCH_X86)
+  char brand_string[49];
+  const X86Info info = GetX86Info();
+  FillX86BrandString(brand_string);
+  PrintS(printer, "arch", "x86");
+  PrintS(printer, "brand", brand_string);
+  PrintN(printer, "family", info.family);
+  PrintN(printer, "model", info.model);
+  PrintN(printer, "stepping", info.stepping);
+  PrintS(printer, "uarch",
+         GetX86MicroarchitectureName(GetX86Microarchitecture(&info)));
+  PrintFlags(printer, &info.features);  // Flags are always printed last.
+#elif defined(CPU_FEATURES_ARCH_ARM)
+  const ArmInfo info = GetArmInfo();
+  PrintS(printer, "arch", "ARM");
+  PrintN(printer, "implementer", info.implementer);
+  PrintN(printer, "architecture", info.architecture);
+  PrintN(printer, "variant", info.variant);
+  PrintN(printer, "part", info.part);
+  PrintN(printer, "revision", info.revision);
+  PrintFlags(printer, &info.features);
+#elif defined(CPU_FEATURES_ARCH_AARCH64)
+  const Aarch64Info info = GetAarch64Info();
+  PrintS(printer, "arch", "aarch64");
+  PrintN(printer, "implementer", info.implementer);
+  PrintN(printer, "variant", info.variant);
+  PrintN(printer, "part", info.part);
+  PrintN(printer, "revision", info.revision);
+  PrintFlags(printer, &info.features);
+#elif defined(CPU_FEATURES_ARCH_MIPS)
+  const MipsInfo info = GetMipsInfo();
+  PrintS(printer, "arch", "mips");
+  PrintFlags(printer, &info.features);
+#elif defined(CPU_FEATURES_ARCH_PPC)
+  const PPCInfo info = GetPPCInfo();
+  const PPCPlatformStrings strings = GetPPCPlatformStrings();
+  PrintS(printer, "arch", "ppc");
+  PrintS(printer, "platform", strings.platform);
+  PrintS(printer, "model", strings.model);
+  PrintS(printer, "machine", strings.machine);
+  PrintS(printer, "cpu", strings.cpu);
+  PrintS(printer, "instruction set", strings.type.platform);
+  PrintS(printer, "microarchitecture", strings.type.base_platform);
+  PrintFlags(printer, &info.features);
+#endif  // Any other architecture prints no fields at all.
+}
+
+static void showUsage(const char* name) {  // `name` fills the "Usage:" line.
+  printf(
+      "\n"
+      "Usage: %s [options]\n"
+      "      Options:\n"
+      "      -h | --help     Show help message.\n"
+      "      -j | --json     Format output as json instead of plain text.\n"
+      "\n",
+      name);
+}
+
+int main(int argc, char** argv) {
+  Printer printer = getTextPrinter();  // Plain text unless -j/--json is given.
+  int i = 1;
+  for (; i < argc; ++i) {
+    const char* arg = argv[i];
+    if (strcmp(arg, "-j") == 0 || strcmp(arg, "--json") == 0) {
+      printer = getJsonPrinter();
+    } else {
+      showUsage(argv[0]);  // Shown for help requests and unknown options.
+      if (strcmp(arg, "-h") == 0 || strcmp(arg, "--help") == 0)
+        return EXIT_SUCCESS;
+      return EXIT_FAILURE;  // Unknown option.
+    }
+  }
+  printer.Start();
+  PrintFeatures(printer);
+  printer.End();
+  PrintLineFeed();  // Final newline in both output formats.
+  return EXIT_SUCCESS;
+}
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/CMakeLists.txt b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/CMakeLists.txt
new file mode 100755
index 00000000..794ef04b
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/CMakeLists.txt
@@ -0,0 +1,79 @@
+#
+# libraries for tests
+#
+
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF) # prefer -std=c++11 over -std=gnu++11
+
+include_directories(../include) # directory scope: applies to all targets below
+add_definitions(-DCPU_FEATURES_TEST) # directory scope as well
+
+##------------------------------------------------------------------------------
+add_library(string_view ../src/string_view.c)
+##------------------------------------------------------------------------------
+add_library(filesystem_for_testing filesystem_for_testing.cc)
+##------------------------------------------------------------------------------
+add_library(hwcaps_for_testing hwcaps_for_testing.cc)
+target_link_libraries(hwcaps_for_testing filesystem_for_testing)
+##------------------------------------------------------------------------------
+add_library(stack_line_reader ../src/stack_line_reader.c)
+target_compile_definitions(stack_line_reader PUBLIC STACK_LINE_READER_BUFFER_SIZE=1024)
+target_link_libraries(stack_line_reader string_view)
+##------------------------------------------------------------------------------
+add_library(stack_line_reader_for_test ../src/stack_line_reader.c) # same source, 16-byte buffer
+target_compile_definitions(stack_line_reader_for_test PUBLIC STACK_LINE_READER_BUFFER_SIZE=16)
+target_link_libraries(stack_line_reader_for_test string_view filesystem_for_testing)
+##------------------------------------------------------------------------------
+add_library(all_libraries ../src/stack_line_reader.c ../src/linux_features_aggregator.c)
+target_link_libraries(all_libraries hwcaps_for_testing stack_line_reader string_view)
+
+#
+# tests
+#
+link_libraries(gtest gmock_main) # directory scope: linked into every target added below
+
+## bit_utils_test
+add_executable(bit_utils_test bit_utils_test.cc)
+target_link_libraries(bit_utils_test) # no libraries listed
+add_test(NAME bit_utils_test COMMAND bit_utils_test)
+##------------------------------------------------------------------------------
+## string_view_test
+add_executable(string_view_test string_view_test.cc ../src/string_view.c)
+target_link_libraries(string_view_test string_view)
+add_test(NAME string_view_test COMMAND string_view_test)
+##------------------------------------------------------------------------------
+## stack_line_reader_test
+add_executable(stack_line_reader_test stack_line_reader_test.cc)
+target_link_libraries(stack_line_reader_test stack_line_reader_for_test)
+add_test(NAME stack_line_reader_test COMMAND stack_line_reader_test)
+##------------------------------------------------------------------------------
+## linux_features_aggregator_test
+add_executable(linux_features_aggregator_test linux_features_aggregator_test.cc)
+target_link_libraries(linux_features_aggregator_test all_libraries)
+add_test(NAME linux_features_aggregator_test COMMAND linux_features_aggregator_test)
+##------------------------------------------------------------------------------
+## cpuinfo_x86_test
+add_executable(cpuinfo_x86_test cpuinfo_x86_test.cc ../src/cpuinfo_x86.c)
+target_link_libraries(cpuinfo_x86_test all_libraries)
+add_test(NAME cpuinfo_x86_test COMMAND cpuinfo_x86_test)
+##------------------------------------------------------------------------------
+## cpuinfo_arm_test
+add_executable(cpuinfo_arm_test cpuinfo_arm_test.cc ../src/cpuinfo_arm.c)
+target_link_libraries(cpuinfo_arm_test all_libraries)
+add_test(NAME cpuinfo_arm_test COMMAND cpuinfo_arm_test)
+##------------------------------------------------------------------------------
+## cpuinfo_aarch64_test
+add_executable(cpuinfo_aarch64_test cpuinfo_aarch64_test.cc ../src/cpuinfo_aarch64.c)
+target_link_libraries(cpuinfo_aarch64_test all_libraries)
+add_test(NAME cpuinfo_aarch64_test COMMAND cpuinfo_aarch64_test)
+##------------------------------------------------------------------------------
+## cpuinfo_mips_test
+add_executable(cpuinfo_mips_test cpuinfo_mips_test.cc  ../src/cpuinfo_mips.c)
+target_link_libraries(cpuinfo_mips_test all_libraries)
+add_test(NAME cpuinfo_mips_test COMMAND cpuinfo_mips_test)
+##------------------------------------------------------------------------------
+## cpuinfo_ppc_test
+add_executable(cpuinfo_ppc_test cpuinfo_ppc_test.cc  ../src/cpuinfo_ppc.c)
+target_link_libraries(cpuinfo_ppc_test all_libraries)
+add_test(NAME cpuinfo_ppc_test COMMAND cpuinfo_ppc_test)
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/bit_utils_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/bit_utils_test.cc
new file mode 100755
index 00000000..8937cbc2
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/bit_utils_test.cc
@@ -0,0 +1,53 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "internal/bit_utils.h"
+
+#include "gtest/gtest.h"
+
+namespace cpu_features {
+namespace {
+
+TEST(UtilsTest, IsBitSet) {  // Probes bit indices 0..31 of one-hot, all-zero and all-one words.
+  for (size_t bit_set = 0; bit_set < 32; ++bit_set) {
+    const uint32_t value = 1UL << bit_set;  // one-hot word: only bit `bit_set` is set
+    for (size_t i = 0; i < 32; ++i) {
+      EXPECT_EQ(IsBitSet(value, i), i == bit_set);
+    }
+  }
+
+  // testing 0, all bits should be 0.
+  for (size_t i = 0; i < 32; ++i) {
+    EXPECT_FALSE(IsBitSet(0, i));
+  }
+
+  // testing ~0, all bits should be 1.
+  for (size_t i = 0; i < 32; ++i) {
+    EXPECT_TRUE(IsBitSet(-1, i));  // -1 stands in for the all-ones word
+  }
+}
+
+TEST(UtilsTest, ExtractBitRange) {  // Calls below pass (value, highest bit, lowest bit), both ends inclusive.
+  // Extracting all bits gives the same number.
+  EXPECT_EQ(ExtractBitRange(123, 31, 0), 123);
+  // Extracting 1 bit gives parity.
+  EXPECT_EQ(ExtractBitRange(123, 0, 0), 1);
+  EXPECT_EQ(ExtractBitRange(122, 0, 0), 0);
+
+  EXPECT_EQ(ExtractBitRange(0xF0, 7, 4), 0xF);  // upper nibble of a byte
+  EXPECT_EQ(ExtractBitRange(0x42 << 2, 10, 2), 0x42);  // result is shifted down to bit 0
+}
+
+}  // namespace
+}  // namespace cpu_features
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_aarch64_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_aarch64_test.cc
new file mode 100755
index 00000000..bdb4d17c
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_aarch64_test.cc
@@ -0,0 +1,74 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "cpuinfo_aarch64.h"
+#include "filesystem_for_testing.h"
+#include "hwcaps_for_testing.h"
+
+#include "gtest/gtest.h"
+
+namespace cpu_features {
+namespace {
+
+void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); }  // Zeroes both faked hwcaps words.
+
+TEST(CpuinfoAarch64Test, FromHardwareCap) {  // hwcaps-only detection: just FP and AES are advertised.
+  SetHardwareCapabilities(AARCH64_HWCAP_FP | AARCH64_HWCAP_AES, 0);
+  GetEmptyFilesystem();  // disabling /proc/cpuinfo
+  const auto info = GetAarch64Info();
+  EXPECT_TRUE(info.features.fp);
+  EXPECT_FALSE(info.features.asimd);
+  EXPECT_TRUE(info.features.aes);
+  EXPECT_FALSE(info.features.pmull);
+  EXPECT_FALSE(info.features.sha1);
+  EXPECT_FALSE(info.features.sha2);
+  EXPECT_FALSE(info.features.crc32);
+}
+
+TEST(CpuinfoAarch64Test, ARMCortexA53) {  // Ids and features parsed from a faked /proc/cpuinfo; hwcaps zeroed.
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo",
+                R"(Processor   : AArch64 Processor rev 3 (aarch64)
+processor   : 0
+processor   : 1
+processor   : 2
+processor   : 3
+processor   : 4
+processor   : 5
+processor   : 6
+processor   : 7
+Features    : fp asimd evtstrm aes pmull sha1 sha2 crc32
+CPU implementer : 0x41
+CPU architecture: AArch64
+CPU variant : 0x0
+CPU part    : 0xd03
+CPU revision    : 3)");
+  const auto info = GetAarch64Info();
+  EXPECT_EQ(info.implementer, 0x41);
+  EXPECT_EQ(info.variant, 0x0);
+  EXPECT_EQ(info.part, 0xd03);
+  EXPECT_EQ(info.revision, 3);
+
+  EXPECT_TRUE(info.features.fp);
+  EXPECT_TRUE(info.features.asimd);
+  EXPECT_TRUE(info.features.aes);
+  EXPECT_TRUE(info.features.pmull);
+  EXPECT_TRUE(info.features.sha1);
+  EXPECT_TRUE(info.features.sha2);
+  EXPECT_TRUE(info.features.crc32);
+}
+
+}  // namespace
+}  // namespace cpu_features
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_arm_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_arm_test.cc
new file mode 100755
index 00000000..a72c5662
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_arm_test.cc
@@ -0,0 +1,182 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "cpuinfo_arm.h"
+#include "filesystem_for_testing.h"
+#include "hwcaps_for_testing.h"
+
+#include "gtest/gtest.h"
+
+namespace cpu_features {
+namespace {
+
+void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); }  // Zeroes both faked hwcaps words.
+
+TEST(CpuinfoArmTest, FromHardwareCap) {  // hwcaps-only detection: NEON in hwcaps, AES and CRC32 in hwcaps2.
+  SetHardwareCapabilities(ARM_HWCAP_NEON, ARM_HWCAP2_AES | ARM_HWCAP2_CRC32);
+  GetEmptyFilesystem();  // disabling /proc/cpuinfo
+  const auto info = GetArmInfo();
+  EXPECT_TRUE(info.features.vfp);    // triggered by vfpv3
+  EXPECT_TRUE(info.features.vfpv3);  // triggered by neon
+  EXPECT_TRUE(info.features.neon);
+  EXPECT_TRUE(info.features.aes);
+  EXPECT_TRUE(info.features.crc32);
+
+  EXPECT_FALSE(info.features.vfpv4);
+  EXPECT_FALSE(info.features.iwmmxt);
+  EXPECT_FALSE(info.features.vfpv3d16);
+  EXPECT_FALSE(info.features.idiva);
+  EXPECT_FALSE(info.features.idivt);
+  EXPECT_FALSE(info.features.pmull);
+  EXPECT_FALSE(info.features.sha1);
+  EXPECT_FALSE(info.features.sha2);
+}
+
+TEST(CpuinfoArmTest, ODroidFromCpuInfo) {  // Ids, architecture and features parsed from a faked /proc/cpuinfo.
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo", R"(processor       : 0
+model name      : ARMv7 Processor rev 3 (v71)
+BogoMIPS        : 120.00
+Features        : half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt vfpd32 lpae
+CPU implementer : 0x41
+CPU architecture: 7
+CPU variant     : 0x2
+CPU part        : 0xc0f
+CPU revision    : 3)");
+  const auto info = GetArmInfo();
+  EXPECT_EQ(info.implementer, 0x41);
+  EXPECT_EQ(info.variant, 0x2);
+  EXPECT_EQ(info.part, 0xc0f);
+  EXPECT_EQ(info.revision, 3);
+  EXPECT_EQ(info.architecture, 7);
+
+  EXPECT_TRUE(info.features.vfp);
+  EXPECT_FALSE(info.features.iwmmxt);
+  EXPECT_TRUE(info.features.neon);
+  EXPECT_TRUE(info.features.vfpv3);
+  EXPECT_FALSE(info.features.vfpv3d16);
+  EXPECT_TRUE(info.features.vfpv4);
+  EXPECT_TRUE(info.features.idiva);
+  EXPECT_TRUE(info.features.idivt);
+  EXPECT_FALSE(info.features.aes);
+  EXPECT_FALSE(info.features.pmull);
+  EXPECT_FALSE(info.features.sha1);
+  EXPECT_FALSE(info.features.sha2);
+  EXPECT_FALSE(info.features.crc32);
+}
+
+// http://code.google.com/p/android/issues/detail?id=10812 (an ARMv6 part whose cpuinfo says "CPU architecture: 7")
+TEST(CpuinfoArmTest, InvalidArmv7) {  // The reported architecture must come out as 6, not the 7 in the fixture.
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo",
+                R"(Processor       : ARMv6-compatible processor rev 6 (v6l) 
+BogoMIPS        : 199.47 
+Features        : swp half thumb fastmult vfp edsp java 
+CPU implementer : 0x41 
+CPU architecture: 7 
+CPU variant     : 0x0 
+CPU part        : 0xb76 
+CPU revision    : 6 
+
+Hardware        : SPICA 
+Revision        : 0020 
+Serial          : 33323613546d00ec )");
+  const auto info = GetArmInfo();
+  EXPECT_EQ(info.architecture, 6);
+}
+
+// https://crbug.com/341598: "neon" appears in the Features line but must be reported as absent.
+TEST(CpuinfoArmTest, InvalidNeon) {  // NOTE(review): no DisableHardwareCapabilities() here, unlike sibling tests - confirm intended.
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo",
+                R"(Processor: ARMv7 Processory rev 0 (v71)
+processor: 0
+BogoMIPS: 13.50
+
+Processor: 1
+BogoMIPS: 13.50
+
+Features: swp half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt
+CPU implementer : 0x51
+CPU architecture: 7
+CPU variant: 0x1
+CPU part: 0x04d
+CPU revision: 0
+
+Hardware: SAMSUNG M2
+Revision: 0010
+Serial: 00001e030000354e)");
+  const auto info = GetArmInfo();
+  EXPECT_FALSE(info.features.neon);
+}
+
+// The Nexus 4 (Qualcomm Krait) kernel configuration forgets to report IDIV
+// support.
+TEST(CpuinfoArmTest, Nexus4_0x510006f2) {  // Fixture has no Features line; idiva/idivt must still be set (revision 2).
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo",
+                R"(CPU implementer	: 0x51
+CPU architecture: 7
+CPU variant	: 0x0
+CPU part	: 0x6f
+CPU revision	: 2)");
+  const auto info = GetArmInfo();
+  EXPECT_TRUE(info.features.idiva);
+  EXPECT_TRUE(info.features.idivt);
+}
+
+// The Nexus 4 (Qualcomm Krait) kernel configuration forgets to report IDIV
+// support.
+TEST(CpuinfoArmTest, Nexus4_0x510006f3) {  // Fixture has no Features line; idiva/idivt must still be set (revision 3).
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo",
+                R"(CPU implementer	: 0x51
+CPU architecture: 7
+CPU variant	: 0x0
+CPU part	: 0x6f
+CPU revision	: 3)");
+  const auto info = GetArmInfo();
+  EXPECT_TRUE(info.features.idiva);
+  EXPECT_TRUE(info.features.idivt);
+}
+
+// The emulator-specific Android 4.2 kernel fails to report support for the
+// 32-bit ARM IDIV instruction. Technically, this is a feature of the virtual
+// CPU implemented by the emulator.
+TEST(CpuinfoArmTest, EmulatorSpecificIdiv) {  // Features line lacks idiva; it must still be set for this Goldfish fixture.
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo",
+                R"(Processor	: ARMv7 Processor rev 0 (v7l)
+BogoMIPS	: 629.14
+Features	: swp half thumb fastmult vfp edsp neon vfpv3
+CPU implementer	: 0x41
+CPU architecture: 7
+CPU variant	: 0x0
+CPU part	: 0xc08
+CPU revision	: 0
+
+Hardware	: Goldfish
+Revision	: 0000
+Serial		: 0000000000000000)");
+  const auto info = GetArmInfo();
+  EXPECT_TRUE(info.features.idiva);
+}
+
+}  // namespace
+}  // namespace cpu_features
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_mips_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_mips_test.cc
new file mode 100755
index 00000000..7c5a6752
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_mips_test.cc
@@ -0,0 +1,125 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "cpuinfo_mips.h"
+#include "filesystem_for_testing.h"
+#include "hwcaps_for_testing.h"
+#include "internal/stack_line_reader.h"
+#include "internal/string_view.h"
+
+#include "gtest/gtest.h"
+
+namespace cpu_features {
+
+namespace {
+
+void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); }  // Zeroes both faked hwcaps words.
+
+TEST(CpuinfoMipsTest, FromHardwareCapBoth) {  // hwcaps-only detection with both EVA and MSA advertised.
+  SetHardwareCapabilities(MIPS_HWCAP_EVA | MIPS_HWCAP_MSA, 0);
+  GetEmptyFilesystem();  // disabling /proc/cpuinfo
+  const auto info = GetMipsInfo();
+  EXPECT_TRUE(info.features.msa);
+  EXPECT_TRUE(info.features.eva);
+}
+
+TEST(CpuinfoMipsTest, FromHardwareCapOnlyOne) {  // hwcaps-only detection with MSA advertised and EVA absent.
+  SetHardwareCapabilities(MIPS_HWCAP_MSA, 0);
+  GetEmptyFilesystem();  // disabling /proc/cpuinfo
+  const auto info = GetMipsInfo();
+  EXPECT_TRUE(info.features.msa);
+  EXPECT_FALSE(info.features.eva);
+}
+
+TEST(CpuinfoMipsTest, Ci40) {  // "ASEs implemented" lists eva but not msa; hwcaps zeroed.
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo", R"(system type : IMG Pistachio SoC (B0)
+machine : IMG Marduk – Ci40 with cc2520
+processor : 0
+cpu model : MIPS interAptiv (multi) V2.0 FPU V0.0
+BogoMIPS : 363.72
+wait instruction : yes
+microsecond timers : yes
+tlb_entries : 64
+extra interrupt vector : yes
+hardware watchpoint : yes, count: 4, address/irw mask: [0x0ffc, 0x0ffc, 0x0ffb, 0x0ffb]
+isa : mips1 mips2 mips32r1 mips32r2
+ASEs implemented : mips16 dsp mt eva
+shadow register sets : 1
+kscratch registers : 0
+package : 0
+core : 0
+VCED exceptions : not available
+VCEI exceptions : not available
+VPE : 0
+)");
+  const auto info = GetMipsInfo();
+  EXPECT_FALSE(info.features.msa);
+  EXPECT_TRUE(info.features.eva);
+}
+
+TEST(CpuinfoMipsTest, AR7161) {  // "ASEs implemented" lists only mips16, so neither msa nor eva is expected.
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo",
+                R"(system type             : Atheros AR7161 rev 2
+machine                 : NETGEAR WNDR3700/WNDR3800/WNDRMAC
+processor               : 0
+cpu model               : MIPS 24Kc V7.4
+BogoMIPS                : 452.19
+wait instruction        : yes
+microsecond timers      : yes
+tlb_entries             : 16
+extra interrupt vector  : yes
+hardware watchpoint     : yes, count: 4, address/irw mask: [0x0000, 0x0f98, 0x0f78, 0x0df8]
+ASEs implemented        : mips16
+shadow register sets    : 1
+kscratch registers      : 0
+core                    : 0
+VCED exceptions         : not available
+VCEI exceptions         : not available
+)");
+  const auto info = GetMipsInfo();
+  EXPECT_FALSE(info.features.msa);
+  EXPECT_FALSE(info.features.eva);
+}
+
+TEST(CpuinfoMipsTest, Goldfish) {  // "ASEs implemented" is present but empty, so neither msa nor eva is expected.
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo", R"(system type		: MIPS-Goldfish
+Hardware		: goldfish
+Revison		: 1
+processor		: 0
+cpu model		: MIPS 24Kc V0.0  FPU V0.0
+BogoMIPS		: 1042.02
+wait instruction	: yes
+microsecond timers	: yes
+tlb_entries		: 16
+extra interrupt vector	: yes
+hardware watchpoint	: yes, count: 1, address/irw mask: [0x0ff8]
+ASEs implemented	:
+shadow register sets	: 1
+core			: 0
+VCED exceptions		: not available
+VCEI exceptions		: not available
+)");
+  const auto info = GetMipsInfo();
+  EXPECT_FALSE(info.features.msa);
+  EXPECT_FALSE(info.features.eva);
+}
+
+}  // namespace
+}  // namespace cpu_features
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_ppc_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_ppc_test.cc
new file mode 100755
index 00000000..5d5e7980
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_ppc_test.cc
@@ -0,0 +1,119 @@
+// Copyright 2018 IBM.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "cpuinfo_ppc.h"
+#include "filesystem_for_testing.h"
+#include "hwcaps_for_testing.h"
+#include "internal/string_view.h"
+
+#include "gtest/gtest.h"
+
+namespace cpu_features {
+namespace {
+
+void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); }  // Zeroes both faked hwcaps words.
+
+TEST(CpustringsPPCTest, FromHardwareCap) {  // hwcaps-only detection: FPU and VSX in hwcaps, ARCH_3_00 in hwcaps2.
+  SetHardwareCapabilities(PPC_FEATURE_HAS_FPU | PPC_FEATURE_HAS_VSX,
+                          PPC_FEATURE2_ARCH_3_00);
+  GetEmptyFilesystem();  // disabling /proc/cpuinfo
+  const auto info = GetPPCInfo();
+  EXPECT_TRUE(info.features.fpu);
+  EXPECT_FALSE(info.features.mmu);
+  EXPECT_TRUE(info.features.vsx);
+  EXPECT_TRUE(info.features.arch300);
+  EXPECT_FALSE(info.features.power4);
+  EXPECT_FALSE(info.features.altivec);
+  EXPECT_FALSE(info.features.vcrypto);
+  EXPECT_FALSE(info.features.htm);
+}
+
+TEST(CpustringsPPCTest, Blade) {  // POWER7 pSeries fixture plus explicitly faked platform types.
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo",
+                R"(processor       : 14
+cpu             : POWER7 (architected), altivec supported
+clock           : 3000.000000MHz
+revision        : 2.1 (pvr 003f 0201)
+
+processor       : 15
+cpu             : POWER7 (architected), altivec supported
+clock           : 3000.000000MHz
+revision        : 2.1 (pvr 003f 0201)
+
+timebase        : 512000000
+platform        : pSeries
+model           : IBM,8406-70Y
+machine         : CHRP IBM,8406-70Y)");
+  SetPlatformTypes("power7", "power8");  // arguments are (platform, base_platform)
+  const auto strings = GetPPCPlatformStrings();
+  ASSERT_STREQ(strings.platform, "pSeries");
+  ASSERT_STREQ(strings.model, "IBM,8406-70Y");
+  ASSERT_STREQ(strings.machine, "CHRP IBM,8406-70Y");
+  ASSERT_STREQ(strings.cpu, "POWER7 (architected), altivec supported");
+  ASSERT_STREQ(strings.type.platform, "power7");
+  ASSERT_STREQ(strings.type.base_platform, "power8");
+}
+
+TEST(CpustringsPPCTest, Firestone) {  // POWER8 PowerNV fixture; checks platform, model, machine and cpu strings.
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo",
+                R"(processor       : 126
+cpu             : POWER8 (raw), altivec supported
+clock           : 2061.000000MHz
+revision        : 2.0 (pvr 004d 0200)
+
+processor       : 127
+cpu             : POWER8 (raw), altivec supported
+clock           : 2061.000000MHz
+revision        : 2.0 (pvr 004d 0200)
+
+timebase        : 512000000
+platform        : PowerNV
+model           : 8335-GTA
+machine         : PowerNV 8335-GTA
+firmware        : OPAL v3)");
+  const auto strings = GetPPCPlatformStrings();
+  ASSERT_STREQ(strings.platform, "PowerNV");
+  ASSERT_STREQ(strings.model, "8335-GTA");
+  ASSERT_STREQ(strings.machine, "PowerNV 8335-GTA");
+  ASSERT_STREQ(strings.cpu, "POWER8 (raw), altivec supported");
+}
+
+TEST(CpustringsPPCTest, w8) {  // POWER9 PowerNV fixture; checks platform, model, machine and cpu strings.
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo",
+                R"(processor       : 143
+cpu             : POWER9, altivec supported
+clock           : 2300.000000MHz
+revision        : 2.2 (pvr 004e 1202)
+
+timebase        : 512000000
+platform        : PowerNV
+model           : 0000000000000000
+machine         : PowerNV 0000000000000000
+firmware        : OPAL
+MMU             : Radix)");
+  const auto strings = GetPPCPlatformStrings();
+  ASSERT_STREQ(strings.platform, "PowerNV");
+  ASSERT_STREQ(strings.model, "0000000000000000");
+  ASSERT_STREQ(strings.machine, "PowerNV 0000000000000000");
+  ASSERT_STREQ(strings.cpu, "POWER9, altivec supported");
+}
+
+}  // namespace
+}  // namespace cpu_features
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_x86_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_x86_test.cc
new file mode 100755
index 00000000..f7fc0817
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_x86_test.cc
@@ -0,0 +1,172 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cassert>
+#include <cstdio>
+#include <map>
+
+#include "gtest/gtest.h"
+
+#include "cpuinfo_x86.h"
+#include "internal/cpuid_x86.h"
+
+namespace cpu_features {
+
+class FakeCpu {  // Scriptable stand-in answering the CpuId / GetXCR0Eax queries defined below.
+ public:
+  Leaf CpuId(uint32_t leaf_id) const {  // Unscripted leaf: records a test failure and yields a zeroed Leaf.
+    const auto itr = cpuid_leaves_.find(leaf_id);
+    EXPECT_TRUE(itr != cpuid_leaves_.end()) << "Missing leaf " << leaf_id;
+    return itr != cpuid_leaves_.end() ? itr->second : Leaf{};  // EXPECT_* is non-fatal: never touch end()
+  }
+
+  uint32_t GetXCR0Eax() const { return xcr0_eax_; }
+
+  void SetLeaves(std::map<uint32_t, Leaf> configuration) {  // Replaces the whole leaf table.
+    cpuid_leaves_ = std::move(configuration);
+  }
+
+  void SetOsBackupsExtendedRegisters(bool os_backups_extended_registers) {
+    xcr0_eax_ = os_backups_extended_registers ? -1 : 0;  // -1 converts to all bits set
+  }
+
+ private:
+  std::map<uint32_t, Leaf> cpuid_leaves_;
+  uint32_t xcr0_eax_ = 0;  // explicit default so the value never depends on how the object was created
+};
+
+auto* g_fake_cpu = new FakeCpu();  // Shared by every test in this file; no delete is visible here.
+
+extern "C" Leaf CpuId(uint32_t leaf_id) { return g_fake_cpu->CpuId(leaf_id); }  // C-linkage shim onto the fake
+extern "C" uint32_t GetXCR0Eax(void) { return g_fake_cpu->GetXCR0Eax(); }  // C-linkage shim onto the fake
+
+namespace {
+
+TEST(CpuidX86Test, SandyBridge) {  // Scripts leaves 0, 1 and 7 (leaf 7 all zero) with OS register backup enabled.
+  g_fake_cpu->SetOsBackupsExtendedRegisters(true);
+  g_fake_cpu->SetLeaves({
+      {0x00000000, Leaf{0x0000000D, 0x756E6547, 0x6C65746E, 0x49656E69}},
+      {0x00000001, Leaf{0x000206A6, 0x00100800, 0x1F9AE3BF, 0xBFEBFBFF}},
+      {0x00000007, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}},
+  });
+  const auto info = GetX86Info();
+  EXPECT_STREQ(info.vendor, "GenuineIntel");
+  EXPECT_EQ(info.family, 0x06);
+  EXPECT_EQ(info.model, 0x02A);
+  EXPECT_EQ(info.stepping, 0x06);
+  // Leaf 7 is zeroed out so none of the Leaf 7 flags are set.
+  const auto features = info.features;
+  EXPECT_FALSE(features.erms);
+  EXPECT_FALSE(features.avx2);
+  EXPECT_FALSE(features.avx512f);
+  EXPECT_FALSE(features.avx512cd);
+  EXPECT_FALSE(features.avx512er);
+  EXPECT_FALSE(features.avx512pf);
+  EXPECT_FALSE(features.avx512bw);
+  EXPECT_FALSE(features.avx512dq);
+  EXPECT_FALSE(features.avx512vl);
+  EXPECT_FALSE(features.avx512ifma);
+  EXPECT_FALSE(features.avx512vbmi);
+  EXPECT_FALSE(features.avx512vbmi2);
+  EXPECT_FALSE(features.avx512vnni);
+  EXPECT_FALSE(features.avx512bitalg);
+  EXPECT_FALSE(features.avx512vpopcntdq);
+  EXPECT_FALSE(features.avx512_4vnniw);
+  EXPECT_FALSE(features.avx512_4vbmi2);
+  // All old cpu features should be set.
+  EXPECT_TRUE(features.aes);
+  EXPECT_TRUE(features.ssse3);
+  EXPECT_TRUE(features.sse4_1);
+  EXPECT_TRUE(features.sse4_2);
+  EXPECT_TRUE(features.avx);
+}
+
+TEST(CpuidX86Test, SandyBridgeTestOsSupport) {  // Same leaves as SandyBridge; only the OS-backup flag is toggled.
+  g_fake_cpu->SetLeaves({
+      {0x00000000, Leaf{0x0000000D, 0x756E6547, 0x6C65746E, 0x49656E69}},
+      {0x00000001, Leaf{0x000206A6, 0x00100800, 0x1F9AE3BF, 0xBFEBFBFF}},
+      {0x00000007, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}},
+  });
+  // avx is disabled if os does not support backing up ymm registers.
+  g_fake_cpu->SetOsBackupsExtendedRegisters(false);
+  EXPECT_FALSE(GetX86Info().features.avx);
+  // avx is enabled once the os supports backing up ymm registers.
+  g_fake_cpu->SetOsBackupsExtendedRegisters(true);
+  EXPECT_TRUE(GetX86Info().features.avx);
+}
+
+TEST(CpuidX86Test, SkyLake) {  // Checks vendor, family/model/stepping and the INTEL_SKL classification.
+  g_fake_cpu->SetOsBackupsExtendedRegisters(true);
+  g_fake_cpu->SetLeaves({
+      {0x00000000, Leaf{0x00000016, 0x756E6547, 0x6C65746E, 0x49656E69}},
+      {0x00000001, Leaf{0x000406E3, 0x00100800, 0x7FFAFBBF, 0xBFEBFBFF}},
+      {0x00000007, Leaf{0x00000000, 0x029C67AF, 0x00000000, 0x00000000}},
+  });
+  const auto info = GetX86Info();
+  EXPECT_STREQ(info.vendor, "GenuineIntel");
+  EXPECT_EQ(info.family, 0x06);
+  EXPECT_EQ(info.model, 0x04E);
+  EXPECT_EQ(info.stepping, 0x03);
+  EXPECT_EQ(GetX86Microarchitecture(&info), X86Microarchitecture::INTEL_SKL);
+}
+
+TEST(CpuidX86Test, Branding) {  // Brand text is spelled out by the scripted 0x80000002..0x80000004 leaves.
+  g_fake_cpu->SetLeaves({
+      {0x00000000, Leaf{0x00000016, 0x756E6547, 0x6C65746E, 0x49656E69}},
+      {0x00000001, Leaf{0x000406E3, 0x00100800, 0x7FFAFBBF, 0xBFEBFBFF}},
+      {0x00000007, Leaf{0x00000000, 0x029C67AF, 0x00000000, 0x00000000}},
+      {0x80000000, Leaf{0x80000008, 0x00000000, 0x00000000, 0x00000000}},
+      {0x80000001, Leaf{0x00000000, 0x00000000, 0x00000121, 0x2C100000}},
+      {0x80000002, Leaf{0x65746E49, 0x2952286C, 0x726F4320, 0x4D542865}},
+      {0x80000003, Leaf{0x37692029, 0x3035362D, 0x43205530, 0x40205550}},
+      {0x80000004, Leaf{0x352E3220, 0x7A484730, 0x00000000, 0x00000000}},
+  });
+  char brand_string[49];  // presumably 3 leaves x 16 bytes plus a terminating NUL - TODO confirm
+  FillX86BrandString(brand_string);
+  EXPECT_STREQ(brand_string, "Intel(R) Core(TM) i7-6500U CPU @ 2.50GHz");
+}
+
+// http://users.atw.hu/instlatx64/AuthenticAMD0630F81_K15_Godavari_CPUID.txt
+TEST(CpuidX86Test, AMD_K15) {  // Checks vendor, family/model/stepping, AMD_BULLDOZER and the brand string.
+  g_fake_cpu->SetLeaves({
+      {0x00000000, Leaf{0x0000000D, 0x68747541, 0x444D4163, 0x69746E65}},
+      {0x00000001, Leaf{0x00630F81, 0x00040800, 0x3E98320B, 0x178BFBFF}},
+      {0x00000007, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}},
+      {0x80000000, Leaf{0x8000001E, 0x68747541, 0x444D4163, 0x69746E65}},
+      {0x80000001, Leaf{0x00630F81, 0x10000000, 0x0FEBBFFF, 0x2FD3FBFF}},
+      {0x80000002, Leaf{0x20444D41, 0x372D3841, 0x4B303736, 0x64615220}},
+      {0x80000003, Leaf{0x206E6F65, 0x202C3752, 0x43203031, 0x75706D6F}},
+      {0x80000004, Leaf{0x43206574, 0x7365726F, 0x2B433420, 0x00204736}},
+      {0x80000005, Leaf{0xFF40FF18, 0xFF40FF30, 0x10040140, 0x60030140}},
+  });
+  const auto info = GetX86Info();
+
+  EXPECT_STREQ(info.vendor, "AuthenticAMD");
+  EXPECT_EQ(info.family, 0x15);
+  EXPECT_EQ(info.model, 0x38);
+  EXPECT_EQ(info.stepping, 0x01);
+  EXPECT_EQ(GetX86Microarchitecture(&info),
+            X86Microarchitecture::AMD_BULLDOZER);
+
+  char brand_string[49];  // presumably 3 leaves x 16 bytes plus a terminating NUL - TODO confirm
+  FillX86BrandString(brand_string);
+  EXPECT_STREQ(brand_string, "AMD A8-7670K Radeon R7, 10 Compute Cores 4C+6G ");
+}
+
+// TODO(user): test what happens when xsave/osxsave are not present.
+// TODO(user): test what happens when xmm/ymm/zmm os support are not
+// present.
+
+}  // namespace
+}  // namespace cpu_features
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/filesystem_for_testing.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/filesystem_for_testing.cc
new file mode 100755
index 00000000..4554c1f0
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/filesystem_for_testing.cc
@@ -0,0 +1,103 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "filesystem_for_testing.h"
+
+#include <cassert>
+#include <climits>
+#include <cstdio>
+#include <cstring>
+#include <utility>
+
+namespace cpu_features {
+
+FakeFile::FakeFile(int file_descriptor, const char* content)  // `content` is copied into a std::string member
+    : file_descriptor_(file_descriptor), content_(content) {}
+
+FakeFile::~FakeFile() { assert(!opened_); }  // A file must have been closed before it is destroyed.
+
+void FakeFile::Open() {  // Marks the file open; opening an already-open file trips the assert.
+  assert(!opened_);
+  opened_ = true;  // NOTE(review): head_index_ is not rewound, so a re-opened file resumes where Read() stopped
+}
+
+void FakeFile::Close() {  // Marks the file closed; closing a file that is not open trips the assert.
+  assert(opened_);
+  opened_ = false;
+}
+
+int FakeFile::Read(int fd, void* buf, size_t count) {
+  assert(count < INT_MAX);
+  assert(fd == file_descriptor_);
+  const size_t unread = content_.size() - head_index_;  // bytes left after the cursor
+  const size_t served = unread < count ? unread : count;  // never hand out more than is left
+  memcpy(buf, content_.data() + head_index_, served);
+  head_index_ += served;
+  assert(served < INT_MAX);
+  return served;
+}
+
+void FakeFilesystem::Reset() { files_.clear(); }  // Destroys every FakeFile; the descriptor counter is untouched.
+
+FakeFile* FakeFilesystem::CreateFile(const std::string& filename,
+                                     const char* content) {
+  std::unique_ptr<FakeFile>& slot = files_[filename];  // creates the entry if missing
+  const int fd = next_file_descriptor_++;  // every created file gets a fresh descriptor
+  slot.reset(new FakeFile(fd, content));  // an existing file under this name is replaced
+  return slot.get();
+}
+
+FakeFile* FakeFilesystem::FindFileOrNull(const std::string& filename) const {
+  const auto found = files_.find(filename);  // lookup by name
+  return found != files_.end() ? found->second.get() : nullptr;
+}
+
+FakeFile* FakeFilesystem::FindFileOrDie(const int file_descriptor) const {
+  // Linear scan: files_ is keyed by filename, not by descriptor.
+  for (auto it = files_.begin(); it != files_.end(); ++it) {
+    FakeFile* const candidate = it->second.get();
+    if (candidate->GetFileDescriptor() == file_descriptor) return candidate;
+  }
+  // No registered file owns this descriptor.
+  assert(false);
+  return nullptr;
+}
+
+static FakeFilesystem* kFilesystem = new FakeFilesystem();  // single shared instance; no delete is visible here
+
+FakeFilesystem& GetEmptyFilesystem() {  // Drops every registered file, then returns the shared instance.
+  kFilesystem->Reset();
+  return *kFilesystem;
+}
+
+extern "C" int CpuFeatures_OpenFile(const char* filename) {
+  FakeFile* const fake = kFilesystem->FindFileOrNull(filename);
+  if (fake == nullptr) {
+    return -1;  // unknown path
+  }
+  fake->Open();
+  return fake->GetFileDescriptor();
+}
+
+extern "C" void CpuFeatures_CloseFile(int file_descriptor) {  // Closes the fake that owns this descriptor.
+  kFilesystem->FindFileOrDie(file_descriptor)->Close();  // an unknown descriptor trips the assert in FindFileOrDie
+}
+
+extern "C" int CpuFeatures_ReadFile(int file_descriptor, void* buffer,
+                                    size_t buffer_size) {
+  FakeFile* const fake = kFilesystem->FindFileOrDie(file_descriptor);
+  return fake->Read(file_descriptor, buffer, buffer_size);
+}
+
+}  // namespace cpu_features
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/filesystem_for_testing.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/filesystem_for_testing.h
new file mode 100755
index 00000000..ca269e52
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/filesystem_for_testing.h
@@ -0,0 +1,61 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Implements a fake filesystem, useful for tests.
+#ifndef CPU_FEATURES_TEST_FILESYSTEM_FOR_TESTING_H_
+#define CPU_FEATURES_TEST_FILESYSTEM_FOR_TESTING_H_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "internal/filesystem.h"
+
+namespace cpu_features {
+
+class FakeFile {  // In-memory file with a fixed descriptor, fixed content and a sequential read cursor.
+ public:
+  explicit FakeFile(int file_descriptor, const char* content);
+  ~FakeFile();
+
+  void Open();
+  void Close();
+  int Read(int fd, void* buf, size_t count);  // fd must be this file's own descriptor (asserted in the .cc)
+
+  int GetFileDescriptor() const { return file_descriptor_; }
+
+ private:
+  const int file_descriptor_;
+  const std::string content_;
+  bool opened_ = false;  // toggled by Open() / Close()
+  size_t head_index_ = 0;  // offset into content_ of the next byte Read() hands out
+};
+
+class FakeFilesystem {  // Registry of FakeFile objects, looked up by filename or by descriptor.
+ public:
+  void Reset();  // drops every registered file
+  FakeFile* CreateFile(const std::string& filename, const char* content);
+  FakeFile* FindFileOrDie(const int file_descriptor) const;
+  FakeFile* FindFileOrNull(const std::string& filename) const;
+
+ private:
+  int next_file_descriptor_ = 0;  // int, matching FakeFile's descriptor type (was size_t, narrowed on each use)
+  std::unordered_map<std::string, std::unique_ptr<FakeFile>> files_;
+};
+
+FakeFilesystem& GetEmptyFilesystem();
+
+}  // namespace cpu_features
+
+#endif  // CPU_FEATURES_TEST_FILESYSTEM_FOR_TESTING_H_
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/hwcaps_for_testing.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/hwcaps_for_testing.cc
new file mode 100755
index 00000000..07f68e8a
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/hwcaps_for_testing.cc
@@ -0,0 +1,45 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string.h>
+
+#include "hwcaps_for_testing.h"
+#include "internal/string_view.h"
+
+namespace cpu_features {
+
+namespace {
+// Fake state served by the CpuFeatures_Get* functions in this file. Both
+// records are allocated once and are never deleted here. The anonymous
+// namespace already gives them internal linkage.
+HardwareCapabilities* const g_hardware_capabilities =
+    new HardwareCapabilities();
+PlatformType* const g_platform_types = new PlatformType();
+}  // namespace
+
+// Overwrites both capability words of the fake hardware-capabilities record.
+void SetHardwareCapabilities(uint32_t hwcaps, uint32_t hwcaps2) {
+  HardwareCapabilities& fake_caps = *g_hardware_capabilities;
+  fake_caps.hwcaps = hwcaps;
+  fake_caps.hwcaps2 = hwcaps2;
+}
+
+// Returns a copy of the fake record last filled by SetHardwareCapabilities().
+HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void) {
+  const HardwareCapabilities& fake_caps = *g_hardware_capabilities;
+  return fake_caps;
+}
+
+// Copies both platform names into the fake PlatformType record. Each copy is
+// bounded by the size of its destination array.
+void SetPlatformTypes(const char* platform, const char* base_platform) {
+  PlatformType& fake_types = *g_platform_types;
+
+  const StringView platform_view = str(platform);
+  CpuFeatures_StringView_CopyString(platform_view, fake_types.platform,
+                                    sizeof(fake_types.platform));
+
+  const StringView base_platform_view = str(base_platform);
+  CpuFeatures_StringView_CopyString(base_platform_view,
+                                    fake_types.base_platform,
+                                    sizeof(fake_types.base_platform));
+}
+
+// Returns a copy of the fake record last filled by SetPlatformTypes().
+PlatformType CpuFeatures_GetPlatformType(void) {
+  const PlatformType& fake_types = *g_platform_types;
+  return fake_types;
+}
+}  // namespace cpu_features
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/hwcaps_for_testing.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/hwcaps_for_testing.h
new file mode 100755
index 00000000..0d037772
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/hwcaps_for_testing.h
@@ -0,0 +1,27 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef CPU_FEATURES_TEST_HWCAPS_FOR_TESTING_H_
+#define CPU_FEATURES_TEST_HWCAPS_FOR_TESTING_H_
+
+#include "internal/hwcaps.h"
+
+namespace cpu_features {
+
+// Test-only setters for the values reported by the hwcaps query functions.
+
+// Sets the two capability words (hwcaps, hwcaps2) that will be reported.
+void SetHardwareCapabilities(uint32_t hwcaps, uint32_t hwcaps2);
+// Sets the platform and base-platform names that will be reported.
+// NOTE(review): both arguments are presumably expected to be non-null
+// NUL-terminated strings - confirm against the definition.
+void SetPlatformTypes(const char *platform, const char *base_platform);
+
+}  // namespace cpu_features
+
+#endif  // CPU_FEATURES_TEST_HWCAPS_FOR_TESTING_H_
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/linux_features_aggregator_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/linux_features_aggregator_test.cc
new file mode 100755
index 00000000..99367dc4
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/linux_features_aggregator_test.cc
@@ -0,0 +1,95 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <array>
+
+#include "internal/linux_features_aggregator.h"
+
+#include "gtest/gtest.h"
+
+namespace cpu_features {
+
+namespace {
+
+// Minimal feature set used as the aggregation target in these tests; every
+// flag starts out false.
+struct Features {
+  bool a = false;
+  bool b = false;
+  bool c = false;
+};
+
+// NOTE(review): DECLARE_SETTER presumably generates the set_a / set_b / set_c
+// functions referenced by the fixture's kConfigs table - confirm against
+// internal/linux_features_aggregator.h.
+DECLARE_SETTER(Features, a)
+DECLARE_SETTER(Features, b)
+DECLARE_SETTER(Features, c)
+
+// Fixture providing one capability table shared by all tests below.
+class LinuxFeatureAggregatorTest : public testing::Test {
+ public:
+  // One entry per feature: {{hwcaps mask, hwcaps2 mask}, flag name, setter}.
+  // "a" and "b" are keyed on hwcaps bits, "c" on hwcaps2 bits 0b1100.
+  // NOTE(review): whether a multi-bit mask needs all or any of its bits set
+  // is not visible here - confirm in the aggregator implementation.
+  const std::array<CapabilityConfig, 3> kConfigs = {
+      {{{0b0001, 0b0000}, "a", &set_a},
+       {{0b0010, 0b0000}, "b", &set_b},
+       {{0b0000, 0b1100}, "c", &set_c}}};
+};
+
+// An empty flag string must not switch on any feature.
+TEST_F(LinuxFeatureAggregatorTest, FromFlagsEmpty) {
+  const StringView no_flags = str("");
+  Features parsed;
+  CpuFeatures_SetFromFlags(kConfigs.size(), kConfigs.data(), no_flags,
+                           &parsed);
+  EXPECT_FALSE(parsed.a);
+  EXPECT_FALSE(parsed.b);
+  EXPECT_FALSE(parsed.c);
+}
+
+// All three flags present, in a different order than the table lists them.
+TEST_F(LinuxFeatureAggregatorTest, FromFlagsAllSet) {
+  const StringView every_flag = str("a c b");
+  Features parsed;
+  CpuFeatures_SetFromFlags(kConfigs.size(), kConfigs.data(), every_flag,
+                           &parsed);
+  EXPECT_TRUE(parsed.a);
+  EXPECT_TRUE(parsed.b);
+  EXPECT_TRUE(parsed.c);
+}
+
+// A single flag enables only its own feature.
+TEST_F(LinuxFeatureAggregatorTest, FromFlagsOnlyA) {
+  const StringView only_a = str("a");
+  Features parsed;
+  CpuFeatures_SetFromFlags(kConfigs.size(), kConfigs.data(), only_a,
+                           &parsed);
+  EXPECT_TRUE(parsed.a);
+  EXPECT_FALSE(parsed.b);
+  EXPECT_FALSE(parsed.c);
+}
+
+// With both capability words cleared, no feature may be reported.
+TEST_F(LinuxFeatureAggregatorTest, FromHwcapsNone) {
+  HardwareCapabilities no_bits;
+  no_bits.hwcaps = 0;   // matches none
+  no_bits.hwcaps2 = 0;  // matches none
+
+  Features detected;
+  CpuFeatures_OverrideFromHwCaps(kConfigs.size(), kConfigs.data(), no_bits,
+                                 &detected);
+
+  EXPECT_FALSE(detected.a);
+  EXPECT_FALSE(detected.b);
+  EXPECT_FALSE(detected.c);
+}
+
+// hwcaps selects "b" only; hwcaps2 has every low bit set and so covers "c".
+TEST_F(LinuxFeatureAggregatorTest, FromHwcapsSet) {
+  HardwareCapabilities some_bits;
+  some_bits.hwcaps = 0b0010;   // matches b but not a
+  some_bits.hwcaps2 = 0b1111;  // matches c
+
+  Features detected;
+  CpuFeatures_OverrideFromHwCaps(kConfigs.size(), kConfigs.data(), some_bits,
+                                 &detected);
+
+  EXPECT_FALSE(detected.a);
+  EXPECT_TRUE(detected.b);
+  EXPECT_TRUE(detected.c);
+}
+
+}  // namespace
+}  // namespace cpu_features
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/stack_line_reader_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/stack_line_reader_test.cc
new file mode 100755
index 00000000..c8f96910
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/stack_line_reader_test.cc
@@ -0,0 +1,132 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "internal/stack_line_reader.h"
+#include "filesystem_for_testing.h"
+
+#include "gtest/gtest.h"
+
+namespace cpu_features {
+
+// Lets EXPECT_EQ compare two StringView values through the library's own
+// equality routine.
+bool operator==(const StringView& a, const StringView& b) {
+  const bool same = CpuFeatures_StringView_IsEquals(a, b);
+  return same;
+}
+
+namespace {
+
+// Copies the bytes referenced by `view` into an owning std::string.
+std::string ToString(StringView view) {
+  return std::string(view.ptr, view.size);
+}
+
+// A zero-length file yields a single empty, complete line flagged as EOF.
+TEST(StackLineReaderTest, Empty) {
+  FakeFile* const cpuinfo =
+      GetEmptyFilesystem().CreateFile("/proc/cpuinfo", "");
+  StackLineReader line_reader;
+  StackLineReader_Initialize(&line_reader, cpuinfo->GetFileDescriptor());
+
+  const auto only_line = StackLineReader_NextLine(&line_reader);
+  EXPECT_TRUE(only_line.eof);
+  EXPECT_TRUE(only_line.full_line);
+  EXPECT_EQ(only_line.line, str(""));
+}
+
+// Three short lines; only the last one is reported together with EOF.
+TEST(StackLineReaderTest, ManySmallLines) {
+  FakeFile* const cpuinfo =
+      GetEmptyFilesystem().CreateFile("/proc/cpuinfo", "a\nb\nc");
+
+  StackLineReader line_reader;
+  StackLineReader_Initialize(&line_reader, cpuinfo->GetFileDescriptor());
+
+  const auto first = StackLineReader_NextLine(&line_reader);
+  EXPECT_FALSE(first.eof);
+  EXPECT_TRUE(first.full_line);
+  EXPECT_EQ(first.line, str("a"));
+
+  const auto second = StackLineReader_NextLine(&line_reader);
+  EXPECT_FALSE(second.eof);
+  EXPECT_TRUE(second.full_line);
+  EXPECT_EQ(second.line, str("b"));
+
+  const auto third = StackLineReader_NextLine(&line_reader);
+  EXPECT_TRUE(third.eof);
+  EXPECT_TRUE(third.full_line);
+  EXPECT_EQ(third.line, str("c"));
+}
+
+// One over-long line between short ones: it is cut to its first 16
+// characters and flagged as incomplete, and the following line is still read
+// from its own start.
+TEST(StackLineReaderTest, TruncatedLine) {
+  FakeFile* const cpuinfo =
+      GetEmptyFilesystem().CreateFile("/proc/cpuinfo", R"(First
+Second
+More than 16 characters, this will be truncated.
+last)");
+
+  StackLineReader line_reader;
+  StackLineReader_Initialize(&line_reader, cpuinfo->GetFileDescriptor());
+
+  const auto first = StackLineReader_NextLine(&line_reader);
+  EXPECT_FALSE(first.eof);
+  EXPECT_TRUE(first.full_line);
+  EXPECT_EQ(first.line, str("First"));
+
+  const auto second = StackLineReader_NextLine(&line_reader);
+  EXPECT_FALSE(second.eof);
+  EXPECT_TRUE(second.full_line);
+  EXPECT_EQ(second.line, str("Second"));
+
+  const auto truncated = StackLineReader_NextLine(&line_reader);
+  EXPECT_FALSE(truncated.eof);
+  EXPECT_FALSE(truncated.full_line);
+  EXPECT_EQ(truncated.line, str("More than 16 cha"));
+
+  const auto final_line = StackLineReader_NextLine(&line_reader);
+  EXPECT_TRUE(final_line.eof);
+  EXPECT_TRUE(final_line.full_line);
+  EXPECT_EQ(final_line.line, str("last"));
+}
+
+// Two consecutive over-long lines: both are truncated, and the read after
+// them reports an empty complete line at EOF.
+TEST(StackLineReaderTest, TruncatedLines) {
+  FakeFile* const cpuinfo =
+      GetEmptyFilesystem().CreateFile("/proc/cpuinfo", R"(More than 16 characters
+Another line that is too long)");
+
+  StackLineReader line_reader;
+  StackLineReader_Initialize(&line_reader, cpuinfo->GetFileDescriptor());
+
+  const auto first = StackLineReader_NextLine(&line_reader);
+  EXPECT_FALSE(first.eof);
+  EXPECT_FALSE(first.full_line);
+  EXPECT_EQ(first.line, str("More than 16 cha"));
+
+  const auto second = StackLineReader_NextLine(&line_reader);
+  EXPECT_FALSE(second.eof);
+  EXPECT_FALSE(second.full_line);
+  EXPECT_EQ(second.line, str("Another line tha"));
+
+  const auto at_end = StackLineReader_NextLine(&line_reader);
+  EXPECT_TRUE(at_end.eof);
+  EXPECT_TRUE(at_end.full_line);
+  EXPECT_EQ(at_end.line, str(""));
+}
+
+}  // namespace
+}  // namespace cpu_features
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/string_view_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/string_view_test.cc
new file mode 100755
index 00000000..abfcc2cd
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/string_view_test.cc
@@ -0,0 +1,144 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "internal/string_view.h"
+
+#include "gtest/gtest.h"
+
+namespace cpu_features {
+
+// Lets EXPECT_EQ compare two StringView values through the library's own
+// equality routine.
+bool operator==(const StringView& a, const StringView& b) {
+  const bool same = CpuFeatures_StringView_IsEquals(a, b);
+  return same;
+}
+
+namespace {
+
+// The canonical empty view has zero length and a null data pointer.
+TEST(StringViewTest, Empty) {
+  EXPECT_EQ(kEmptyStringView.size, 0);
+  EXPECT_EQ(kEmptyStringView.ptr, nullptr);
+}
+
+// str() wraps a literal: the length excludes the terminator and the data
+// starts at the first character.
+TEST(StringViewTest, Build) {
+  const StringView wrapped = str("test");
+  EXPECT_EQ(wrapped.size, 4);
+  EXPECT_EQ(wrapped.ptr[0], 't');
+}
+
+// Character search returns the zero-based position, or -1 on a miss.
+TEST(StringViewTest, CpuFeatures_StringView_IndexOfChar) {
+  const StringView haystack = str("test");
+
+  // Found.
+  EXPECT_EQ(CpuFeatures_StringView_IndexOfChar(haystack, 'e'), 1);
+  // Not found.
+  EXPECT_EQ(CpuFeatures_StringView_IndexOfChar(haystack, 'z'), -1);
+  // Empty.
+  EXPECT_EQ(CpuFeatures_StringView_IndexOfChar(kEmptyStringView, 'z'), -1);
+}
+
+// Substring search returns the zero-based position, or -1 on a miss. An empty
+// haystack or an empty needle both count as a miss.
+TEST(StringViewTest, CpuFeatures_StringView_IndexOf) {
+  const StringView haystack = str("test");
+
+  // Found.
+  EXPECT_EQ(CpuFeatures_StringView_IndexOf(haystack, str("es")), 1);
+  // Not found.
+  EXPECT_EQ(CpuFeatures_StringView_IndexOf(haystack, str("aa")), -1);
+  // Empty.
+  EXPECT_EQ(CpuFeatures_StringView_IndexOf(kEmptyStringView, str("aa")), -1);
+  EXPECT_EQ(CpuFeatures_StringView_IndexOf(str("aa"), kEmptyStringView), -1);
+}
+
+// Prefix matching; an empty prefix is deliberately reported as no match.
+TEST(StringViewTest, CpuFeatures_StringView_StartsWith) {
+  const StringView word = str("test");
+
+  EXPECT_TRUE(CpuFeatures_StringView_StartsWith(word, str("te")));
+  EXPECT_FALSE(CpuFeatures_StringView_StartsWith(word, str("")));
+  EXPECT_FALSE(CpuFeatures_StringView_StartsWith(word, kEmptyStringView));
+  EXPECT_FALSE(
+      CpuFeatures_StringView_StartsWith(kEmptyStringView, str("test")));
+}
+
+// Equality: the null empty view and str("") compare equal to each other,
+// while views of different length or content do not.
+TEST(StringViewTest, CpuFeatures_StringView_IsEquals) {
+  // Equal pairs.
+  EXPECT_TRUE(
+      CpuFeatures_StringView_IsEquals(kEmptyStringView, kEmptyStringView));
+  EXPECT_TRUE(CpuFeatures_StringView_IsEquals(kEmptyStringView, str("")));
+  EXPECT_TRUE(CpuFeatures_StringView_IsEquals(str(""), kEmptyStringView));
+  EXPECT_TRUE(CpuFeatures_StringView_IsEquals(str("a"), str("a")));
+
+  // Unequal pairs.
+  EXPECT_FALSE(CpuFeatures_StringView_IsEquals(str("a"), str("b")));
+  EXPECT_FALSE(CpuFeatures_StringView_IsEquals(str("a"), kEmptyStringView));
+  EXPECT_FALSE(CpuFeatures_StringView_IsEquals(kEmptyStringView, str("a")));
+}
+
+// Dropping leading characters; counts at or beyond the length give an empty
+// view.
+TEST(StringViewTest, CpuFeatures_StringView_PopFront) {
+  const StringView word = str("test");
+
+  EXPECT_EQ(CpuFeatures_StringView_PopFront(word, 2), str("st"));
+  EXPECT_EQ(CpuFeatures_StringView_PopFront(word, 0), str("test"));
+  EXPECT_EQ(CpuFeatures_StringView_PopFront(word, 4), str(""));
+  EXPECT_EQ(CpuFeatures_StringView_PopFront(word, 100), str(""));
+}
+
+// Decimal and 0x-prefixed hexadecimal (either case) are accepted; anything
+// else, including a leading minus sign or empty input, yields -1.
+TEST(StringViewTest, CpuFeatures_StringView_ParsePositiveNumber) {
+  // Accepted spellings of 42.
+  EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("42")), 42);
+  EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("0x2a")), 42);
+  EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("0x2A")), 42);
+
+  // Rejected inputs.
+  EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("-0x2A")), -1);
+  EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("abc")), -1);
+  EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("")), -1);
+}
+
+// Copying into a 4-byte buffer: the result is always NUL-terminated and is
+// truncated to three characters when the source is longer.
+TEST(StringViewTest, CpuFeatures_StringView_CopyString) {
+  char dest[4];
+  // Non-NUL first byte, so the empty-copy check below cannot pass by accident.
+  dest[0] = 'X';
+
+  // Empty
+  CpuFeatures_StringView_CopyString(str(""), dest, sizeof(dest));
+  EXPECT_STREQ(dest, "");
+
+  // Less
+  CpuFeatures_StringView_CopyString(str("a"), dest, sizeof(dest));
+  EXPECT_STREQ(dest, "a");
+
+  // Exact
+  CpuFeatures_StringView_CopyString(str("abc"), dest, sizeof(dest));
+  EXPECT_STREQ(dest, "abc");
+
+  // More
+  CpuFeatures_StringView_CopyString(str("abcd"), dest, sizeof(dest));
+  EXPECT_STREQ(dest, "abc");
+}
+
+// Whole-word lookup in a space-separated list.
+TEST(StringViewTest, CpuFeatures_StringView_HasWord) {
+  const StringView flags = str("first middle last");
+
+  // Find flags at beginning, middle and end.
+  EXPECT_TRUE(CpuFeatures_StringView_HasWord(flags, "first"));
+  EXPECT_TRUE(CpuFeatures_StringView_HasWord(flags, "middle"));
+  EXPECT_TRUE(CpuFeatures_StringView_HasWord(flags, "last"));
+
+  // Do not match partial flags
+  EXPECT_FALSE(CpuFeatures_StringView_HasWord(flags, "irst"));
+  EXPECT_FALSE(CpuFeatures_StringView_HasWord(flags, "mid"));
+  EXPECT_FALSE(CpuFeatures_StringView_HasWord(flags, "las"));
+}
+
+// "key : value" splitting; surrounding whitespace is trimmed from both parts
+// while the value's inner spaces are kept.
+TEST(StringViewTest, CpuFeatures_StringView_GetAttributeKeyValue) {
+  const StringView attribute_line = str(" key :   first middle last   ");
+  StringView parsed_key, parsed_value;
+
+  const bool ok = CpuFeatures_StringView_GetAttributeKeyValue(
+      attribute_line, &parsed_key, &parsed_value);
+
+  EXPECT_TRUE(ok);
+  EXPECT_EQ(parsed_key, str("key"));
+  EXPECT_EQ(parsed_value, str("first middle last"));
+}
+
+// A line without the ':' separator is rejected.
+TEST(StringViewTest, FailingGetAttributeKeyValue) {
+  const StringView no_separator = str("key  first middle last");
+  StringView parsed_key, parsed_value;
+
+  const bool ok = CpuFeatures_StringView_GetAttributeKeyValue(
+      no_separator, &parsed_key, &parsed_value);
+
+  EXPECT_FALSE(ok);
+}
+
+}  // namespace
+}  // namespace cpu_features
diff --git a/src/crypto/argon2_hasher/hash/gpu/cuda/blake2b.cu b/src/crypto/argon2_hasher/hash/gpu/cuda/blake2b.cu
new file mode 100644
index 00000000..db94e488
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/gpu/cuda/blake2b.cu
@@ -0,0 +1,353 @@
+// NOTE(review): despite the *_BYTES names, these two sizes are used as counts
+// of 32-bit words: the buffers they bound are uint32_t* and the byte counter
+// is advanced by `inc * 4` in blake2b_incrementCounter. 32 words = 128 bytes
+// (one BLAKE2b block), 16 words = 64 bytes (a full BLAKE2b digest).
+#define BLOCK_BYTES	32
+#define OUT_BYTES	16
+// 480 == 120 * 4: the same scratch size in bytes and in 32-bit words.
+// NOTE(review): presumably the per-hash shared-memory budget handed to
+// blake2b_digestLong as `shared` - confirm at the kernel launch site.
+#define BLAKE_SHARED_MEM            480
+#define BLAKE_SHARED_MEM_UINT       120
+
+// BLAKE2b mixing function G applied to the state words (a, b, c, d).
+// The two message words come from m[], selected through blake2b_sigma for
+// round r and slot i (entries 2*i and 2*i+1). Rotation amounts are
+// 32, 24, 16 and 63. All four state arguments are updated in place.
+// The `while ((void)0, 0)` tail is a run-once loop wrapper.
+// NOTE(review): the comma form is presumably there to silence
+// constant-condition warnings - confirm.
+#define G(m, r, i, a, b, c, d) \
+do { \
+	a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
+	d = rotr64(d ^ a, 32); \
+	c = c + d; \
+	b = rotr64(b ^ c, 24); \
+	a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
+	d = rotr64(d ^ a, 16); \
+	c = c + d; \
+	b = rotr64(b ^ c, 63); \
+} while ((void)0, 0)
+
+// Variant of G for a message block in which every 64-bit word has the same
+// value: `m` is that scalar word and is added in both halves of the step, so
+// no blake2b_sigma lookup is needed. Rotation amounts are 32, 24, 16 and 63.
+#define G_S(m, a, b, c, d) \
+do { \
+	a = a + b + m; \
+	d = rotr64(d ^ a, 32); \
+	c = c + d; \
+	b = rotr64(b ^ c, 24); \
+	a = a + b + m; \
+	d = rotr64(d ^ a, 16); \
+	c = c + d; \
+	b = rotr64(b ^ c, 63); \
+} while ((void)0, 0)
+
+// One full BLAKE2b round spread over a group of 4 lanes. Expects v0..v3 to be
+// in scope; lane `t` holds one column of the 4x4 state.
+//   1. Column step: G with slot t.
+//   2. v1/v2/v3 are fetched from lanes t+1/t+2/t+3 (shuffle width 4), which
+//      lines the diagonals up in each lane.
+//   3. Diagonal step: G with slot t+4.
+//   4. The inverse shuffles (t+3/t+2/t+1) restore the column layout.
+// Per the CUDA documentation a source lane outside [0, width) is taken modulo
+// width, so t+1..t+3 wrap inside the 4-lane group.
+// NOTE(review): the 0xFFFFFFFF mask names all 32 lanes of the warp as
+// participants, so every lane is expected to reach these shuffles - confirm
+// that callers never diverge around this macro.
+#define ROUND(m, t, r) \
+do { \
+	G(m, r, t, v0, v1, v2, v3); \
+    v1 = __shfl_sync(0xFFFFFFFF, v1, t + 1, 4); \
+    v2 = __shfl_sync(0xFFFFFFFF, v2, t + 2, 4); \
+    v3 = __shfl_sync(0xFFFFFFFF, v3, t + 3, 4); \
+	G(m, r, (t + 4), v0, v1, v2, v3); \
+    v1 = __shfl_sync(0xFFFFFFFF, v1, t + 3, 4); \
+    v2 = __shfl_sync(0xFFFFFFFF, v2, t + 2, 4); \
+    v3 = __shfl_sync(0xFFFFFFFF, v3, t + 1, 4); \
+} while ((void)0, 0)
+
+// Same round structure as ROUND (column step, rotate across the 4-lane group,
+// diagonal step, rotate back), but built on G_S: `m` is a single 64-bit word
+// used for every message position, so no round index is needed.
+// Expects v0..v3 to be in scope.
+#define ROUND_S(m, t) \
+do { \
+	G_S(m, v0, v1, v2, v3); \
+    v1 = __shfl_sync(0xFFFFFFFF, v1, t + 1, 4); \
+    v2 = __shfl_sync(0xFFFFFFFF, v2, t + 2, 4); \
+    v3 = __shfl_sync(0xFFFFFFFF, v3, t + 3, 4); \
+	G_S(m, v0, v1, v2, v3); \
+    v1 = __shfl_sync(0xFFFFFFFF, v1, t + 3, 4); \
+    v2 = __shfl_sync(0xFFFFFFFF, v2, t + 2, 4); \
+    v3 = __shfl_sync(0xFFFFFFFF, v3, t + 1, 4); \
+} while ((void)0, 0)
+
+// BLAKE2b initialization vector (the eight IV words listed in RFC 7693),
+// placed in constant memory.
+__constant__ uint64_t blake2b_IV[8] = {
+    0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
+    0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
+    0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
+    0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
+};
+
+// Message-word schedule: row r gives the order in which the 16 message words
+// are consumed in round r. There is one row per round (12); rows 10 and 11
+// repeat rows 0 and 1.
+__constant__ uint32_t blake2b_sigma[12][16] = {
+    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+    {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+    {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
+    {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
+    {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
+    {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
+    {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
+    {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
+    {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
+    {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
+    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+    {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+};
+
+// Rotates the 64-bit value `x` right by `n` bit positions.
+// Both shift counts are reduced modulo 64, so n == 0 (or any multiple of 64)
+// returns x unchanged instead of evaluating the undefined `x << 64`. For the
+// amounts used in this file (16, 24, 32, 63) the result is the same as the
+// unmasked form.
+__device__ uint64_t rotr64(uint64_t x, uint32_t n)
+{
+    return (x >> (n & 63)) | (x << ((64 - n) & 63));
+}
+
+// BLAKE2b compression of one message block, executed cooperatively by a group
+// of 4 lanes.
+//   h      - state: h[0..7] chaining value, h[8]/h[9] the two counter words.
+//   m      - the 16 message words of the block.
+//   f0     - finalization word; callers pass 0 or all ones (last block).
+//   thr_id - lane index inside the group; the indexing below (h[thr_id + 4]
+//            into an 8-word chaining value, shuffle width 4) requires 0..3.
+// NOTE(review): m[] may have been written by other lanes just before this
+// call and the first G step reads it before any __shfl_sync; on GPUs with
+// independent thread scheduling that may need a __syncwarp() - confirm.
+__device__ __forceinline__ void blake2b_compress(uint64_t *h, uint64_t *m, uint64_t f0, int thr_id)
+{
+    uint64_t v0, v1, v2, v3;
+
+    // Each lane holds one column of the 4x4 working state.
+    v0 = h[thr_id];
+    v1 = h[thr_id + 4];
+    v2 = blake2b_IV[thr_id];
+    v3 = blake2b_IV[thr_id + 4];
+
+    // Fold the counter (lanes 0 and 1) and the finalization word (lane 2)
+    // into the last row of the state.
+    if(thr_id == 0) v3 ^= h[8];
+    if(thr_id == 1) v3 ^= h[9];
+    if(thr_id == 2) v3 ^= f0;
+
+    // 12 rounds, one blake2b_sigma row each.
+    ROUND(m, thr_id, 0);
+    ROUND(m, thr_id, 1);
+    ROUND(m, thr_id, 2);
+    ROUND(m, thr_id, 3);
+    ROUND(m, thr_id, 4);
+    ROUND(m, thr_id, 5);
+    ROUND(m, thr_id, 6);
+    ROUND(m, thr_id, 7);
+    ROUND(m, thr_id, 8);
+    ROUND(m, thr_id, 9);
+    ROUND(m, thr_id, 10);
+    ROUND(m, thr_id, 11);
+
+    // Feed-forward: each lane updates its own two chaining words.
+    h[thr_id] ^= v0 ^ v2;
+    h[thr_id + 4] ^= v1 ^ v3;
+}
+
+// Same compression as blake2b_compress, specialised for a message block whose
+// 16 words all equal the scalar `m`. Because every word is identical, the
+// rounds need no sigma permutation and all 12 ROUND_S invocations are the
+// same.
+//   h      - state: h[0..7] chaining value, h[8]/h[9] the two counter words.
+//   f0     - finalization word.
+//   thr_id - lane index inside the 4-lane group (0..3).
+__device__ __forceinline__ void blake2b_compress_static(uint64_t *h, uint64_t m, uint64_t f0, int thr_id)
+{
+    uint64_t v0, v1, v2, v3;
+
+    // Each lane holds one column of the 4x4 working state.
+    v0 = h[thr_id];
+    v1 = h[thr_id + 4];
+    v2 = blake2b_IV[thr_id];
+    v3 = blake2b_IV[thr_id + 4];
+
+    // Fold the counter (lanes 0 and 1) and the finalization word (lane 2)
+    // into the last row of the state.
+    if(thr_id == 0) v3 ^= h[8];
+    if(thr_id == 1) v3 ^= h[9];
+    if(thr_id == 2) v3 ^= f0;
+
+    // 12 rounds.
+    ROUND_S(m, thr_id);
+    ROUND_S(m, thr_id);
+    ROUND_S(m, thr_id);
+    ROUND_S(m, thr_id);
+    ROUND_S(m, thr_id);
+    ROUND_S(m, thr_id);
+    ROUND_S(m, thr_id);
+    ROUND_S(m, thr_id);
+    ROUND_S(m, thr_id);
+    ROUND_S(m, thr_id);
+    ROUND_S(m, thr_id);
+    ROUND_S(m, thr_id);
+
+    // Feed-forward: each lane updates its own two chaining words.
+    h[thr_id] ^= v0 ^ v2;
+    h[thr_id + 4] ^= v1 ^ v3;
+}
+
+// Initialises the hash state for an unkeyed digest of `out_len` 32-bit words.
+// Each of the 4 lanes copies two IV words into h; lane 0 then clears the
+// counter words h[8]/h[9] and rewrites h[0] as IV[0] xor the parameter word.
+// That word combines the digest length in bytes (out_len * 4) with the
+// constants 1 << 16 and 1 << 24 (fanout = 1 and depth = 1 in the RFC 7693
+// parameter block).
+// Always returns 0, which callers reuse as the fresh buffer length.
+__device__ __forceinline__ int blake2b_init(uint64_t *h, int out_len, int thr_id)
+{
+    h[thr_id * 2] = blake2b_IV[thr_id * 2];
+    h[thr_id * 2 + 1] = blake2b_IV[thr_id * 2 + 1];
+
+    if(thr_id == 0) {
+        h[8] = h[9] = 0;
+        // Lane 0 wrote h[0] itself above, so this overwrite is ordered.
+        h[0] = 0x6A09E667F3BCC908 ^ ((out_len * 4) | (1 << 16) | (1 << 24));
+    }
+
+    return 0;
+}
+
+// Advances the 128-bit byte counter kept in h[8] (low word) and h[9] (high
+// word). `inc` is given in 32-bit words, hence the factor 4.
+__device__ __forceinline__ void blake2b_incrementCounter(uint64_t *h, int inc)
+{
+    const int added_bytes = inc * 4;
+
+    h[8] += added_bytes;
+    // The low word wrapped around if it is now smaller than what was added.
+    if (h[8] < added_bytes)
+        h[9] += 1;
+}
+
+// Absorbs `in_len` 32-bit words from `in` into the hash, cooperatively over a
+// 4-lane group.
+//   in, in_len - input words and their count.
+//   h          - hash state (chaining value plus counter in h[8]/h[9]).
+//   buf        - block buffer of BLOCK_BYTES words; buf_len words are in use.
+//   thr_id     - lane index inside the group (0..3).
+// Returns the number of words left in buf after the call.
+// Copies run in groups of 4 words, one word per lane; lane 0 then copies any
+// remainder of fewer than 4 words.
+// A block is compressed only when more input follows (strict `>`), so the
+// final block - even a full one - stays buffered for blake2b_final.
+// NOTE(review): buf is written by several lanes and read by blake2b_compress
+// right afterwards without an explicit barrier - confirm this is safe on the
+// targeted architectures.
+__device__ __forceinline__ int blake2b_update(uint32_t *in, int in_len, uint64_t *h, uint32_t *buf, int buf_len, int thr_id)
+{
+    uint32_t *cursor_in = in;
+    uint32_t *cursor_out = buf + buf_len;
+
+    if (buf_len + in_len > BLOCK_BYTES) {
+        // Top the partially filled buffer up to a whole block.
+        int left = BLOCK_BYTES - buf_len;
+
+        for(int i=0; i < (left >> 2); i++, cursor_in += 4, cursor_out += 4) {
+            cursor_out[thr_id] = cursor_in[thr_id];
+        }
+
+        if(thr_id == 0) {
+            for (int i = 0; i < (left % 4); i++) {
+                cursor_out[i] = cursor_in[i];
+            }
+            blake2b_incrementCounter(h, BLOCK_BYTES);
+        }
+
+        blake2b_compress(h, (uint64_t*)buf, 0, thr_id);
+
+        buf_len = 0;
+
+        in_len -= left;
+        in += left;
+
+        // Whole blocks that are not the last one: stage in buf and compress.
+        while (in_len > BLOCK_BYTES) {
+            if(thr_id == 0)
+                blake2b_incrementCounter(h, BLOCK_BYTES);
+
+            cursor_in = in;
+            cursor_out = buf;
+
+            for(int i=0; i < (BLOCK_BYTES / 4); i++, cursor_in += 4, cursor_out += 4) {
+                cursor_out[thr_id] = cursor_in[thr_id];
+            }
+
+            blake2b_compress(h, (uint64_t *)buf, 0, thr_id);
+
+            in_len -= BLOCK_BYTES;
+            in += BLOCK_BYTES;
+        }
+    }
+
+    // Buffer whatever is left (at most one block).
+    cursor_in = in;
+    cursor_out = buf + buf_len;
+
+    for(int i=0; i < (in_len >> 2); i++, cursor_in += 4, cursor_out += 4) {
+        cursor_out[thr_id] = cursor_in[thr_id];
+    }
+
+    if(thr_id == 0) {
+        for (int i = 0; i < (in_len % 4); i++) {
+            cursor_out[i] = cursor_in[i];
+        }
+    }
+
+    return buf_len + in_len;
+}
+
+// Variant of blake2b_update for an input that consists of the single 32-bit
+// word `in` repeated `in_len` times, so no input buffer is read.
+//   h      - hash state (chaining value plus counter in h[8]/h[9]).
+//   buf    - block buffer of BLOCK_BYTES words; buf_len words are in use.
+//   thr_id - lane index inside the 4-lane group (0..3).
+// Returns the number of words left in buf after the call.
+// Whole blocks of the repeated word are never staged in buf: they go straight
+// to blake2b_compress_static as the 64-bit word `in64` (`in` in both halves).
+__device__ __forceinline__ int blake2b_update_static(uint32_t in, int in_len, uint64_t *h, uint32_t *buf, int buf_len, int thr_id)
+{
+    // Two copies of `in` packed into one 64-bit message word.
+    uint64_t in64 = in;
+    in64 = in64 << 32;
+    in64 = in64 | in;
+
+    uint32_t *cursor_out = buf + buf_len;
+
+    if (buf_len + in_len > BLOCK_BYTES) {
+        // Top the partially filled buffer up to a whole block.
+        int left = BLOCK_BYTES - buf_len;
+
+        for(int i=0; i < (left >> 2); i++, cursor_out += 4) {
+            cursor_out[thr_id] = in;
+        }
+
+        if(thr_id == 0) {
+            for (int i = 0; i < (left % 4); i++) {
+                cursor_out[i] = in;
+            }
+            blake2b_incrementCounter(h, BLOCK_BYTES);
+        }
+
+        // This first block may mix earlier buffered data with `in`, so it
+        // takes the generic compression path.
+        blake2b_compress(h, (uint64_t*)buf, 0, thr_id);
+
+        buf_len = 0;
+
+        in_len -= left;
+
+        // Whole blocks made only of the repeated word.
+        while (in_len > BLOCK_BYTES) {
+            if(thr_id == 0)
+                blake2b_incrementCounter(h, BLOCK_BYTES);
+
+            blake2b_compress_static(h, in64, 0, thr_id);
+
+            in_len -= BLOCK_BYTES;
+        }
+    }
+
+    // Buffer whatever is left (at most one block).
+    cursor_out = buf + buf_len;
+
+    for(int i=0; i < (in_len >> 2); i++, cursor_out += 4) {
+        cursor_out[thr_id] = in;
+    }
+
+    if(thr_id == 0) {
+        for (int i = 0; i < (in_len % 4); i++) {
+            cursor_out[i] = in;
+        }
+    }
+
+    return buf_len + in_len;
+}
+
+// Finishes the hash: zero-pads the buffered block, compresses it as the last
+// block and copies `out_len` 32-bit words of the state to `out`.
+//   out, out_len - destination and digest length in 32-bit words.
+//   h            - hash state (chaining value plus counter in h[8]/h[9]).
+//   buf, buf_len - block buffer and the number of words it holds.
+//   thr_id       - lane index inside the 4-lane group (0..3).
+__device__ __forceinline__ void blake2b_final(uint32_t *out, int out_len, uint64_t *h, uint32_t *buf, int buf_len, int thr_id)
+{
+    int left = BLOCK_BYTES - buf_len;
+    uint32_t *cursor_out = buf + buf_len;
+
+    // Zero the unused tail of the block, 4 words per step across the lanes.
+    for(int i=0; i < (left >> 2); i++, cursor_out += 4) {
+        cursor_out[thr_id] = 0;
+    }
+
+    if(thr_id == 0) {
+        for (int i = 0; i < (left % 4); i++) {
+            cursor_out[i] = 0;
+        }
+        // Only the words actually buffered count towards the message length.
+        blake2b_incrementCounter(h, buf_len);
+    }
+
+    // All-ones finalization word marks the last block.
+    blake2b_compress(h, (uint64_t*)buf, 0xFFFFFFFFFFFFFFFF, thr_id);
+
+    // Emit the digest: the state reinterpreted as 32-bit words.
+    uint32_t *cursor_in = (uint32_t *)h;
+    cursor_out = out;
+
+    for(int i=0; i < (out_len >> 2); i++, cursor_in += 4, cursor_out += 4) {
+        cursor_out[thr_id] = cursor_in[thr_id];
+    }
+
+    if(thr_id == 0) {
+        for (int i = 0; i < (out_len % 4); i++) {
+            cursor_out[i] = cursor_in[i];
+        }
+    }
+}
+
+// Variable-length hash built on BLAKE2b, executed by a 4-lane group.
+//   out, out_len - destination and requested length in 32-bit words.
+//   in, in_len   - input and its length in 32-bit words.
+//   thr_id       - lane index inside the group (0..3).
+//   shared       - scratch memory laid out as: 10 uint64_t of state (h),
+//                  then the block buffer (buf), then a 16-word intermediate
+//                  digest (out_buffer) starting at buf[32].
+// The input is prefixed with the requested length in bytes. Requests of at
+// most OUT_BYTES words are served by one hash. Longer requests chain
+// full-size hashes, each hashing the previous digest and contributing the
+// first half (OUT_BYTES / 2 words) of its own digest, until the remainder
+// fits in one final hash.
+// NOTE(review): this structure matches Argon2's variable-length hash H'
+// (RFC 9106) - confirm against the reference implementation.
+__device__ void blake2b_digestLong(uint32_t *out, int out_len, uint32_t *in, int in_len, int thr_id, uint32_t *shared)
+{
+    uint64_t *h = (uint64_t*)shared;
+    uint32_t *buf = (uint32_t*)&h[10];
+    uint32_t *out_buffer = &buf[32];
+    int buf_len;
+
+    // Length prefix: one word holding the output size in bytes.
+    if(thr_id == 0) buf[0] = (out_len * 4);
+    buf_len = 1;
+
+    if (out_len <= OUT_BYTES) {
+        blake2b_init(h, out_len, thr_id);
+        buf_len = blake2b_update(in, in_len, h, buf, buf_len, thr_id);
+        blake2b_final(out, out_len, h, buf, buf_len, thr_id);
+    } else {
+        uint32_t *cursor_in = out_buffer;
+        uint32_t *cursor_out = out;
+
+        // First link of the chain hashes the prefixed input.
+        blake2b_init(h, OUT_BYTES, thr_id);
+        buf_len = blake2b_update(in, in_len, h, buf, buf_len, thr_id);
+        blake2b_final(out_buffer, OUT_BYTES, h, buf, buf_len, thr_id);
+
+        // Emit the first half of the digest (2 steps of 4 words).
+        for(int i=0; i < (OUT_BYTES / 8); i++, cursor_in += 4, cursor_out += 4) {
+            cursor_out[thr_id] = cursor_in[thr_id];
+        }
+
+        out += OUT_BYTES / 2;
+
+        int to_produce = out_len - OUT_BYTES / 2;
+        while (to_produce > OUT_BYTES) {
+            // blake2b_init returns 0, which resets buf_len for the new hash.
+            buf_len = blake2b_init(h, OUT_BYTES, thr_id);
+            buf_len = blake2b_update(out_buffer, OUT_BYTES, h, buf, buf_len, thr_id);
+            blake2b_final(out_buffer, OUT_BYTES, h, buf, buf_len, thr_id);
+
+            cursor_out = out;
+            cursor_in = out_buffer;
+            for(int i=0; i < (OUT_BYTES / 8); i++, cursor_in += 4, cursor_out += 4) {
+                cursor_out[thr_id] = cursor_in[thr_id];
+            }
+
+            out += OUT_BYTES / 2;
+            to_produce -= OUT_BYTES / 2;
+        }
+
+        // Last link: its digest is written to `out` in full.
+        buf_len = blake2b_init(h, to_produce, thr_id);
+        buf_len = blake2b_update(out_buffer, OUT_BYTES, h, buf, buf_len, thr_id);
+        blake2b_final(out, to_produce, h, buf, buf_len, thr_id);
+    }
+}
\ No newline at end of file
diff --git a/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.cpp b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.cpp
new file mode 100644
index 00000000..2046a321
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.cpp
@@ -0,0 +1,340 @@
+//
+// Created by Haifa Bogdan Adnan on 03/08/2018.
+//
+
+#include <crypto/Argon2_constants.h>
+
+#include "../../../common/common.h"
+
+#include "crypto/argon2_hasher/hash/Hasher.h"
+#include "crypto/argon2_hasher/hash/argon2/Argon2.h"
+
+#if defined(WITH_CUDA)
+
+#include <cuda_runtime.h>
+#include <driver_types.h>
+
+#include "cuda_hasher.h"
+#include "../../../common/DLLExport.h"
+
+// Labels the hasher as the CUDA GPU backend and starts it unconfigured:
+// zero intensity, no computing threads and an empty description.
+cuda_hasher::cuda_hasher() {
+    // Identification strings.
+    m_type = "GPU";
+    m_subType = "CUDA";
+    m_shortSubType = "NVD";
+    m_description = "";
+
+    // Nothing has been configured yet.
+    m_intensity = 0;
+    m_computingThreads = 0;
+}
+
+
+// Releases the per-device resources through cleanup().
+cuda_hasher::~cuda_hasher() {
+    cleanup();
+}
+
+// Selects the Argon2 profile for the algorithm/variant and enumerates the
+// CUDA devices. Returns false, with the reason stored in m_description, when
+// the device query reports an error or no device is found.
+bool cuda_hasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) {
+    m_profile = getArgon2Profile(algorithm, variant);
+
+    cudaError_t status = cudaSuccess;
+    string failure_reason;
+    __devices = __query_cuda_devices(status, failure_reason);
+
+    if (status != cudaSuccess) {
+        m_description = "No compatible GPU detected: " + failure_reason;
+        return false;
+    }
+
+    if (__devices.empty()) {
+        m_description = "No compatible GPU detected.";
+        return false;
+    }
+
+    return true;
+}
+
+// Enumerates the CUDA devices and returns one heap-allocated
+// cuda_device_info per device that could be probed. Ownership of the returned
+// pointers passes to the caller.
+//   error         - receives the failure code of the device-count query, or
+//                   the code of the last device whose probe failed.
+//   error_message - receives the matching human-readable message.
+// A device whose probe fails is left out of the result, and its record is
+// deleted here since no one else would ever see the pointer.
+vector<cuda_device_info *> cuda_hasher::__query_cuda_devices(cudaError_t &error, string &error_message) {
+    vector<cuda_device_info *> devices;
+    int devCount = 0;
+    error = cudaGetDeviceCount(&devCount);
+
+    if (error != cudaSuccess) {
+        error_message = "Error querying CUDA device count.";
+        return devices;
+    }
+
+    if (devCount == 0)
+        return devices;
+
+    for (int i = 0; i < devCount; ++i) {
+        cuda_device_info *dev = __get_device_info(i);
+        if (dev == NULL)
+            continue;
+
+        if (dev->error != cudaSuccess) {
+            // Copy the failure out before releasing the record.
+            error = dev->error;
+            error_message = dev->error_message;
+            delete dev;
+            continue;
+        }
+
+        devices.push_back(dev);
+    }
+    return devices;
+}
+
+// Probes one CUDA device and returns a newly allocated description of it; the
+// caller owns the result.
+// On success `error` is cudaSuccess and the record holds the device name
+// (suffixed with the total memory in GB), the free memory and a per-allocation
+// limit of a quarter of the free memory. On failure `error` / `error_message`
+// identify the step that failed and the remaining fields are left unset.
+// Side effect: makes `device_index` the calling thread's current CUDA device.
+cuda_device_info *cuda_hasher::__get_device_info(int device_index) {
+    cuda_device_info *device_info = new cuda_device_info();
+    device_info->error = cudaSuccess;
+    device_info->cuda_index = device_index;
+
+    device_info->error = cudaSetDevice(device_index);
+    if (device_info->error != cudaSuccess) {
+        device_info->error_message = "Error setting current device.";
+        return device_info;
+    }
+
+    cudaDeviceProp devProp;
+    device_info->error = cudaGetDeviceProperties(&devProp, device_index);
+    if (device_info->error != cudaSuccess) {
+        device_info->error_message = "Error getting device properties.";
+        return device_info;
+    }
+
+    device_info->device_string = devProp.name;
+
+    // cudaMemGetInfo reports on the current device selected above.
+    size_t freemem, totalmem;
+    device_info->error = cudaMemGetInfo(&freemem, &totalmem);
+    if (device_info->error != cudaSuccess) {
+        device_info->error_message = "Error getting device memory info.";
+        return device_info;
+    }
+
+    device_info->free_mem_size = freemem;
+    device_info->max_allocable_mem_size = freemem / 4;
+
+    // Append the total memory with two significant digits, e.g. " (7.9GB)".
+    double mem_in_gb = totalmem / 1073741824.0;
+    stringstream ss;
+    ss << setprecision(2) << mem_in_gb;
+    device_info->device_string += (" (" + ss.str() + "GB)");
+
+    return device_info;
+}
+
+// Applies the user configuration to the devices found by initialize().
+// Every device gets a global card index (continuing from the cards already
+// counted in `config`) and a line in m_description. A device is enabled when
+// it passes the optional GPU filter and __setup_device_info() succeeds.
+// Returns true when at least one device ends up with threads and
+// buildThreadData() succeeds. Otherwise returns false; except when
+// buildThreadData() fails, m_description then ends with the reason.
+bool cuda_hasher::configure(xmrig::HasherConfig &config) {
+    int index = config.getGPUCardsCount();
+    double intensity = 0;
+
+    int total_threads = 0;
+    intensity = config.getAverageGPUIntensity();
+
+    if (intensity == 0) {
+        m_intensity = 0;
+        m_description = "Status: DISABLED - by user.";
+        return false;
+    }
+
+    bool cards_selected = false;
+    // From here on `intensity` accumulates the intensity of enabled devices.
+    intensity = 0;
+
+    for (vector<cuda_device_info *>::iterator d = __devices.begin(); d != __devices.end(); d++, index++) {
+        stringstream ss;
+        ss << "["<< (index + 1) << "] " << (*d)->device_string;
+        string device_description = ss.str();
+        (*d)->device_index = index;
+        (*d)->profile_info.profile = m_profile;
+
+        if (config.gpuFilter().size() > 0) {
+            // With filters present, a device must match at least one of them
+            // as a substring of its "[n] name" description.
+            bool found = false;
+            for (const xmrig::GPUFilter &fit : config.gpuFilter()) {
+                if (device_description.find(fit.filter) != string::npos) {
+                    found = true;
+                    break;
+                }
+            }
+            if (!found) {
+                (*d)->profile_info.threads = 0;
+                ss << " - DISABLED" << endl;
+                m_description += ss.str();
+                continue;
+            }
+            else {
+                cards_selected = true;
+            }
+        }
+        else {
+            cards_selected = true;
+        }
+
+        ss << endl;
+
+        double device_intensity = config.getGPUIntensity((*d)->device_index);
+
+        m_description += ss.str();
+
+        if (!(__setup_device_info((*d), device_intensity))) {
+            m_description += (*d)->error_message;
+            m_description += "\n";
+            continue;
+        }
+
+        DeviceInfo device;
+
+        char bus_id[100];
+        if (cudaDeviceGetPCIBusId(bus_id, sizeof(bus_id), (*d)->cuda_index) == cudaSuccess) {
+            device.bus_id = bus_id;
+            // Strip the leading PCI domain ("dddd:") when one is present.
+            // size_t keeps the comparison with npos exact.
+            size_t domain_separator = device.bus_id.find(":");
+            if (domain_separator != string::npos) {
+                device.bus_id.erase(0, domain_separator + 1);
+            }
+        }
+
+        device.name = (*d)->device_string;
+        device.intensity = device_intensity;
+        storeDeviceInfo((*d)->device_index, device);
+
+        __enabledDevices.push_back(*d);
+
+        total_threads += (*d)->profile_info.threads;
+        intensity += device_intensity;
+    }
+
+    // Publish how many cards this hasher added to the global numbering.
+    config.addGPUCardsCount(index - config.getGPUCardsCount());
+
+    if (!cards_selected) {
+        m_intensity = 0;
+        m_description += "Status: DISABLED - no card enabled because of filtering.";
+        return false;
+    }
+
+    if (total_threads == 0) {
+        m_intensity = 0;
+        m_description += "Status: DISABLED - not enough resources.";
+        return false;
+    }
+
+    if (!buildThreadData())
+        return false;
+
+    // total_threads > 0 implies at least one enabled device, so the division
+    // below is safe.
+    m_intensity = intensity / __enabledDevices.size();
+    m_computingThreads = __enabledDevices.size() * 2; // 2 computing threads for each device
+    m_description += "Status: ENABLED - with " + to_string(total_threads) + " threads.";
+
+    return true;
+}
+
+// Hands every entry of __devices to cuda_free().
+void cuda_hasher::cleanup() {
+    for(cuda_device_info *device : __devices) {
+        cuda_free(device);
+    }
+}
+
+// Sizes the hashing workload for one device and allocates its GPU memory.
+//
+// device    - device to configure; profile_info.profile, max_allocable_mem_size and
+//             free_mem_size must already be filled in.
+// intensity - percentage of the maximum thread count to use; negative values are
+//             treated as 0.
+//
+// On success profile_info.threads_per_chunk and profile_info.threads are set and
+// cuda_allocate() has been called. Returns false with device->error and
+// device->error_message set when the profile is unusable, the memory is
+// insufficient or the allocation failed.
+bool cuda_hasher::__setup_device_info(cuda_device_info *device, double intensity) {
+    Argon2Profile *profile = device->profile_info.profile;
+    if(profile == nullptr || profile->memSize == 0) {
+        // Without this guard the divisions below would dereference null or divide by zero.
+        device->error = cudaErrorInitializationError;
+        device->error_message = "Invalid hashing profile.";
+        return false;
+    }
+
+    // Converting a negative double to uint32_t is undefined behaviour.
+    if(intensity < 0)
+        intensity = 0;
+
+    device->profile_info.threads_per_chunk = (uint32_t)(device->max_allocable_mem_size / profile->memSize);
+    // Widen before multiplying so the product cannot wrap in 32 bits.
+    size_t chunk_size = (size_t)device->profile_info.threads_per_chunk * profile->memSize;
+
+    if(chunk_size == 0) {
+        device->error = cudaErrorInitializationError;
+        device->error_message = "Not enough memory on GPU.";
+        return false;
+    }
+
+    uint64_t usable_memory = device->free_mem_size;
+    double chunks = (double)usable_memory / (double)chunk_size;
+
+    uint32_t max_threads = (uint32_t)(device->profile_info.threads_per_chunk * chunks);
+
+    if(max_threads == 0) {
+        device->error = cudaErrorInitializationError;
+        device->error_message = "Not enough memory on GPU.";
+        return false;
+    }
+
+    uint32_t threads = (uint32_t)(max_threads * intensity / 100.0);
+    threads = (threads / 2) * 2; // make it divisible by 2 to allow for parallel kernel execution
+    if(threads == 0 && intensity > 0)
+        threads = 2; // a non-zero intensity always gets at least one pair of threads
+    device->profile_info.threads = threads;
+
+    // Number of (possibly fractional) chunks actually needed for the chosen thread count.
+    chunks = (double)device->profile_info.threads / (double)device->profile_info.threads_per_chunk;
+
+    cuda_allocate(device, chunks, chunk_size);
+
+    return device->error == cudaSuccess;
+}
+
+// Creates the per-worker state: two cuda_gpumgmt_thread_data entries for every
+// enabled device, each with its own CUDA stream and Argon2 driver object.
+//
+// With PARALLEL_CUDA defined the device's threads are split between the two
+// workers; otherwise both workers cover all threads of the device.
+//
+// Returns false (after logging) when a device cannot be selected or a stream
+// cannot be created; the failing status is left in device->error.
+bool cuda_hasher::buildThreadData() {
+    __thread_data = new cuda_gpumgmt_thread_data[__enabledDevices.size() * 2];
+
+    for(size_t i = 0; i < __enabledDevices.size(); i++) {
+        cuda_device_info *device = __enabledDevices[i];
+
+        // A stream is bound to the device that is current when it is created, and
+        // compute() later selects device->cuda_index before using it. Select the
+        // same device here, otherwise every stream ends up on whichever GPU
+        // happened to be current.
+        device->error = cudaSetDevice(device->cuda_index);
+        if(device->error != cudaSuccess) {
+            LOG("Error running kernel: (" + to_string(device->error) + ") cannot select cuda device.");
+            return false;
+        }
+
+        for(int threadId = 0; threadId < 2; threadId ++) {
+            cuda_gpumgmt_thread_data &thread_data = __thread_data[i * 2 + threadId];
+            thread_data.device = device;
+            thread_data.thread_id = threadId;
+
+            cudaStream_t stream;
+            device->error = cudaStreamCreate(&stream);
+            if(device->error != cudaSuccess) {
+                LOG("Error running kernel: (" + to_string(device->error) + ") cannot create cuda stream.");
+                return false;
+            }
+
+            thread_data.device_data = stream;
+
+            #ifdef PARALLEL_CUDA
+                if(threadId == 0) {
+                    thread_data.threads_idx = 0;
+                    thread_data.threads = device->profile_info.threads / 2;
+                }
+                else {
+                    thread_data.threads_idx = device->profile_info.threads / 2;
+                    thread_data.threads = device->profile_info.threads - thread_data.threads_idx;
+                }
+            #else
+                thread_data.threads_idx = 0;
+                thread_data.threads = device->profile_info.threads;
+            #endif
+
+            thread_data.argon2 = new Argon2(cuda_kernel_prehasher, cuda_kernel_filler, cuda_kernel_posthasher,
+                                            nullptr, &thread_data);
+            thread_data.argon2->setThreads(thread_data.threads);
+            thread_data.hashData.outSize = xmrig::ARGON2_HASHLEN + 4;
+        }
+    }
+
+    return true;
+}
+
+// Runs one batch of hashes for the given worker.
+//
+// threadIdx - index into __thread_data (two workers per enabled device).
+// input     - hashing blob; the 32-bit nonce stored at byte offset 39 is advanced
+//             in place by the worker's thread count after a successful batch.
+// size      - length of input in bytes.
+// output    - buffer handed to Argon2::generateHashes() through hashData.output.
+//
+// Returns the value of generateHashes(), or 0 when the device could not be
+// selected or reported an error.
+int cuda_hasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) {
+    cuda_gpumgmt_thread_data &threadData = __thread_data[threadIdx];
+
+    cudaError_t status = cudaSetDevice(threadData.device->cuda_index);
+    if(status != cudaSuccess) {
+        LOG("Error running kernel: (" + to_string(status) + ") cannot select cuda device.");
+        return 0;
+    }
+
+    threadData.hashData.input = input;
+    threadData.hashData.inSize = size;
+    threadData.hashData.output = output;
+    int hashCount = threadData.argon2->generateHashes(*m_profile, threadData.hashData);
+    if(threadData.device->error != cudaSuccess) {
+        LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->error_message);
+        return 0;
+    }
+
+    // The nonce sits at an odd byte offset, so it is read and written byte by byte
+    // (native byte order, as before) instead of through a misaligned uint32_t
+    // pointer. Blobs too short to hold it are left untouched rather than written
+    // out of bounds.
+    const size_t nonce_offset = 39;
+    if(size >= nonce_offset + sizeof(uint32_t)) {
+        uint8_t *nonce_bytes = ((uint8_t*)threadData.hashData.input) + nonce_offset;
+        uint32_t nonce = 0;
+        uint8_t *nonce_raw = reinterpret_cast<uint8_t *>(&nonce);
+
+        for(size_t b = 0; b < sizeof(nonce); b++)
+            nonce_raw[b] = nonce_bytes[b];
+
+        nonce += threadData.threads;
+
+        for(size_t b = 0; b < sizeof(nonce); b++)
+            nonce_bytes[b] = nonce_raw[b];
+    }
+
+    return hashCount;
+}
+
+// Number of hashes the given worker computes per batch (its `threads` value).
+size_t cuda_hasher::parallelism(int workerIdx) {
+    return __thread_data[workerIdx].threads;
+}
+
+// Number of devices that configure() enabled.
+size_t cuda_hasher::deviceCount() {
+    const size_t enabled = __enabledDevices.size();
+    return enabled;
+}
+
+// NOTE(review): REGISTER_HASHER is defined outside this file; presumably it makes
+// cuda_hasher discoverable by the hasher registry - confirm against Hasher.h.
+REGISTER_HASHER(cuda_hasher);
+
+#endif //WITH_CUDA
diff --git a/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.h b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.h
new file mode 100644
index 00000000..2e668b8e
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.h
@@ -0,0 +1,126 @@
+//
+// Created by Haifa Bogdan Adnan on 18/09/2018.
+//
+
+#ifndef ARGON2_CUDA_HASHER_H
+#define ARGON2_CUDA_HASHER_H
+
+#if defined(WITH_CUDA)
+
+// Device-side buffers owned by one CUDA device.
+struct cuda_kernel_arguments {
+    // Scratchpad allocations; cuda_allocate() fills these with cudaMalloc(), one
+    // call per chunk (unused chunks get a 1-byte allocation).
+    void *memory_chunk_0;
+    void *memory_chunk_1;
+    void *memory_chunk_2;
+    void *memory_chunk_3;
+    void *memory_chunk_4;
+    void *memory_chunk_5;
+
+    // NOTE(review): presumably the device copies of the profile's reference/index/
+    // segment tables consumed by the fill_blocks kernel - confirm in cuda_allocate().
+    uint32_t *refs;
+    uint32_t *idxs;
+    uint32_t *segments;
+
+    // NOTE(review): two entries each - presumably one per computing thread of the
+    // device (buildThreadData() creates two workers per device); TODO confirm.
+	uint32_t *preseed_memory[2];
+	uint32_t *seed_memory[2];
+	uint32_t *out_memory[2];
+	uint32_t *hash_memory[2];
+
+    // NOTE(review): name suggests host-side staging buffers for the seeds - confirm.
+    uint32_t *host_seed_memory[2];
+};
+
+// Per-device sizing of the hashing workload for one Argon2 profile.
+struct argon2profile_info {
+    // Start with no threads and no profile; a null profile can then be detected
+    // instead of reading an indeterminate pointer.
+    argon2profile_info() : threads(0), threads_per_chunk(0), profile(nullptr) {
+    }
+
+    uint32_t threads;            // number of hashes computed per batch on the device
+    uint32_t threads_per_chunk;  // how many hashes fit into one memory chunk
+    Argon2Profile *profile;      // profile in use; not owned by this struct
+};
+
+// Everything the hasher tracks for one CUDA device.
+struct cuda_device_info {
+    // Every member starts in a defined state. In particular `arguments` is
+    // zero-initialized so a device that was never allocated carries null buffer
+    // pointers rather than garbage when it is later handed to cuda_free().
+    cuda_device_info() :
+            device_index(0),
+            cuda_index(0),
+            device_string(""),
+            free_mem_size(0),
+            max_allocable_mem_size(0),
+            arguments(),
+            error(cudaSuccess),
+            error_message("") {
+    }
+
+    int device_index;   // global card index assigned in cuda_hasher::configure()
+    int cuda_index;     // index passed to cudaSetDevice() / cudaDeviceGetPCIBusId()
+
+    string device_string;             // human readable device name
+    uint64_t free_mem_size;           // memory considered usable for hashing, in bytes
+    uint64_t max_allocable_mem_size;  // upper bound for a single chunk allocation, in bytes
+
+    argon2profile_info profile_info;
+    cuda_kernel_arguments arguments;
+
+    mutex device_lock;  // taken by cuda_gpumgmt_thread_data::lock() unless PARALLEL_CUDA is defined
+
+    cudaError_t error;     // last CUDA status recorded for this device
+    string error_message;  // text accompanying `error`
+};
+
+// State of one computing worker; cuda_hasher creates two of these per enabled device.
+struct cuda_gpumgmt_thread_data {
+    // The array is created with new[], so without a constructor every pointer and
+    // counter below would be indeterminate until buildThreadData() reaches it
+    // (and would stay so if it bails out early).
+    cuda_gpumgmt_thread_data() :
+            thread_id(0),
+            device(nullptr),
+            argon2(nullptr),
+            device_data(nullptr),
+            threads(0),
+            threads_idx(0) {
+    }
+
+    // Serializes access to the device unless the build runs both workers of a
+    // device in parallel (PARALLEL_CUDA), in which case this is a no-op.
+    void lock() {
+#ifndef PARALLEL_CUDA
+        device->device_lock.lock();
+#endif
+    }
+
+    // Counterpart of lock().
+    void unlock() {
+#ifndef PARALLEL_CUDA
+        device->device_lock.unlock();
+#endif
+    }
+
+    int thread_id;             // 0 or 1: which of the device's two workers this is
+    cuda_device_info *device;  // device the worker runs on; not owned
+    Argon2 *argon2;            // hashing driver created in buildThreadData()
+    HashData hashData;
+
+    void *device_data;         // the worker's cudaStream_t, stored type-erased
+
+    int threads;               // hashes computed per batch by this worker
+    int threads_idx;           // index of the worker's first hash within the device
+};
+
+// Hasher implementation that runs the Argon2 kernels on CUDA devices.
+class cuda_hasher : public Hasher {
+public:
+	cuda_hasher();
+	~cuda_hasher();
+
+    // NOTE(review): defined outside the visible part of cuda_hasher.cpp; presumably
+    // detects the devices and picks m_profile for the algorithm/variant - confirm.
+    virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant);
+    // Applies the GPU filters and intensities from config, allocates device memory
+    // and builds the per-worker data. Returns false when the hasher ends up disabled.
+    virtual bool configure(xmrig::HasherConfig &config);
+    // Calls cuda_free() for every detected device.
+    virtual void cleanup();
+    // Hashes one batch for worker threadIdx and advances the nonce inside input.
+    // Returns the number of hashes produced, 0 on a device error.
+    virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output);
+    // Number of hashes worker workerIdx computes per batch.
+    virtual size_t parallelism(int workerIdx);
+    // Number of devices enabled by configure().
+    virtual size_t deviceCount();
+
+private:
+    cuda_device_info *__get_device_info(int device_index);
+    // Computes threads_per_chunk/threads for the device and allocates its memory.
+    bool __setup_device_info(cuda_device_info *device, double intensity);
+    vector<cuda_device_info*> __query_cuda_devices(cudaError_t &error, string &error_message);
+    // Creates two workers (stream + Argon2 object) per enabled device.
+    bool buildThreadData();
+
+    vector<cuda_device_info*> __devices;         // every device known to the hasher
+    vector<cuda_device_info*> __enabledDevices;  // devices that passed filtering and setup
+    cuda_gpumgmt_thread_data *__thread_data;     // array of 2 * __enabledDevices.size() workers
+
+    Argon2Profile *m_profile;                    // profile handed to the devices and to generateHashes()
+};
+
+// CUDA kernel exports
+// Host-side entry points implemented in cuda_kernel.cu.
+// cuda_allocate: selects the device and allocates its memory chunks; the outcome
+// is reported through device->error / device->error_message.
+extern void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size);
+// NOTE(review): implementation not visible here; presumably releases what
+// cuda_allocate() created - confirm.
+extern void cuda_free(cuda_device_info *device);
+// The three callbacks below are passed to the Argon2 constructor in
+// cuda_hasher::buildThreadData(); user_data is the worker's cuda_gpumgmt_thread_data.
+extern bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data);
+extern void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data);
+extern bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data);
+// end CUDA kernel exports
+
+#endif //WITH_CUDA
+
+#endif //ARGON2_CUDA_HASHER_H
\ No newline at end of file
diff --git a/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu
new file mode 100644
index 00000000..eea358f2
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu
@@ -0,0 +1,1132 @@
+#include <driver_types.h>
+
+#include <crypto/Argon2_constants.h>
+
+#include "../../../common/common.h"
+
+#include "crypto/argon2_hasher/hash/Hasher.h"
+#include "crypto/argon2_hasher/hash/argon2/Argon2.h"
+
+#include "cuda_hasher.h"
+
+// Threads cooperating on one lane in fill_blocks (threadIdx.x is split into
+// lane = threadIdx.x / 32 and id = threadIdx.x % 32).
+#define THREADS_PER_LANE               32
+// Size of one block counted in uint4 elements (64 * 16 bytes).
+#define BLOCK_SIZE_UINT4                64
+// Size of one block counted in uint32_t elements (256 * 4 bytes).
+#define BLOCK_SIZE_UINT                256
+// NOTE(review): not referenced in the part of the file reviewed here.
+#define KERNEL_WORKGROUP_SIZE   		32
+// Length of the prehash digest in uint32_t words.
+#define ARGON2_PREHASH_DIGEST_LENGTH_UINT   16
+// Digest plus the two extra words (block index and lane) that prehash appends
+// before expanding it into a seed block.
+#define ARGON2_PREHASH_SEED_LENGTH_UINT     18
+
+
+#include "blake2b.cu"
+
+// One application of the Argon2 G mixing function on four 64-bit words held as
+// (low, high) 32-bit register pairs of the enclosing scope's tmp_a / tmp_b:
+//   a = (tmp_a.x, tmp_a.y)   b = (tmp_a.z, tmp_a.w)
+//   c = (tmp_b.x, tmp_b.y)   d = (tmp_b.z, tmp_b.w)
+// Sequence performed by the PTX below (each add is a + b + 2 * lo32(a) * lo32(b)):
+//   a += b; d = rotr64(d ^ a, 32);
+//   c += d; b = rotr64(b ^ c, 24);
+//   a += b; d = rotr64(d ^ a, 16);
+//   c += d; b = rotr64(b ^ c, 63);
+// The 32-bit rotation is done by swapping the two halves, the others with
+// shf.r.wrap funnel shifts. All eight registers are read-write operands; s1..s4
+// are scratch registers local to the asm block.
+#define COMPUTE	\
+	asm ("{"	\
+		".reg .u32 s1, s2, s3, s4;\n\t"	\
+		"mul.lo.u32 s3, %0, %2;\n\t"	\
+		"mul.hi.u32 s4, %0, %2;\n\t"	\
+		"add.cc.u32 s3, s3, s3;\n\t"	\
+		"addc.u32 s4, s4, s4;\n\t"	\
+		"add.cc.u32 s1, %0, %2;\n\t"	\
+		"addc.u32 s2, %1, %3;\n\t"	\
+		"add.cc.u32 %0, s1, s3;\n\t"	\
+		"addc.u32 %1, s2, s4;\n\t"	\
+		"xor.b32 s1, %0, %6;\n\t"	\
+		"xor.b32 %6, %1, %7;\n\t"	\
+		"mov.b32 %7, s1;\n\t"	\
+		"mul.lo.u32 s3, %4, %6;\n\t"	\
+		"mul.hi.u32 s4, %4, %6;\n\t"	\
+		"add.cc.u32 s3, s3, s3;\n\t"	\
+		"addc.u32 s4, s4, s4;\n\t"	\
+		"add.cc.u32 s1, %4, %6;\n\t"	\
+		"addc.u32 s2, %5, %7;\n\t"	\
+		"add.cc.u32 %4, s1, s3;\n\t"	\
+		"addc.u32 %5, s2, s4;\n\t"	\
+		"xor.b32 s3, %2, %4;\n\t"	\
+		"xor.b32 s4, %3, %5;\n\t"	\
+		"shf.r.wrap.b32 %3, s4, s3, 24;\n\t"	\
+		"shf.r.wrap.b32 %2, s3, s4, 24;\n\t"	\
+		"mul.lo.u32 s3, %0, %2;\n\t"	\
+		"mul.hi.u32 s4, %0, %2;\n\t"	\
+		"add.cc.u32 s3, s3, s3;\n\t"	\
+		"addc.u32 s4, s4, s4;\n\t"	\
+		"add.cc.u32 s1, %0, %2;\n\t"	\
+		"addc.u32 s2, %1, %3;\n\t"	\
+		"add.cc.u32 %0, s1, s3;\n\t"	\
+		"addc.u32 %1, s2, s4;\n\t"	\
+		"xor.b32 s3, %0, %6;\n\t"	\
+		"xor.b32 s4, %1, %7;\n\t"	\
+		"shf.r.wrap.b32 %7, s4, s3, 16;\n\t"	\
+		"shf.r.wrap.b32 %6, s3, s4, 16;\n\t"	\
+		"mul.lo.u32 s3, %4, %6;\n\t"	\
+		"mul.hi.u32 s4, %4, %6;\n\t"	\
+		"add.cc.u32 s3, s3, s3;\n\t"	\
+		"addc.u32 s4, s4, s4;\n\t"	\
+		"add.cc.u32 s1, %4, %6;\n\t"	\
+		"addc.u32 s2, %5, %7;\n\t"	\
+		"add.cc.u32 %4, s1, s3;\n\t"	\
+		"addc.u32 %5, s2, s4;\n\t"	\
+		"xor.b32 s3, %2, %4;\n\t"	\
+		"xor.b32 s4, %3, %5;\n\t"	\
+		"shf.r.wrap.b32 %3, s3, s4, 31;\n\t"	\
+		"shf.r.wrap.b32 %2, s4, s3, 31;\n\t"	\
+	"}" : "+r"(tmp_a.x), "+r"(tmp_a.y), "+r"(tmp_a.z), "+r"(tmp_a.w), "+r"(tmp_b.x), "+r"(tmp_b.y), "+r"(tmp_b.z), "+r"(tmp_b.w));
+
+// Step 1 of 4 of the block permutation: run COMPUTE on the values already held in
+// tmp_a/tmp_b, then exchange b, c and d between the threads of the warp with
+// __shfl_sync, each read from the source lane given by i_shfl1_1..3.
+// The `data` argument is not used by this step. Relies on tmp_a, tmp_b and
+// i_shfl1_* being defined in the expanding scope.
+#define G1(data)           \
+{                           \
+	COMPUTE \
+	tmp_a.z = __shfl_sync(0xffffffff, tmp_a.z, i_shfl1_1); \
+	tmp_a.w = __shfl_sync(0xffffffff, tmp_a.w, i_shfl1_1); \
+	tmp_b.x = __shfl_sync(0xffffffff, tmp_b.x, i_shfl1_2); \
+	tmp_b.y = __shfl_sync(0xffffffff, tmp_b.y, i_shfl1_2); \
+	tmp_b.z = __shfl_sync(0xffffffff, tmp_b.z, i_shfl1_3); \
+	tmp_b.w = __shfl_sync(0xffffffff, tmp_b.w, i_shfl1_3); \
+}
+
+// Step 2 of 4: run COMPUTE, then write the eight result words to `data` at the
+// i2_* positions and __syncwarp() so the warp's writes are complete before the
+// next step reads `data`.
+#define G2(data)           \
+{ \
+	COMPUTE \
+    data[i2_0_0] = tmp_a.x; \
+    data[i2_0_1] = tmp_a.y; \
+    data[i2_1_0] = tmp_a.z; \
+    data[i2_1_1] = tmp_a.w; \
+    data[i2_2_0] = tmp_b.x; \
+    data[i2_2_1] = tmp_b.y; \
+    data[i2_3_0] = tmp_b.z; \
+    data[i2_3_1] = tmp_b.w; \
+    __syncwarp(); \
+}
+
+// Step 3 of 4: reload tmp_a/tmp_b from `data` at the i3_* positions, run COMPUTE,
+// then exchange b, c and d across the warp using the i_shfl2_1..3 source lanes.
+#define G3(data)           \
+{                           \
+    tmp_a.x = data[i3_0_0]; \
+    tmp_a.y = data[i3_0_1]; \
+    tmp_a.z = data[i3_1_0]; \
+    tmp_a.w = data[i3_1_1]; \
+    tmp_b.x = data[i3_2_0]; \
+    tmp_b.y = data[i3_2_1]; \
+    tmp_b.z = data[i3_3_0]; \
+    tmp_b.w = data[i3_3_1]; \
+	COMPUTE \
+	tmp_a.z = __shfl_sync(0xffffffff, tmp_a.z, i_shfl2_1); \
+	tmp_a.w = __shfl_sync(0xffffffff, tmp_a.w, i_shfl2_1); \
+	tmp_b.x = __shfl_sync(0xffffffff, tmp_b.x, i_shfl2_2); \
+	tmp_b.y = __shfl_sync(0xffffffff, tmp_b.y, i_shfl2_2); \
+	tmp_b.z = __shfl_sync(0xffffffff, tmp_b.z, i_shfl2_3); \
+	tmp_b.w = __shfl_sync(0xffffffff, tmp_b.w, i_shfl2_3); \
+}
+
+// Step 4 of 4: run COMPUTE, write the result to `data` at the i4_* positions,
+// __syncwarp(), then read the words back at the i1_* positions so tmp_a/tmp_b
+// leave the permutation in the same layout in which they entered G1.
+#define G4(data)           \
+{                           \
+	COMPUTE \
+    data[i4_0_0] = tmp_a.x; \
+    data[i4_0_1] = tmp_a.y; \
+    data[i4_1_0] = tmp_a.z; \
+    data[i4_1_1] = tmp_a.w; \
+    data[i4_2_0] = tmp_b.x; \
+    data[i4_2_1] = tmp_b.y; \
+    data[i4_3_0] = tmp_b.z; \
+    data[i4_3_1] = tmp_b.w; \
+    __syncwarp(); \
+    tmp_a.x = data[i1_0_0]; \
+    tmp_a.y = data[i1_0_1]; \
+    tmp_a.z = data[i1_1_0]; \
+    tmp_a.w = data[i1_1_1]; \
+    tmp_b.x = data[i1_2_0]; \
+    tmp_b.y = data[i1_2_1]; \
+    tmp_b.z = data[i1_3_0]; \
+    tmp_b.w = data[i1_3_1]; \
+}
+
+// Per-thread index tables used by fill_blocks: six sections of 128 entries, four
+// entries per thread (thread `id` reads entries id * 4 .. id * 4 + 3 of a section).
+// Sections 1-4 hold word-pair positions inside a block; the kernel multiplies them
+// by 2 to address the low/high uint32_t of each pair. Sections 5-6 hold source
+// lane numbers for __shfl_sync.
+__constant__ int offsets[768] = {
+		// Section 1 (entries 0..127): i1_* positions - layout of tmp_a/tmp_b on
+		// entry to G1 and after G4.
+		0, 4, 8, 12,
+		1, 5, 9, 13,
+		2, 6, 10, 14,
+		3, 7, 11, 15,
+		16, 20, 24, 28,
+		17, 21, 25, 29,
+		18, 22, 26, 30,
+		19, 23, 27, 31,
+		32, 36, 40, 44,
+		33, 37, 41, 45,
+		34, 38, 42, 46,
+		35, 39, 43, 47,
+		48, 52, 56, 60,
+		49, 53, 57, 61,
+		50, 54, 58, 62,
+		51, 55, 59, 63,
+		64, 68, 72, 76,
+		65, 69, 73, 77,
+		66, 70, 74, 78,
+		67, 71, 75, 79,
+		80, 84, 88, 92,
+		81, 85, 89, 93,
+		82, 86, 90, 94,
+		83, 87, 91, 95,
+		96, 100, 104, 108,
+		97, 101, 105, 109,
+		98, 102, 106, 110,
+		99, 103, 107, 111,
+		112, 116, 120, 124,
+		113, 117, 121, 125,
+		114, 118, 122, 126,
+		115, 119, 123, 127,
+		// Section 2 (entries 128..255): i2_* positions written by G2.
+		0, 5, 10, 15,
+		1, 6, 11, 12,
+		2, 7, 8, 13,
+		3, 4, 9, 14,
+		16, 21, 26, 31,
+		17, 22, 27, 28,
+		18, 23, 24, 29,
+		19, 20, 25, 30,
+		32, 37, 42, 47,
+		33, 38, 43, 44,
+		34, 39, 40, 45,
+		35, 36, 41, 46,
+		48, 53, 58, 63,
+		49, 54, 59, 60,
+		50, 55, 56, 61,
+		51, 52, 57, 62,
+		64, 69, 74, 79,
+		65, 70, 75, 76,
+		66, 71, 72, 77,
+		67, 68, 73, 78,
+		80, 85, 90, 95,
+		81, 86, 91, 92,
+		82, 87, 88, 93,
+		83, 84, 89, 94,
+		96, 101, 106, 111,
+		97, 102, 107, 108,
+		98, 103, 104, 109,
+		99, 100, 105, 110,
+		112, 117, 122, 127,
+		113, 118, 123, 124,
+		114, 119, 120, 125,
+		115, 116, 121, 126,
+		// Section 3 (entries 256..383): i3_* positions read by G3.
+		0, 32, 64, 96,
+		1, 33, 65, 97,
+		2, 34, 66, 98,
+		3, 35, 67, 99,
+		4, 36, 68, 100,
+		5, 37, 69, 101,
+		6, 38, 70, 102,
+		7, 39, 71, 103,
+		8, 40, 72, 104,
+		9, 41, 73, 105,
+		10, 42, 74, 106,
+		11, 43, 75, 107,
+		12, 44, 76, 108,
+		13, 45, 77, 109,
+		14, 46, 78, 110,
+		15, 47, 79, 111,
+		16, 48, 80, 112,
+		17, 49, 81, 113,
+		18, 50, 82, 114,
+		19, 51, 83, 115,
+		20, 52, 84, 116,
+		21, 53, 85, 117,
+		22, 54, 86, 118,
+		23, 55, 87, 119,
+		24, 56, 88, 120,
+		25, 57, 89, 121,
+		26, 58, 90, 122,
+		27, 59, 91, 123,
+		28, 60, 92, 124,
+		29, 61, 93, 125,
+		30, 62, 94, 126,
+		31, 63, 95, 127,
+		// Section 4 (entries 384..511): i4_* positions written by G4.
+		0, 33, 80, 113,
+		1, 48, 81, 96,
+		2, 35, 82, 115,
+		3, 50, 83, 98,
+		4, 37, 84, 117,
+		5, 52, 85, 100,
+		6, 39, 86, 119,
+		7, 54, 87, 102,
+		8, 41, 88, 121,
+		9, 56, 89, 104,
+		10, 43, 90, 123,
+		11, 58, 91, 106,
+		12, 45, 92, 125,
+		13, 60, 93, 108,
+		14, 47, 94, 127,
+		15, 62, 95, 110,
+		16, 49, 64, 97,
+		17, 32, 65, 112,
+		18, 51, 66, 99,
+		19, 34, 67, 114,
+		20, 53, 68, 101,
+		21, 36, 69, 116,
+		22, 55, 70, 103,
+		23, 38, 71, 118,
+		24, 57, 72, 105,
+		25, 40, 73, 120,
+		26, 59, 74, 107,
+		27, 42, 75, 122,
+		28, 61, 76, 109,
+		29, 44, 77, 124,
+		30, 63, 78, 111,
+		31, 46, 79, 126,
+        // Section 5 (entries 512..639): shuffle source lanes for G1; the kernel
+        // reads columns 1..3 of each row as i_shfl1_1..3 (column 0 is not read).
+        0, 1, 2, 3,
+        1, 2, 3, 0,
+        2, 3, 0, 1,
+        3, 0, 1, 2,
+        4, 5, 6, 7,
+        5, 6, 7, 4,
+        6, 7, 4, 5,
+        7, 4, 5, 6,
+        8, 9, 10, 11,
+        9, 10, 11, 8,
+        10, 11, 8, 9,
+        11, 8, 9, 10,
+        12, 13, 14, 15,
+        13, 14, 15, 12,
+        14, 15, 12, 13,
+        15, 12, 13, 14,
+        16, 17, 18, 19,
+        17, 18, 19, 16,
+        18, 19, 16, 17,
+        19, 16, 17, 18,
+        20, 21, 22, 23,
+        21, 22, 23, 20,
+        22, 23, 20, 21,
+        23, 20, 21, 22,
+        24, 25, 26, 27,
+        25, 26, 27, 24,
+        26, 27, 24, 25,
+        27, 24, 25, 26,
+        28, 29, 30, 31,
+        29, 30, 31, 28,
+        30, 31, 28, 29,
+        31, 28, 29, 30,
+        // Section 6 (entries 640..767): shuffle source lanes for G3; columns 1..3
+        // are read as i_shfl2_1..3.
+        0, 1, 16, 17,
+        1, 16, 17, 0,
+        2, 3, 18, 19,
+        3, 18, 19, 2,
+        4, 5, 20, 21,
+        5, 20, 21, 4,
+        6, 7, 22, 23,
+        7, 22, 23, 6,
+        8, 9, 24, 25,
+        9, 24, 25, 8,
+        10, 11, 26, 27,
+        11, 26, 27, 10,
+        12, 13, 28, 29,
+        13, 28, 29, 12,
+        14, 15, 30, 31,
+        15, 30, 31, 14,
+        16, 17, 0, 1,
+        17, 0, 1, 16,
+        18, 19, 2, 3,
+        19, 2, 3, 18,
+        20, 21, 4, 5,
+        21, 4, 5, 20,
+        22, 23, 6, 7,
+        23, 6, 7, 22,
+        24, 25, 8, 9,
+        25, 8, 9, 24,
+        26, 27, 10, 11,
+        27, 10, 11, 26,
+        28, 29, 12, 13,
+        29, 12, 13, 28,
+        30, 31, 14, 15,
+        31, 14, 15, 30
+};
+
+// Component-wise XOR-assignment for uint4. Returns the updated left operand, as
+// compound assignment operators conventionally do; existing statement-style uses
+// (`a ^= b;`) are unaffected.
+inline __host__ __device__ uint4& operator^=( uint4& a, uint4 s) {
+   a.x ^= s.x;
+   a.y ^= s.y;
+   a.z ^= s.z;
+   a.w ^= s.w;
+   return a;
+}
+
+// Argon2 memory-filling kernel: one thread block per hash, THREADS_PER_LANE (32)
+// threads per lane, each thread holding two uint4 of the 1 KiB block being mixed.
+//
+// scratchpad0..5    - memory chunks; the hash's scratchpad lives in chunk
+//                     (blockIdx.x + thread_idx) / threads_per_chunk.
+// seed              - input: 2 seed blocks per lane per hash (BLOCK_SIZE_UINT words each).
+// out               - output: XOR of the final block of every lane, one block per hash.
+// refs              - precomputed reference block indexes for segments of type 0.
+// idxs              - optional (may be NULL): per-step destination index, top bit is
+//                     the "keep" flag, low 31 bits the block index. When NULL the
+//                     destination index simply increments and every block is stored.
+// segments          - 3 words per segment: first block index, previous block index,
+//                     segment type (0 = use refs/idxs, otherwise compute the
+//                     reference from the previous block's first word pair).
+// memsize           - scratchpad size of one hash in bytes.
+// lanes             - number of lanes.
+// seg_length        - blocks per segment (a lane has 4 segments per pass).
+// seg_count         - total number of segments over all lanes and passes.
+// threads_per_chunk - hashes stored per scratchpad chunk.
+// thread_idx        - offset added to blockIdx.x to locate the hash's scratchpad.
+//
+// Dynamic shared memory: lanes * BLOCK_SIZE_UINT words of block state, lanes * 32
+// words of reference buffer and, when idxs is used, lanes * 32 words of index buffer.
+//
+// The names tmp_a, tmp_b, i1_*..i4_* and i_shfl*_* are referenced by the
+// COMPUTE/G1..G4 macros and must keep these exact names.
+__global__ void fill_blocks(uint32_t *scratchpad0,
+                            uint32_t *scratchpad1,
+                            uint32_t *scratchpad2,
+                            uint32_t *scratchpad3,
+                            uint32_t *scratchpad4,
+                            uint32_t *scratchpad5,
+                            uint32_t *seed,
+                            uint32_t *out,
+                            uint32_t *refs, // 32 bit
+                            uint32_t *idxs, // first bit is keep flag, next 31 bit is current idx
+                            uint32_t *segments,
+                            int memsize,
+                            int lanes,
+                            int seg_length,
+                            int seg_count,
+                            int threads_per_chunk,
+                            int thread_idx) {
+    extern __shared__ uint32_t shared[]; // lanes * BLOCK_SIZE_UINT [local state] + lanes * 32 [refs buffer] ( + lanes * 32 [idx buffer])
+
+    uint32_t *local_state = shared;
+    uint32_t *local_refs = shared + (lanes * BLOCK_SIZE_UINT);
+    uint32_t *local_idxs = shared + (lanes * BLOCK_SIZE_UINT + lanes * 32);
+
+    uint4 tmp_a, tmp_b, tmp_c, tmp_d, tmp_p, tmp_q, tmp_l, tmp_m;
+
+    int hash = blockIdx.x;
+    int mem_hash = hash + thread_idx;
+    int local_id = threadIdx.x;
+    int lane_length = seg_length * 4;
+
+    int id = local_id % THREADS_PER_LANE;
+    int lane = local_id / THREADS_PER_LANE;
+
+    // Each thread owns four consecutive entries in every section of `offsets`.
+    int offset = id << 2;
+
+    int i1_0_0 = 2 * offsets[offset];
+    int i1_0_1 = i1_0_0 + 1;
+    int i1_1_0 = 2 * offsets[offset + 1];
+    int i1_1_1 = i1_1_0 + 1;
+    int i1_2_0 = 2 * offsets[offset + 2];
+    int i1_2_1 = i1_2_0 + 1;
+    int i1_3_0 = 2 * offsets[offset + 3];
+    int i1_3_1 = i1_3_0 + 1;
+
+    int i2_0_0 = 2 * offsets[offset + 128];
+    int i2_0_1 = i2_0_0 + 1;
+    int i2_1_0 = 2 * offsets[offset + 129];
+    int i2_1_1 = i2_1_0 + 1;
+    int i2_2_0 = 2 * offsets[offset + 130];
+    int i2_2_1 = i2_2_0 + 1;
+    int i2_3_0 = 2 * offsets[offset + 131];
+    int i2_3_1 = i2_3_0 + 1;
+
+    int i3_0_0 = 2 * offsets[offset + 256];
+    int i3_0_1 = i3_0_0 + 1;
+    int i3_1_0 = 2 * offsets[offset + 257];
+    int i3_1_1 = i3_1_0 + 1;
+    int i3_2_0 = 2 * offsets[offset + 258];
+    int i3_2_1 = i3_2_0 + 1;
+    int i3_3_0 = 2 * offsets[offset + 259];
+    int i3_3_1 = i3_3_0 + 1;
+
+    int i4_0_0 = 2 * offsets[offset + 384];
+    int i4_0_1 = i4_0_0 + 1;
+    int i4_1_0 = 2 * offsets[offset + 385];
+    int i4_1_1 = i4_1_0 + 1;
+    int i4_2_0 = 2 * offsets[offset + 386];
+    int i4_2_1 = i4_2_0 + 1;
+    int i4_3_0 = 2 * offsets[offset + 387];
+    int i4_3_1 = i4_3_0 + 1;
+
+    int i_shfl1_1 = offsets[offset + 513];
+    int i_shfl1_2 = offsets[offset + 514];
+    int i_shfl1_3 = offsets[offset + 515];
+    int i_shfl2_1 = offsets[offset + 641];
+    int i_shfl2_2 = offsets[offset + 642];
+    int i_shfl2_3 = offsets[offset + 643];
+
+    // Pick the chunk holding this hash's scratchpad, then the hash's slot inside it.
+    int scratchpad_location = mem_hash / threads_per_chunk;
+    uint4 *memory = reinterpret_cast<uint4*>(scratchpad0);
+    if(scratchpad_location == 1) memory = reinterpret_cast<uint4*>(scratchpad1);
+    if(scratchpad_location == 2) memory = reinterpret_cast<uint4*>(scratchpad2);
+    if(scratchpad_location == 3) memory = reinterpret_cast<uint4*>(scratchpad3);
+    if(scratchpad_location == 4) memory = reinterpret_cast<uint4*>(scratchpad4);
+    if(scratchpad_location == 5) memory = reinterpret_cast<uint4*>(scratchpad5);
+    int hash_offset = mem_hash - scratchpad_location * threads_per_chunk;
+    memory = memory + hash_offset * (memsize >> 4); // memsize / 16 -> 16 bytes in uint4
+
+    uint32_t *mem_seed = seed + hash * lanes * 2 * BLOCK_SIZE_UINT;
+
+    uint32_t *seed_src = mem_seed + lane * 2 * BLOCK_SIZE_UINT;
+    uint4 *seed_dst = memory + lane * lane_length * BLOCK_SIZE_UINT4;
+
+    // Copy the lane's two seed blocks into the scratchpad, rearranged into the
+    // per-thread (i1) layout.
+    seed_dst[id] = make_uint4(seed_src[i1_0_0], seed_src[i1_0_1], seed_src[i1_1_0], seed_src[i1_1_1]);
+    seed_dst[id + 32] = make_uint4(seed_src[i1_2_0], seed_src[i1_2_1], seed_src[i1_3_0], seed_src[i1_3_1]);
+    seed_src += BLOCK_SIZE_UINT;
+    seed_dst += BLOCK_SIZE_UINT4;
+    seed_dst[id] = make_uint4(seed_src[i1_0_0], seed_src[i1_0_1], seed_src[i1_1_0], seed_src[i1_1_1]);
+    seed_dst[id + 32] = make_uint4(seed_src[i1_2_0], seed_src[i1_2_1], seed_src[i1_3_0], seed_src[i1_3_1]);
+
+    uint4 *next_block;
+    uint4 *prev_block;
+    uint4 *ref_block;
+    uint32_t *seg_refs = NULL;
+    uint32_t *seg_idxs = NULL; // stays NULL (and untouched) when idxs is not supplied
+
+    local_state = local_state + lane * BLOCK_SIZE_UINT;
+    local_refs = local_refs + lane * 32;
+    local_idxs = local_idxs + lane * 32;
+
+    segments += (lane * 3);
+
+    for(int s = 0; s < (seg_count / lanes); s++) {
+        int idx = ((s == 0) ? 2 : 0); // index for first slice in each lane is 2
+        int with_xor = ((s >= 4) ? 1 : 0);
+        // Unsigned on purpose: the flag is the top bit of a 32-bit word. Held in a
+        // signed int it became negative and the `keep > 0` test below could never
+        // succeed for a flagged block.
+        uint32_t keep = 1;
+        int slice = s % 4;
+        int pass = s / 4;
+
+        uint32_t *cur_seg = &segments[s * lanes * 3];
+
+        uint32_t cur_idx = cur_seg[0];
+        uint32_t prev_idx = cur_seg[1];
+        uint32_t seg_type = cur_seg[2];
+        uint32_t ref_idx = 0;
+
+        prev_block = memory + prev_idx * BLOCK_SIZE_UINT4;
+
+        tmp_a = prev_block[id];
+        tmp_b = prev_block[id + 32];
+
+        // All lanes must have finished the previous segment before this one starts.
+        __syncthreads();
+
+        if(seg_type == 0) {
+            seg_refs = refs + ((s * lanes + lane) * seg_length - ((s > 0) ? lanes : lane) * 2);
+            if(idxs != NULL) seg_idxs = idxs + ((s * lanes + lane) * seg_length - ((s > 0) ? lanes : lane) * 2);
+
+            // References are consumed in batches of up to 32, staged through shared memory.
+            for (cur_idx--; idx < seg_length; seg_refs += 32) {
+                uint64_t i_limit = seg_length - idx;
+                if (i_limit > 32) i_limit = 32;
+
+                local_refs[id] = seg_refs[id];
+                if(idxs != NULL)
+                    local_idxs[id] = seg_idxs[id];
+
+                // Entry 0 was written by another thread of the warp; make the staged
+                // values visible before anyone reads them.
+                __syncwarp();
+
+                ref_idx = local_refs[0];
+
+                if(idxs != NULL) {
+                    cur_idx = local_idxs[0];
+                    keep = cur_idx & 0x80000000;
+                    cur_idx = cur_idx & 0x7FFFFFFF;
+                } else
+                    cur_idx++;
+
+                ref_block = memory + ref_idx * BLOCK_SIZE_UINT4;
+                tmp_p = ref_block[id];
+                tmp_q = ref_block[id + 32];
+
+                for (int i = 0; i < i_limit; i++, idx++) {
+                    next_block = memory + cur_idx * BLOCK_SIZE_UINT4;
+                    if(with_xor == 1) {
+                        tmp_l = next_block[id];
+                        tmp_m = next_block[id + 32];
+                    }
+
+                    tmp_a ^= tmp_p;
+                    tmp_b ^= tmp_q;
+
+                    // Fetch the next step's reference block early; its indexes do not
+                    // depend on the block being computed now. `keep` and `cur_idx`
+                    // therefore already describe the NEXT step when the store below
+                    // runs, while next_block still points at the current destination
+                    // (unchanged from the original control flow).
+                    if (i < (i_limit - 1)) {
+                        ref_idx = local_refs[i + 1];
+
+                        if(idxs != NULL) {
+                            cur_idx = local_idxs[i + 1];
+                            keep = cur_idx & 0x80000000;
+                            cur_idx = cur_idx & 0x7FFFFFFF;
+                        }
+                        else
+                            cur_idx++;
+
+                        ref_block = memory + ref_idx * BLOCK_SIZE_UINT4;
+                        tmp_p = ref_block[id];
+                        tmp_q = ref_block[id + 32];
+                    }
+
+                    tmp_c = tmp_a;
+                    tmp_d = tmp_b;
+
+                    G1(local_state);
+                    G2(local_state);
+                    G3(local_state);
+                    G4(local_state);
+
+                    if(with_xor == 1) {
+                        tmp_c ^= tmp_l;
+                        tmp_d ^= tmp_m;
+                    }
+
+                    tmp_a ^= tmp_c;
+                    tmp_b ^= tmp_d;
+
+                    if(keep > 0) {
+                        next_block[id] = tmp_a;
+                        next_block[id + 32] = tmp_b;
+                    }
+                }
+
+                // Advance the index cursor only when there is an index table.
+                if(idxs != NULL)
+                    seg_idxs += 32;
+            }
+        }
+        else {
+
+            for (; idx < seg_length; idx++, cur_idx++) {
+                next_block = memory + cur_idx * BLOCK_SIZE_UINT4;
+
+                if(with_xor == 1) {
+                    tmp_l = next_block[id];
+                    tmp_m = next_block[id + 32];
+                }
+
+                // The first word pair of the previous block (held by thread 0 of the
+                // warp) selects the reference lane and position.
+                uint32_t pseudo_rand_lo = __shfl_sync(0xffffffff, tmp_a.x, 0);
+                uint32_t pseudo_rand_hi = __shfl_sync(0xffffffff, tmp_a.y, 0);
+
+                uint64_t ref_lane = pseudo_rand_hi % lanes; // thr_cost
+                uint32_t reference_area_size = 0;
+                if(pass > 0) {
+                    if (lane == ref_lane) {
+                        reference_area_size = lane_length - seg_length + idx - 1;
+                    } else {
+                        reference_area_size = lane_length - seg_length + ((idx == 0) ? (-1) : 0);
+                    }
+                }
+                else {
+                    if (lane == ref_lane) {
+                        reference_area_size = slice * seg_length + idx - 1; // seg_length
+                    } else {
+                        reference_area_size = slice * seg_length + ((idx == 0) ? (-1) : 0);
+                    }
+                }
+                // pseudo_rand_lo = hi32(hi32(x * x) * reference_area_size)
+                asm("{mul.hi.u32 %0, %1, %1; mul.hi.u32 %0, %0, %2; }": "=r"(pseudo_rand_lo) : "r"(pseudo_rand_lo), "r"(reference_area_size));
+
+                uint32_t relative_position = reference_area_size - 1 - pseudo_rand_lo;
+
+                ref_idx = ref_lane * lane_length + (((pass > 0 && slice < 3) ? ((slice + 1) * seg_length) : 0) + relative_position) % lane_length;
+
+                ref_block = memory + ref_idx * BLOCK_SIZE_UINT4;
+
+                tmp_a ^= ref_block[id];
+                tmp_b ^= ref_block[id + 32];
+
+                tmp_c = tmp_a;
+                tmp_d = tmp_b;
+
+                G1(local_state);
+                G2(local_state);
+                G3(local_state);
+                G4(local_state);
+
+                if(with_xor == 1) {
+                    tmp_c ^= tmp_l;
+                    tmp_d ^= tmp_m;
+                }
+
+                tmp_a ^= tmp_c;
+                tmp_b ^= tmp_d;
+
+                next_block[id] = tmp_a;
+                next_block[id + 32] = tmp_b;
+            }
+        }
+    }
+
+    local_state[i1_0_0] = tmp_a.x;
+    local_state[i1_0_1] = tmp_a.y;
+    local_state[i1_1_0] = tmp_a.z;
+    local_state[i1_1_1] = tmp_a.w;
+    local_state[i1_2_0] = tmp_b.x;
+    local_state[i1_2_1] = tmp_b.y;
+    local_state[i1_3_0] = tmp_b.z;
+    local_state[i1_3_1] = tmp_b.w;
+
+    __syncthreads();
+
+    // at this point local_state will contain the final blocks
+
+    if(lane == 0) { // first lane needs to accumulate results
+        tmp_a = make_uint4(0, 0, 0, 0);
+        tmp_b = make_uint4(0, 0, 0, 0);
+
+        for(int l=0; l<lanes; l++) {
+            uint4 *block = (uint4 *)(shared + l * BLOCK_SIZE_UINT);
+            tmp_a ^= block[id];
+            tmp_b ^= block[id + 32];
+        }
+
+        uint4 *out_mem = (uint4 *)(out + hash * BLOCK_SIZE_UINT);
+        out_mem[id] = tmp_a;
+        out_mem[id + 32] = tmp_b;
+    }
+}
+
+// Argon2 pre-hashing kernel: computes the initial digest of each hash and expands
+// it into the seed blocks consumed by fill_blocks.
+//
+// Work is organised in "sessions" of 4 threads; one session produces one seed
+// block, and a hash needs lanes * 2 of them (two per lane).
+//
+// preseed - input blob shared by all hashes (uint32_t words); also used as the salt source.
+// seed    - output: lanes * 2 seed blocks of BLOCK_SIZE_UINT words per hash.
+// memsz, lanes, passes - Argon2 parameters fed into the digest.
+// pwdlen, saltlen      - lengths in uint32_t words (multiplied by 4 when hashed).
+// threads - number of hashes to produce; sessions beyond that do nothing.
+__global__ void prehash (
+        uint32_t *preseed,
+        uint32_t *seed,
+		int memsz,
+		int lanes,
+		int passes,
+		int pwdlen,
+		int saltlen,
+        int threads) { // len is given in uint32 units
+    extern __shared__ uint32_t shared[]; // size = max(lanes * 2, 8) * 88
+
+	int seeds_batch_size = blockDim.x / 4; // number of seeds per block
+	int hash_batch_size = seeds_batch_size / (lanes * 2); // number of hashes per block
+
+	int id = threadIdx.x; // minimum 32 threads
+	int thr_id = id % 4; // thread id in session
+	int session = id / 4; // blake2b hashing session
+
+    int hash = blockIdx.x * hash_batch_size;
+    int hash_idx = session / (lanes * 2);
+    hash += hash_idx;
+
+    if(hash < threads) {
+        int hash_session = session % (lanes * 2); // session in hash
+
+        int lane = hash_session / 2;  // two seed blocks per lane
+        int idx = hash_session % 2; // idx in lane
+
+        // Carve this session's slice of shared memory into the blake2b state (h),
+        // its input buffer (buf), a one-word scratch value and the local preseed copy.
+        uint32_t *local_mem = &shared[session * BLAKE_SHARED_MEM_UINT];
+        uint32_t *local_seed = seed + (hash * lanes * 2 + hash_session) * BLOCK_SIZE_UINT;
+
+        uint64_t *h = (uint64_t *) &local_mem[20];
+        uint32_t *buf = (uint32_t *) &h[10];
+        uint32_t *value = &buf[32];
+        uint32_t *local_preseed = &value[1];
+
+        uint32_t *cursor_in = preseed;
+        uint32_t *cursor_out = local_preseed;
+
+        // Copy the preseed four words at a time, one word per thread of the session.
+        for(int i=0; i < (pwdlen >> 2); i++, cursor_in += 4, cursor_out += 4) {
+            cursor_out[thr_id] = cursor_in[thr_id];
+        }
+
+        if(thr_id == 0) {
+            // Remaining words when pwdlen is not a multiple of 4.
+            for (int i = 0; i < (pwdlen % 4); i++) {
+                cursor_out[i] = cursor_in[i];
+            }
+
+            // The 32-bit nonce straddles words 9 and 10 (top byte of word 9, low
+            // three bytes of word 10); give every hash its own nonce = base + hash.
+            uint32_t nonce = (preseed[9] >> 24) | (preseed[10] << 8);
+            nonce += hash;
+            local_preseed[9] = (preseed[9] & 0x00FFFFFF) | (nonce << 24);
+            local_preseed[10] = (preseed[10] & 0xFF000000) | (nonce >> 8);
+        }
+
+        int buf_len = blake2b_init(h, ARGON2_PREHASH_DIGEST_LENGTH_UINT, thr_id);
+        *value = lanes; //lanes
+        buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id);
+        *value = 32; //outlen
+        buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id);
+        *value = memsz; //m_cost
+        buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id);
+        *value = passes; //t_cost
+        buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id);
+        *value = ARGON2_VERSION; //version
+        buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id);
+        *value = ARGON2_TYPE_VALUE; //type
+        buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id);
+        *value = pwdlen * 4; //pw_len
+        buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id);
+        buf_len = blake2b_update(local_preseed, pwdlen, h, buf, buf_len, thr_id);
+        *value = saltlen * 4; //salt_len
+        buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id);
+        // The salt is the first saltlen words of the (nonce-adjusted) preseed.
+		buf_len = blake2b_update(local_preseed, saltlen, h, buf, buf_len, thr_id);
+        *value = 0; //secret_len
+        buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id);
+        buf_len = blake2b_update(NULL, 0, h, buf, buf_len, thr_id);
+        *value = 0; //ad_len
+        buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id);
+        buf_len = blake2b_update(NULL, 0, h, buf, buf_len, thr_id);
+
+        blake2b_final(local_mem, ARGON2_PREHASH_DIGEST_LENGTH_UINT, h, buf, buf_len, thr_id);
+
+        // Append the block index within the lane and the lane number to the digest;
+        // the resulting ARGON2_PREHASH_SEED_LENGTH_UINT words are expanded below.
+        if (thr_id == 0) {
+            local_mem[ARGON2_PREHASH_DIGEST_LENGTH_UINT] = idx;
+            local_mem[ARGON2_PREHASH_DIGEST_LENGTH_UINT + 1] = lane;
+        }
+
+        blake2b_digestLong(local_seed, ARGON2_DWORDS_IN_BLOCK, local_mem, ARGON2_PREHASH_SEED_LENGTH_UINT, thr_id,
+                           &local_mem[20]);
+    }
+}
+
+// Final hashing kernel: one thread block per hash. Compresses the hash's output
+// block from `out` into ARGON2_RAW_LENGTH / 4 words in `hash` and stores the
+// nonce that produced it in the word right after the digest.
+//
+// hash    - output, (ARGON2_RAW_LENGTH / 4) + 1 words per hash.
+// out     - input, one block of BLOCK_SIZE_UINT words per hash.
+// preseed - input blob; the base nonce is read from words 9 and 10.
+__global__ void posthash (
+        uint32_t *hash,
+        uint32_t *out,
+        uint32_t *preseed) {
+    extern __shared__ uint32_t shared[]; // size = 120
+
+    int block_no = blockIdx.x;
+    int worker = threadIdx.x;
+
+    uint32_t *digest = hash + block_no * ((ARGON2_RAW_LENGTH / 4) + 1);
+    uint32_t *final_block = out + block_no * BLOCK_SIZE_UINT;
+
+    blake2b_digestLong(digest, ARGON2_RAW_LENGTH / 4, final_block, ARGON2_DWORDS_IN_BLOCK, worker, shared);
+
+    // Only the first thread records the nonce.
+    if(worker != 0)
+        return;
+
+    uint32_t base_nonce = (preseed[9] >> 24) | (preseed[10] << 8);
+    digest[ARGON2_RAW_LENGTH / 4] = base_nonce + block_no;
+}
+
+// Returns the byte size of the next scratch chunk. While `chunks` is positive one
+// chunk is consumed from it: a full `chunk_size` when more than one chunk remains,
+// otherwise the remaining fraction of a chunk rounded up. Once nothing remains the
+// size is 1, so the slot still receives a (1-byte) allocation.
+static size_t cuda_next_chunk_size(double &chunks, size_t chunk_size) {
+    if (chunks > 0) {
+        size_t bytes = chunks > 1 ? chunk_size : (size_t)ceil(chunk_size * chunks);
+        chunks -= 1;
+        return bytes;
+    }
+    return 1;
+}
+
+// cudaMalloc wrapper: records the status in device->error and, on failure, sets the
+// generic allocation message. Returns true on success.
+template <typename T>
+static bool cuda_device_malloc(cuda_device_info *device, T **slot, size_t bytes) {
+    device->error = cudaMalloc((void **)slot, bytes);
+    if (device->error != cudaSuccess) {
+        device->error_message = "Error allocating memory.";
+        return false;
+    }
+    return true;
+}
+
+// Allocates `bytes` of device memory into *slot and copies `host_data` into it.
+// Returns true on success; on failure device->error / error_message describe the step
+// that failed.
+template <typename T>
+static bool cuda_upload_table(cuda_device_info *device, T **slot, const void *host_data, size_t bytes) {
+    if (!cuda_device_malloc(device, slot, bytes)) {
+        return false;
+    }
+    device->error = cudaMemcpy(*slot, host_data, bytes, cudaMemcpyHostToDevice);
+    if (device->error != cudaSuccess) {
+        device->error_message = "Error copying memory.";
+        return false;
+    }
+    return true;
+}
+
+// Records a host-side (malloc) allocation failure on the device descriptor.
+static void cuda_report_host_oom(cuda_device_info *device) {
+    device->error = cudaErrorMemoryAllocation;
+    device->error_message = "Error allocating memory.";
+}
+
+// Allocates every buffer the CUDA hasher needs on `device`:
+//   - six scratch chunks (memory_chunk_0..5) sized from `chunks` / `chunk_size`,
+//   - the refs / idxs / segments lookup tables taken from the device's Argon2 profile,
+//   - two sets (index 0 and 1) of preseed / seed / out / hash device buffers plus a
+//     pinned host buffer each.
+// Returns nothing; the outcome is reported through device->error and
+// device->error_message. The function stops at the first failing step and leaves
+// already-made device allocations in place.
+// Host staging buffers are now released on every path (they used to leak whenever a
+// CUDA call failed) and a failed host malloc is reported instead of dereferenced.
+void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) {
+    Argon2Profile *profile = device->profile_info.profile;
+
+    device->error = cudaSetDevice(device->cuda_index);
+    if (device->error != cudaSuccess) {
+        device->error_message = "Error setting current device for memory allocation.";
+        return;
+    }
+
+    // Scratch chunks, allocated strictly in order because each one consumes from `chunks`.
+    if (!cuda_device_malloc(device, &device->arguments.memory_chunk_0, cuda_next_chunk_size(chunks, chunk_size)))
+        return;
+    if (!cuda_device_malloc(device, &device->arguments.memory_chunk_1, cuda_next_chunk_size(chunks, chunk_size)))
+        return;
+    if (!cuda_device_malloc(device, &device->arguments.memory_chunk_2, cuda_next_chunk_size(chunks, chunk_size)))
+        return;
+    if (!cuda_device_malloc(device, &device->arguments.memory_chunk_3, cuda_next_chunk_size(chunks, chunk_size)))
+        return;
+    if (!cuda_device_malloc(device, &device->arguments.memory_chunk_4, cuda_next_chunk_size(chunks, chunk_size)))
+        return;
+    if (!cuda_device_malloc(device, &device->arguments.memory_chunk_5, cuda_next_chunk_size(chunks, chunk_size)))
+        return;
+
+    // profile->blockRefs is read as triples; element 1 of every triple goes into `refs`.
+    const size_t table_bytes = profile->blockRefsSize * sizeof(uint32_t);
+
+    uint32_t *refs = (uint32_t *)malloc(table_bytes);
+    if (refs == NULL && table_bytes != 0) {
+        cuda_report_host_oom(device);
+        return;
+    }
+    for (int i = 0; i < profile->blockRefsSize; i++) {
+        refs[i] = profile->blockRefs[i * 3 + 1];
+    }
+    const bool refs_uploaded = cuda_upload_table(device, &device->arguments.refs, refs, table_bytes);
+    free(refs); // staging copy is no longer needed whether or not the upload worked
+    if (!refs_uploaded) {
+        return;
+    }
+
+    if (profile->succesiveIdxs == 1) {
+        // No index table is uploaded for this profile.
+        device->arguments.idxs = NULL;
+    }
+    else {
+        // Element 0 of every triple, with bit 31 set when element 2 equals 1.
+        uint32_t *idxs = (uint32_t *)malloc(table_bytes);
+        if (idxs == NULL && table_bytes != 0) {
+            cuda_report_host_oom(device);
+            return;
+        }
+        for (int i = 0; i < profile->blockRefsSize; i++) {
+            idxs[i] = profile->blockRefs[i * 3];
+            if (profile->blockRefs[i * 3 + 2] == 1) {
+                idxs[i] |= 0x80000000;
+            }
+        }
+        const bool idxs_uploaded = cuda_upload_table(device, &device->arguments.idxs, idxs, table_bytes);
+        free(idxs);
+        if (!idxs_uploaded) {
+            return;
+        }
+    }
+
+    // Segment table: three 32-bit words per segment, copied as-is from the profile.
+    if (!cuda_upload_table(device, &device->arguments.segments, profile->segments,
+                           profile->segCount * 3 * sizeof(uint32_t))) {
+        return;
+    }
+
+#ifdef PARALLEL_CUDA
+    int threads = device->profile_info.threads / 2;
+#else
+    int threads = device->profile_info.threads;
+#endif
+
+    size_t preseed_memory_size = profile->pwdLen * 4;
+    size_t seed_memory_size = threads * (profile->thrCost * 2) * ARGON2_BLOCK_SIZE;
+    size_t out_memory_size = threads * ARGON2_BLOCK_SIZE;
+    size_t hash_memory_size = threads * (xmrig::ARGON2_HASHLEN + 4);
+
+    // Two identical buffer sets, index 0 first and then index 1.
+    for (int slot = 0; slot < 2; slot++) {
+        if (!cuda_device_malloc(device, &device->arguments.preseed_memory[slot], preseed_memory_size))
+            return;
+        if (!cuda_device_malloc(device, &device->arguments.seed_memory[slot], seed_memory_size))
+            return;
+        if (!cuda_device_malloc(device, &device->arguments.out_memory[slot], out_memory_size))
+            return;
+        if (!cuda_device_malloc(device, &device->arguments.hash_memory[slot], hash_memory_size))
+            return;
+
+        // NOTE(review): 132 bytes per thread is presumably hash length + 4-byte nonce;
+        // confirm against xmrig::ARGON2_HASHLEN.
+        device->error = cudaMallocHost(&device->arguments.host_seed_memory[slot], 132 * threads);
+        if (device->error != cudaSuccess) {
+            device->error_message = "Error allocating pinned memory.";
+            return;
+        }
+    }
+}
+
+// Frees the device allocation held in `slot`, if any, and resets the slot to NULL.
+template <typename T>
+static void cuda_release_device_ptr(T *&slot) {
+    if (slot != NULL) {
+        cudaFree(slot);
+    }
+    slot = NULL;
+}
+
+// Releases everything held in device->arguments: the lookup tables, the six scratch
+// chunks, both sets of preseed / seed / out / hash device buffers and the pinned host
+// buffers. Every slot is set to NULL afterwards and the device is reset at the end.
+void cuda_free(cuda_device_info *device) {
+    cudaSetDevice(device->cuda_index);
+
+    // Lookup tables.
+    cuda_release_device_ptr(device->arguments.idxs);
+    cuda_release_device_ptr(device->arguments.refs);
+    cuda_release_device_ptr(device->arguments.segments);
+
+    // Scratch chunks.
+    cuda_release_device_ptr(device->arguments.memory_chunk_0);
+    cuda_release_device_ptr(device->arguments.memory_chunk_1);
+    cuda_release_device_ptr(device->arguments.memory_chunk_2);
+    cuda_release_device_ptr(device->arguments.memory_chunk_3);
+    cuda_release_device_ptr(device->arguments.memory_chunk_4);
+    cuda_release_device_ptr(device->arguments.memory_chunk_5);
+
+    // Paired device buffers (index 0 and 1).
+    if (device->arguments.preseed_memory != NULL) {
+        for (int slot = 0; slot < 2; slot++) {
+            cuda_release_device_ptr(device->arguments.preseed_memory[slot]);
+        }
+    }
+
+    if (device->arguments.seed_memory != NULL) {
+        for (int slot = 0; slot < 2; slot++) {
+            cuda_release_device_ptr(device->arguments.seed_memory[slot]);
+        }
+    }
+
+    if (device->arguments.out_memory != NULL) {
+        for (int slot = 0; slot < 2; slot++) {
+            cuda_release_device_ptr(device->arguments.out_memory[slot]);
+        }
+    }
+
+    if (device->arguments.hash_memory != NULL) {
+        for (int slot = 0; slot < 2; slot++) {
+            cuda_release_device_ptr(device->arguments.hash_memory[slot]);
+        }
+    }
+
+    // Host buffers come from cudaMallocHost, so they go back through cudaFreeHost.
+    if (device->arguments.host_seed_memory != NULL) {
+        for (int slot = 0; slot < 2; slot++) {
+            if (device->arguments.host_seed_memory[slot] != NULL) {
+                cudaFreeHost(device->arguments.host_seed_memory[slot]);
+            }
+            device->arguments.host_seed_memory[slot] = NULL;
+        }
+    }
+
+    cudaDeviceReset();
+}
+
+// Host-side launcher for the `prehash` kernel.
+//   memory    - host input; hashData.inSize bytes are copied from it.
+//   threads   - number of hashes to prepare (also passed to the kernel).
+//   profile   - Argon2 profile supplying the cost / length parameters.
+//   user_data - the cuda_gpumgmt_thread_data of the calling worker.
+// Takes the worker's lock before touching the buffers. On success the kernel has been
+// queued on the worker's stream, true is returned and the lock is still held; when the
+// upload fails the lock is released and false is returned.
+bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
+    cuda_gpumgmt_thread_data *worker = (cuda_gpumgmt_thread_data *)user_data;
+    cuda_device_info *dev = worker->device;
+    cudaStream_t queue = (cudaStream_t)worker->device_data;
+
+    // Launch geometry: at least 8 sessions per block, 4 threads per session.
+    int sessions = max(profile->thrCost * 2, (uint32_t)8);
+    double hashes_per_block = sessions / (profile->thrCost * 2.0);
+    size_t block_threads = sessions * 4;
+
+    worker->lock();
+
+    auto &args = dev->arguments;
+
+    // Stage the input in the pinned host buffer, then queue the upload to the device.
+    memcpy(args.host_seed_memory[worker->thread_id], memory, worker->hashData.inSize);
+
+    dev->error = cudaMemcpyAsync(args.preseed_memory[worker->thread_id],
+                                 args.host_seed_memory[worker->thread_id],
+                                 worker->hashData.inSize,
+                                 cudaMemcpyHostToDevice,
+                                 queue);
+    if (dev->error == cudaSuccess) {
+        prehash <<< ceil(threads / hashes_per_block), block_threads, sessions * BLAKE_SHARED_MEM, queue>>> (
+                args.preseed_memory[worker->thread_id],
+                args.seed_memory[worker->thread_id],
+                profile->memCost,
+                profile->thrCost,
+                profile->segCount / (4 * profile->thrCost),
+                worker->hashData.inSize / 4,
+                profile->saltLen,
+                threads);
+
+        return true;
+    }
+
+    dev->error_message = "Error writing to gpu memory.";
+    worker->unlock();
+    return false;
+}
+
+// Host-side launcher for the `fill_blocks` kernel: queues it on the worker's stream with
+// one CUDA block per hash and KERNEL_WORKGROUP_SIZE threads per lane (thrCost lanes).
+//   threads   - number of hashes, used as the grid size.
+//   profile   - Argon2 profile supplying sizes and costs.
+//   user_data - the cuda_gpumgmt_thread_data of the calling worker.
+// Always returns (void *)1; no launch status is inspected here.
+void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data) {
+    cuda_gpumgmt_thread_data *worker = (cuda_gpumgmt_thread_data *)user_data;
+    cuda_device_info *dev = worker->device;
+    cudaStream_t queue = (cudaStream_t)worker->device_data;
+    auto &args = dev->arguments;
+
+    size_t block_threads = KERNEL_WORKGROUP_SIZE * profile->thrCost;
+    // Per lane: one Argon2 block plus 128 bytes, and a further 128 bytes when the
+    // profile uses successive indexes.
+    size_t dynamic_shared_bytes = profile->thrCost * (ARGON2_BLOCK_SIZE + 128 + (profile->succesiveIdxs == 1 ? 128 : 0));
+
+    fill_blocks <<<threads, block_threads, dynamic_shared_bytes, queue>>> (
+            (uint32_t*)args.memory_chunk_0,
+            (uint32_t*)args.memory_chunk_1,
+            (uint32_t*)args.memory_chunk_2,
+            (uint32_t*)args.memory_chunk_3,
+            (uint32_t*)args.memory_chunk_4,
+            (uint32_t*)args.memory_chunk_5,
+            args.seed_memory[worker->thread_id],
+            args.out_memory[worker->thread_id],
+            args.refs,
+            args.idxs,
+            args.segments,
+            profile->memSize,
+            profile->thrCost,
+            profile->segSize,
+            profile->segCount,
+            dev->profile_info.threads_per_chunk,
+            worker->threads_idx);
+
+    return (void *)1;
+}
+
+// Host-side launcher for the `posthash` kernel and collector of its results.
+//   memory    - host destination; receives threads * (xmrig::ARGON2_HASHLEN + 4) bytes.
+//   threads   - number of hashes (grid size of the kernel).
+//   profile   - unused here, kept for the common hasher callback signature.
+//   user_data - the cuda_gpumgmt_thread_data of the calling worker.
+// Queues the kernel and the device-to-host copy on the worker's stream, waits for the
+// stream to drain, copies the results to `memory` and releases the worker's lock.
+// Returns true on success. On any CUDA failure device->error / error_message are set,
+// the lock is released and false is returned.
+bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
+    cuda_gpumgmt_thread_data *gpumgmt_thread = (cuda_gpumgmt_thread_data *)user_data;
+    cuda_device_info *device = gpumgmt_thread->device;
+    cudaStream_t stream = (cudaStream_t)gpumgmt_thread->device_data;
+
+    size_t work_items = 4;
+    const size_t result_bytes = threads * (xmrig::ARGON2_HASHLEN + 4);
+
+    posthash <<<threads, work_items, BLAKE_SHARED_MEM, stream>>> (
+            device->arguments.hash_memory[gpumgmt_thread->thread_id],
+            device->arguments.out_memory[gpumgmt_thread->thread_id],
+            device->arguments.preseed_memory[gpumgmt_thread->thread_id]);
+
+    device->error = cudaMemcpyAsync(device->arguments.host_seed_memory[gpumgmt_thread->thread_id],
+                                    device->arguments.hash_memory[gpumgmt_thread->thread_id],
+                                    result_bytes, cudaMemcpyDeviceToHost, stream);
+    if (device->error != cudaSuccess) {
+        device->error_message = "Error reading gpu memory.";
+        gpumgmt_thread->unlock();
+        return false;
+    }
+
+    // Poll only while the stream is still busy. Any other status is a real failure;
+    // looping on "!= cudaSuccess" would spin forever with the lock held.
+    cudaError_t stream_status = cudaStreamQuery(stream);
+    while (stream_status == cudaErrorNotReady) {
+        this_thread::sleep_for(chrono::milliseconds(10));
+        stream_status = cudaStreamQuery(stream);
+    }
+    if (stream_status != cudaSuccess) {
+        device->error = stream_status;
+        device->error_message = "Error reading gpu memory.";
+        gpumgmt_thread->unlock();
+        return false;
+    }
+
+    memcpy(memory, device->arguments.host_seed_memory[gpumgmt_thread->thread_id], result_bytes);
+    gpumgmt_thread->unlock();
+
+    return true;
+}
\ No newline at end of file
diff --git a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp
new file mode 100755
index 00000000..b217dc79
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp
@@ -0,0 +1,888 @@
+//
+// Created by Haifa Bogdan Adnan on 03/08/2018.
+//
+
+#include <crypto/Argon2_constants.h>
+#include "../../../common/common.h"
+
+#include "crypto/argon2_hasher/hash/Hasher.h"
+#include "crypto/argon2_hasher/hash/argon2/Argon2.h"
+
+#include "OpenCLHasher.h"
+#include "OpenCLKernel.h"
+
+#include "crypto/argon2_hasher/common/DLLExport.h"
+
+#if defined(WITH_OPENCL)
+
+#ifndef CL_DEVICE_BOARD_NAME_AMD
+#define CL_DEVICE_BOARD_NAME_AMD                    0x4038
+#endif
+#ifndef CL_DEVICE_TOPOLOGY_AMD
+#define CL_DEVICE_TOPOLOGY_AMD                      0x4037
+#endif
+#ifndef CL_DEVICE_PCI_BUS_ID_NV
+#define CL_DEVICE_PCI_BUS_ID_NV                     0x4008
+#endif
+#ifndef CL_DEVICE_PCI_SLOT_ID_NV
+#define CL_DEVICE_PCI_SLOT_ID_NV                    0x4009
+#endif
+
+// Overlay for the value filled in by the CL_DEVICE_TOPOLOGY_AMD device query.
+// `raw` views it as a type tag plus five 32-bit words; `pcie` views it as the same
+// type tag, 17 unused bytes, then the PCI bus / device / function bytes.
+typedef union
+{
+    struct { cl_uint type; cl_uint data[5]; } raw;
+    struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
+} device_topology_amd;
+
+#define KERNEL_WORKGROUP_SIZE   32
+
+// Default constructor: labels the hasher as the "GPU" / "OPENCL" ("OCL") backend and
+// starts with an empty description, zero intensity and zero computing threads.
+opencl_hasher::opencl_hasher() {
+    // Backend identification.
+    m_shortSubType = "OCL";
+    m_subType = "OPENCL";
+    m_type = "GPU";
+
+    // Runtime state, filled in later by configure().
+    m_description = "";
+    m_computingThreads = 0;
+    m_intensity = 0;
+}
+
+// Destructor. The cleanup() call below is commented out, so this body does nothing.
+// NOTE(review): confirm whether the device resources are meant to be released elsewhere.
+opencl_hasher::~opencl_hasher() {
+//    this->cleanup();
+}
+
+// Selects the Argon2 profile for the given algorithm / variant and enumerates the
+// OpenCL GPU devices. Returns true when enumeration reported no error and found at
+// least one device; otherwise stores the reason in m_description and returns false.
+bool opencl_hasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) {
+    m_profile = getArgon2Profile(algorithm, variant);
+
+    string failure_text;
+    cl_int status = CL_SUCCESS;
+    __devices = __query_opencl_devices(status, failure_text);
+
+    if (status == CL_SUCCESS && !__devices.empty()) {
+        return true;
+    }
+
+    // An enumeration error takes precedence over the "nothing found" case.
+    if (status != CL_SUCCESS) {
+        m_description = "No compatible GPU detected: " + failure_text;
+    }
+    else {
+        m_description = "No compatible GPU detected.";
+    }
+    return false;
+}
+
+// Enumerates the GPU devices of every OpenCL platform.
+//   error         - set to the most recent OpenCL error seen; left untouched otherwise.
+//   error_message - set together with `error`.
+// Returns one heap-allocated opencl_device_info per device whose properties could be
+// read, with device_index numbered consecutively from 0; the caller owns the pointers.
+// A platform or device that fails is skipped (and reported through `error`) while the
+// remaining ones are still collected.
+// Descriptors of failing devices are now deleted instead of leaked, and the id arrays
+// are held in vectors rather than unchecked malloc buffers.
+vector<opencl_device_info*> opencl_hasher::__query_opencl_devices(cl_int &error, string &error_message) {
+    vector<opencl_device_info*> result;
+
+    cl_uint platform_count = 0;
+    clGetPlatformIDs(0, NULL, &platform_count);
+    if(platform_count == 0) {
+        return result;
+    }
+
+    vector<cl_platform_id> platforms(platform_count);
+
+    cl_int err = clGetPlatformIDs(platform_count, platforms.data(), &platform_count);
+    if(err != CL_SUCCESS)  {
+        error = err;
+        error_message = "Error querying for opencl platforms.";
+        return result;
+    }
+    // The second call reports the available count again; never walk past our buffer.
+    if(platform_count > platforms.size()) {
+        platform_count = (cl_uint)platforms.size();
+    }
+
+    int counter = 0;
+
+    for(uint32_t i=0; i < platform_count; i++) {
+        cl_uint device_count = 0;
+        clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &device_count);
+        if(device_count == 0) {
+            continue;
+        }
+
+        vector<cl_device_id> devices(device_count);
+        err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, device_count, devices.data(), &device_count);
+        if(err != CL_SUCCESS)  {
+            error = err;
+            error_message = "Error querying for opencl devices.";
+            continue;
+        }
+        if(device_count > devices.size()) {
+            device_count = (cl_uint)devices.size();
+        }
+
+        for(uint32_t j=0; j < device_count; j++) {
+            opencl_device_info *info = __get_device_info(platforms[i], devices[j]);
+            if(info->error != CL_SUCCESS) {
+                error = info->error;
+                error_message = info->error_message;
+                delete info; // not handed to the caller, so release it here
+                continue;
+            }
+
+            info->device_index = counter;
+            result.push_back(info);
+            counter++;
+        }
+    }
+
+    return result;
+}
+
+// Reads a string-valued device attribute into `value`.
+// Asks for the required size first, then fetches the text into a zero-filled buffer
+// that is one byte larger, so the result is always NUL-terminated.
+// Returns the status of the fetching clGetDeviceInfo call; `value` is only written on
+// CL_SUCCESS.
+static cl_int opencl_query_device_string(cl_device_id device, cl_device_info attribute, string &value) {
+    size_t sz = 0;
+    clGetDeviceInfo(device, attribute, 0, NULL, &sz);
+
+    vector<char> buffer(sz + 1);
+    cl_int status = clGetDeviceInfo(device, attribute, sz, buffer.data(), &sz);
+    if(status == CL_SUCCESS) {
+        value = buffer.data();
+    }
+    return status;
+}
+
+// Builds the descriptor of one OpenCL device: platform / device handles, a display
+// string of the form "<vendor> - <name> (<global memory>GB)", the global memory size
+// and the maximum single allocation size.
+// Always returns a new heap-allocated opencl_device_info owned by the caller. When a
+// query fails, its `error` / `error_message` fields describe the failing query and the
+// remaining fields are left as they were at that point.
+// The three string queries share one helper with a managed buffer, replacing three
+// copies of unchecked malloc / free code.
+opencl_device_info *opencl_hasher::__get_device_info(cl_platform_id platform, cl_device_id device) {
+    opencl_device_info *device_info = new opencl_device_info(CL_SUCCESS, "");
+
+    device_info->platform = platform;
+    device_info->device = device;
+
+    string device_vendor;
+    device_info->error = opencl_query_device_string(device, CL_DEVICE_VENDOR, device_vendor);
+    if(device_info->error != CL_SUCCESS) {
+        device_info->error_message = "Error querying device vendor.";
+        return device_info;
+    }
+
+    // AMD devices are asked for their board name instead of the generic device name.
+    cl_device_info query_type = CL_DEVICE_NAME;
+    if(device_vendor.find("Advanced Micro Devices") != string::npos)
+        query_type = CL_DEVICE_BOARD_NAME_AMD;
+
+    string device_name;
+    device_info->error = opencl_query_device_string(device, query_type, device_name);
+    if(device_info->error != CL_SUCCESS) {
+        device_info->error_message = "Error querying device name.";
+        return device_info;
+    }
+
+    // The version text is not part of the display string, but a device that cannot
+    // report it is still rejected, as before.
+    string device_version;
+    device_info->error = opencl_query_device_string(device, CL_DEVICE_VERSION, device_version);
+    if(device_info->error != CL_SUCCESS) {
+        device_info->error_message = "Error querying device version.";
+        return device_info;
+    }
+
+    device_info->device_string = device_vendor + " - " + device_name/* + " : " + device_version*/;
+
+    device_info->error = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(device_info->max_mem_size), &(device_info->max_mem_size), NULL);
+    if(device_info->error != CL_SUCCESS) {
+        device_info->error_message = "Error querying device global memory size.";
+        return device_info;
+    }
+
+    device_info->error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(device_info->max_allocable_mem_size), &(device_info->max_allocable_mem_size), NULL);
+    if(device_info->error != CL_SUCCESS) {
+        device_info->error_message = "Error querying device max memory allocation.";
+        return device_info;
+    }
+
+    // Append the global memory size in GiB, printed with two significant digits.
+    double mem_in_gb = device_info->max_mem_size / 1073741824.0;
+    stringstream ss;
+    ss << setprecision(2) << mem_in_gb;
+    device_info->device_string += (" (" + ss.str() + "GB)");
+
+    return device_info;
+}
+
+// Applies the user configuration to the devices found by initialize().
+// For every device this: assigns its global card index (continuing after the cards
+// already counted in `config`), applies the optional GPU name filter, sets the device
+// up with its configured intensity, records its name / PCI bus id / intensity via
+// storeDeviceInfo() and adds it to the enabled list. A human-readable report is
+// accumulated in m_description.
+// Returns false when the average GPU intensity is 0, when the filter excludes every
+// card, or when no device ends up with any threads; returns true otherwise, after
+// building the thread data and setting m_intensity / m_computingThreads.
+bool opencl_hasher::configure(xmrig::HasherConfig &config) {
+    int index = config.getGPUCardsCount();
+    int total_threads = 0;
+
+    if (config.getAverageGPUIntensity() == 0) {
+        m_intensity = 0;
+        m_description = "Status: DISABLED - by user.";
+        return false;
+    }
+
+    bool cards_selected = false;
+    double intensity = 0; // sum of the intensities of the enabled devices
+
+    for(vector<opencl_device_info *>::iterator d = __devices.begin(); d != __devices.end(); d++, index++) {
+        opencl_device_info *dev = *d;
+
+        stringstream ss;
+        ss << "["<< (index + 1) << "] " << dev->device_string;
+        string device_description = ss.str();
+        dev->device_index = index;
+        dev->profile_info.profile = m_profile;
+
+        if(config.gpuFilter().size() > 0) {
+            // With a filter list, a card is used only if its description contains one
+            // of the filter strings.
+            bool found = false;
+            for(const xmrig::GPUFilter &fit : config.gpuFilter()) {
+                if(device_description.find(fit.filter) != string::npos) {
+                    found = true;
+                    break;
+                }
+            }
+            if(!found) {
+                dev->profile_info.threads = 0;
+                ss << " - DISABLED" << endl;
+                m_description += ss.str();
+                continue;
+            }
+        }
+        cards_selected = true;
+
+        ss << endl;
+
+        double device_intensity = config.getGPUIntensity(dev->device_index);
+
+        m_description += ss.str();
+
+        if(!(__setup_device_info(dev, device_intensity))) {
+            m_description += dev->error_message;
+            m_description += "\n";
+            continue;
+        }
+
+        DeviceInfo device;
+
+        if(dev->device_string.find("Advanced Micro Devices") != string::npos) {
+            device_topology_amd amdtopo;
+            if(clGetDeviceInfo(dev->device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL) == CL_SUCCESS) {
+                char bus_id[50];
+                // The PCI fields are signed cl_char. Convert through unsigned char so
+                // values >= 0x80 are not sign-extended and printed as "ffffffXX".
+                sprintf(bus_id, "%02x:%02x.%x",
+                        (unsigned int)(unsigned char)amdtopo.pcie.bus,
+                        (unsigned int)(unsigned char)amdtopo.pcie.device,
+                        (unsigned int)(unsigned char)amdtopo.pcie.function);
+                device.bus_id = bus_id;
+            }
+        }
+        else if(dev->device_string.find("NVIDIA") != string::npos) {
+            cl_uint bus;
+            cl_uint slot;
+
+            if(clGetDeviceInfo (dev->device, CL_DEVICE_PCI_BUS_ID_NV, sizeof(bus), &bus, NULL) == CL_SUCCESS) {
+                if(clGetDeviceInfo (dev->device, CL_DEVICE_PCI_SLOT_ID_NV, sizeof(slot), &slot, NULL) == CL_SUCCESS) {
+                    char bus_id[50];
+                    sprintf(bus_id, "%02x:%02x.0", bus, slot);
+                    device.bus_id = bus_id;
+                }
+            }
+        }
+
+        device.name = dev->device_string;
+        device.intensity = device_intensity;
+        storeDeviceInfo(dev->device_index, device);
+
+        __enabledDevices.push_back(dev);
+
+        total_threads += dev->profile_info.threads;
+        intensity += device_intensity;
+    }
+
+    // Tell the shared config how many cards this hasher numbered.
+    config.addGPUCardsCount(index - config.getGPUCardsCount());
+
+    if(!cards_selected) {
+        m_intensity = 0;
+        m_description += "Status: DISABLED - no card enabled because of filtering.";
+        return false;
+    }
+
+    if (total_threads == 0) {
+        m_intensity = 0;
+        m_description += "Status: DISABLED - not enough resources.";
+        return false;
+    }
+
+    buildThreadData();
+
+    m_intensity = intensity / __enabledDevices.size();
+    m_computingThreads = __enabledDevices.size() * 2; // 2 computing threads for each device
+    m_description += "Status: ENABLED - with " + to_string(total_threads) + " threads.";
+
+    return true;
+}
+
+bool opencl_hasher::__setup_device_info(opencl_device_info *device, double intensity) {
+    cl_int error;
+
+    cl_context_properties properties[] = {
+            CL_CONTEXT_PLATFORM, (cl_context_properties) device->platform,
+            0};
+
+    device->context = clCreateContext(properties, 1, &(device->device), NULL, NULL, &error);
+    if (error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error getting device context.";
+        return false;
+    }
+
+    device->queue = clCreateCommandQueue(device->context, device->device, CL_QUEUE_PROFILING_ENABLE, &error);
+    if (error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error getting device command queue.";
+        return false;
+    }
+
+    const char *srcptr[] = {OpenCLKernel.c_str()};
+    size_t srcsize = OpenCLKernel.size();
+
+    device->program = clCreateProgramWithSource(device->context, 1, srcptr, &srcsize, &error);
+    if (error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating opencl program for device.";
+        return false;
+    }
+
+    error = clBuildProgram(device->program, 1, &device->device, "", NULL, NULL);
+    if (error != CL_SUCCESS) {
+        size_t log_size;
+        clGetProgramBuildInfo(device->program, device->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
+        char *log = (char *) malloc(log_size + 1);
+        clGetProgramBuildInfo(device->program, device->device, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
+        log[log_size] = 0;
+        string build_log = log;
+        free(log);
+
+        device->error = error;
+        device->error_message = "Error building opencl program for device: " + build_log;
+        return false;
+    }
+
+    device->kernel_prehash = clCreateKernel(device->program, "prehash", &error);
+    if (error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating opencl prehash kernel for device.";
+        return false;
+    }
+    device->kernel_fill_blocks = clCreateKernel(device->program, "fill_blocks", &error);
+    if (error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating opencl main kernel for device.";
+        return false;
+    }
+    device->kernel_posthash = clCreateKernel(device->program, "posthash", &error);
+    if (error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating opencl posthash kernel for device.";
+        return false;
+    }
+
+    device->profile_info.threads_per_chunk = (uint32_t) (device->max_allocable_mem_size / device->profile_info.profile->memSize);
+    size_t chunk_size = device->profile_info.threads_per_chunk * device->profile_info.profile->memSize;
+
+    if (chunk_size == 0) {
+        device->error = -1;
+        device->error_message = "Not enough memory on GPU.";
+        return false;
+    }
+
+    uint64_t usable_memory = device->max_mem_size;
+    double chunks = (double) usable_memory / (double) chunk_size;
+
+    uint32_t max_threads = (uint32_t) (device->profile_info.threads_per_chunk * chunks);
+
+    if (max_threads == 0) {
+        device->error = -1;
+        device->error_message = "Not enough memory on GPU.";
+        return false;
+    }
+
+    device->profile_info.threads = (uint32_t) (max_threads * intensity / 100.0);
+    device->profile_info.threads = (device->profile_info.threads / 4) * 4; // make it divisible by 4
+    if (max_threads > 0 && device->profile_info.threads == 0 && intensity > 0)
+        device->profile_info.threads = 4;
+
+    double counter = (double) device->profile_info.threads / (double) device->profile_info.threads_per_chunk;
+    size_t allocated_mem_for_current_chunk = 0;
+
+    if (counter > 0) {
+        if (counter > 1) {
+            allocated_mem_for_current_chunk = chunk_size;
+        } else {
+            allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter);
+        }
+        counter -= 1;
+    } else {
+        allocated_mem_for_current_chunk = 1;
+    }
+    device->arguments.memory_chunk_0 = clCreateBuffer(device->context, CL_MEM_READ_WRITE,
+                                                      allocated_mem_for_current_chunk, NULL, &error);
+    if (error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating memory buffer.";
+        return false;
+    }
+
+    if (counter > 0) {
+        if (counter > 1) {
+            allocated_mem_for_current_chunk = chunk_size;
+        } else {
+            allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter);
+        }
+        counter -= 1;
+    } else {
+        allocated_mem_for_current_chunk = 1;
+    }
+    device->arguments.memory_chunk_1 = clCreateBuffer(device->context, CL_MEM_READ_WRITE,
+                                                      allocated_mem_for_current_chunk, NULL, &error);
+    if (error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating memory buffer.";
+        return false;
+    }
+
+    if (counter > 0) {
+        if (counter > 1) {
+            allocated_mem_for_current_chunk = chunk_size;
+        } else {
+            allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter);
+        }
+        counter -= 1;
+    } else {
+        allocated_mem_for_current_chunk = 1;
+    }
+    device->arguments.memory_chunk_2 = clCreateBuffer(device->context, CL_MEM_READ_WRITE,
+                                                      allocated_mem_for_current_chunk, NULL, &error);
+    if (error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating memory buffer.";
+        return false;
+    }
+
+    if (counter > 0) {
+        if (counter > 1) {
+            allocated_mem_for_current_chunk = chunk_size;
+        } else {
+            allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter);
+        }
+        counter -= 1;
+    } else {
+        allocated_mem_for_current_chunk = 1;
+    }
+    device->arguments.memory_chunk_3 = clCreateBuffer(device->context, CL_MEM_READ_WRITE,
+                                                      allocated_mem_for_current_chunk, NULL, &error);
+    if (error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating memory buffer.";
+        return false;
+    }
+
+    if (counter > 0) {
+        if (counter > 1) {
+            allocated_mem_for_current_chunk = chunk_size;
+        } else {
+            allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter);
+        }
+        counter -= 1;
+    } else {
+        allocated_mem_for_current_chunk = 1;
+    }
+    device->arguments.memory_chunk_4 = clCreateBuffer(device->context, CL_MEM_READ_WRITE,
+                                                      allocated_mem_for_current_chunk, NULL, &error);
+    if (error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating memory buffer.";
+        return false;
+    }
+
+    if (counter > 0) {
+        if (counter > 1) {
+            allocated_mem_for_current_chunk = chunk_size;
+        } else {
+            allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter);
+        }
+        counter -= 1;
+    } else {
+        allocated_mem_for_current_chunk = 1;
+    }
+    device->arguments.memory_chunk_5 = clCreateBuffer(device->context, CL_MEM_READ_WRITE,
+                                                      allocated_mem_for_current_chunk, NULL, &error);
+    if (error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating memory buffer.";
+        return false;
+    }
+
+    device->arguments.refs = clCreateBuffer(device->context, CL_MEM_READ_ONLY,
+                                            device->profile_info.profile->blockRefsSize * sizeof(uint32_t), NULL,
+                                            &error);
+    if (error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating memory buffer.";
+        return false;
+    }
+
+    if (device->profile_info.profile->succesiveIdxs == 1) {
+        device->arguments.idxs = NULL;
+    }
+    else {
+        device->arguments.idxs = clCreateBuffer(device->context, CL_MEM_READ_ONLY,
+                                                device->profile_info.profile->blockRefsSize * sizeof(uint32_t), NULL,
+                                                &error);
+        if (error != CL_SUCCESS) {
+            device->error = error;
+            device->error_message = "Error creating memory buffer.";
+            return false;
+        }
+    }
+
+    device->arguments.segments = clCreateBuffer(device->context, CL_MEM_READ_ONLY, device->profile_info.profile->segCount * 3 * sizeof(uint32_t), NULL, &error);
+    if(error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating memory buffer.";
+        return false;
+    }
+
+    size_t preseed_memory_size = device->profile_info.profile->pwdLen * 4;
+    size_t seed_memory_size = device->profile_info.threads * (device->profile_info.profile->thrCost * 2) * ARGON2_BLOCK_SIZE;
+    size_t out_memory_size = device->profile_info.threads * ARGON2_BLOCK_SIZE;
+    size_t hash_memory_size = device->profile_info.threads * (xmrig::ARGON2_HASHLEN + 4);
+
+    device->arguments.preseed_memory[0] = clCreateBuffer(device->context, CL_MEM_READ_ONLY, preseed_memory_size, NULL, &error);
+    if(error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating memory buffer.";
+        return false;
+    }
+
+    device->arguments.preseed_memory[1] = clCreateBuffer(device->context, CL_MEM_READ_ONLY, preseed_memory_size, NULL, &error);
+    if(error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating memory buffer.";
+        return false;
+    }
+
+    device->arguments.seed_memory[0] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, seed_memory_size, NULL, &error);
+    if(error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating memory buffer.";
+        return false;
+    }
+
+    device->arguments.seed_memory[1] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, seed_memory_size, NULL, &error);
+    if(error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating memory buffer.";
+        return false;
+    }
+
+    device->arguments.out_memory[0] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, out_memory_size, NULL, &error);
+    if(error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating memory buffer.";
+        return false;
+    }
+
+    device->arguments.out_memory[1] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, out_memory_size, NULL, &error);
+    if(error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating memory buffer.";
+        return false;
+    }
+
+    device->arguments.hash_memory[0] = clCreateBuffer(device->context, CL_MEM_WRITE_ONLY, hash_memory_size, NULL, &error);
+    if(error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating memory buffer.";
+        return false;
+    }
+
+    device->arguments.hash_memory[1] = clCreateBuffer(device->context, CL_MEM_WRITE_ONLY, hash_memory_size, NULL, &error);
+    if(error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error creating memory buffer.";
+        return false;
+    }
+
+	//optimise address sizes
+    uint32_t *refs = (uint32_t *)malloc(device->profile_info.profile->blockRefsSize * sizeof(uint32_t));
+    for(int i=0;i<device->profile_info.profile->blockRefsSize;i++) {
+        refs[i] = device->profile_info.profile->blockRefs[i*3 + 1];
+    }
+
+    error=clEnqueueWriteBuffer(device->queue, device->arguments.refs, CL_TRUE, 0, device->profile_info.profile->blockRefsSize * sizeof(uint32_t), refs, 0, NULL, NULL);
+    if(error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error writing to gpu memory.";
+        return false;
+    }
+
+    free(refs);
+
+    if(device->profile_info.profile->succesiveIdxs == 0) {
+        uint32_t *idxs = (uint32_t *) malloc(device->profile_info.profile->blockRefsSize * sizeof(uint32_t));
+        for (int i = 0; i < device->profile_info.profile->blockRefsSize; i++) {
+            idxs[i] = device->profile_info.profile->blockRefs[i * 3];
+            if (device->profile_info.profile->blockRefs[i * 3 + 2] == 1) {
+                idxs[i] |= 0x80000000;
+            }
+        }
+
+        error=clEnqueueWriteBuffer(device->queue, device->arguments.idxs, CL_TRUE, 0, device->profile_info.profile->blockRefsSize * sizeof(uint32_t), idxs, 0, NULL, NULL);
+        if(error != CL_SUCCESS) {
+            device->error = error;
+            device->error_message = "Error writing to gpu memory.";
+            return false;
+        }
+
+        free(idxs);
+    }
+
+    error=clEnqueueWriteBuffer(device->queue, device->arguments.segments, CL_TRUE, 0, device->profile_info.profile->segCount * 3 * sizeof(uint32_t), device->profile_info.profile->segments, 0, NULL, NULL);
+    if(error != CL_SUCCESS) {
+        device->error = error;
+        device->error_message = "Error writing to gpu memory.";
+        return false;
+    }
+
+	clSetKernelArg(device->kernel_fill_blocks, 0, sizeof(device->arguments.memory_chunk_0), &device->arguments.memory_chunk_0);
+	clSetKernelArg(device->kernel_fill_blocks, 1, sizeof(device->arguments.memory_chunk_1), &device->arguments.memory_chunk_1);
+	clSetKernelArg(device->kernel_fill_blocks, 2, sizeof(device->arguments.memory_chunk_2), &device->arguments.memory_chunk_2);
+	clSetKernelArg(device->kernel_fill_blocks, 3, sizeof(device->arguments.memory_chunk_3), &device->arguments.memory_chunk_3);
+	clSetKernelArg(device->kernel_fill_blocks, 4, sizeof(device->arguments.memory_chunk_4), &device->arguments.memory_chunk_4);
+	clSetKernelArg(device->kernel_fill_blocks, 5, sizeof(device->arguments.memory_chunk_5), &device->arguments.memory_chunk_5);
+    clSetKernelArg(device->kernel_fill_blocks, 8, sizeof(device->arguments.refs), &device->arguments.refs);
+    if(device->profile_info.profile->succesiveIdxs == 0)
+        clSetKernelArg(device->kernel_fill_blocks, 9, sizeof(device->arguments.idxs), &device->arguments.idxs);
+    else
+        clSetKernelArg(device->kernel_fill_blocks, 9, sizeof(cl_mem), NULL);
+	clSetKernelArg(device->kernel_fill_blocks, 10, sizeof(device->arguments.segments), &device->arguments.segments);
+    clSetKernelArg(device->kernel_fill_blocks, 11, sizeof(int32_t), &device->profile_info.profile->memSize);
+    clSetKernelArg(device->kernel_fill_blocks, 12, sizeof(int32_t), &device->profile_info.profile->thrCost);
+    clSetKernelArg(device->kernel_fill_blocks, 13, sizeof(int32_t), &device->profile_info.profile->segSize);
+    clSetKernelArg(device->kernel_fill_blocks, 14, sizeof(int32_t), &device->profile_info.profile->segCount);
+    clSetKernelArg(device->kernel_fill_blocks, 15, sizeof(int32_t), &device->profile_info.threads_per_chunk);
+
+    clSetKernelArg(device->kernel_prehash, 2, sizeof(int32_t), &device->profile_info.profile->memCost);
+    clSetKernelArg(device->kernel_prehash, 3, sizeof(int32_t), &device->profile_info.profile->thrCost);
+    int passes = device->profile_info.profile->segCount / (4 * device->profile_info.profile->thrCost);
+    clSetKernelArg(device->kernel_prehash, 4, sizeof(int32_t), &passes);
+    clSetKernelArg(device->kernel_prehash, 6, sizeof(int32_t), &device->profile_info.profile->saltLen);
+
+    return true;
+}
+
+// Pre-hash stage callback handed to Argon2 (see opencl_hasher::buildThreadData).
+//
+// Uploads hashData.inSize bytes from `memory` into this worker's preseed
+// buffer (non-blocking write) and enqueues the prehash kernel.
+//
+// Locking: acquires device->device_lock. On success the function returns
+// with the lock STILL HELD; in this file it is released at the end of
+// opencl_kernel_posthasher, or by whichever stage reports an error.
+//
+// memory    - host buffer holding the input to upload
+// threads   - batch size, forwarded to the kernel as argument 7
+// profile   - Argon2 profile; only thrCost is read here
+// user_data - opencl_gpumgmt_thread_data* of the calling worker
+//
+// Returns true when both commands were enqueued. On failure the OpenCL
+// status and a message are stored on the device, the lock is released and
+// false is returned.
+bool opencl_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
+    opencl_gpumgmt_thread_data *worker = (opencl_gpumgmt_thread_data *)user_data;
+    opencl_device_info *dev = worker->device;
+    const int slot = worker->thread_id;
+
+    // Work-group geometry: at least 16 sessions per group, 4 work-items per session.
+    int sessions = max(profile->thrCost * 2, (uint32_t)16);
+    double hashes_per_block = sessions / (profile->thrCost * 2.0);
+    size_t global_size = sessions * 4 * ceil(threads / hashes_per_block);
+    size_t group_size = sessions * 4;
+
+    dev->device_lock.lock();
+
+    const char *failure = "Error writing to gpu memory.";
+    cl_int status = clEnqueueWriteBuffer(dev->queue, dev->arguments.preseed_memory[slot],
+                                         CL_FALSE, 0, worker->hashData.inSize, memory, 0, NULL, NULL);
+    if (status == CL_SUCCESS) {
+        int inSizeInInt = worker->hashData.inSize / 4;
+
+        clSetKernelArg(dev->kernel_prehash, 0, sizeof(cl_mem), &dev->arguments.preseed_memory[slot]);
+        clSetKernelArg(dev->kernel_prehash, 1, sizeof(cl_mem), &dev->arguments.seed_memory[slot]);
+        clSetKernelArg(dev->kernel_prehash, 5, sizeof(int), &inSizeInInt);
+        clSetKernelArg(dev->kernel_prehash, 7, sizeof(int), &threads);
+        // Local scratch: 76 ulongs per session.
+        clSetKernelArg(dev->kernel_prehash, 8, sessions * sizeof(cl_ulong) * 76, NULL);
+
+        failure = "Error running the kernel.";
+        status = clEnqueueNDRangeKernel(dev->queue, dev->kernel_prehash, 1, NULL,
+                                        &global_size, &group_size, 0, NULL, NULL);
+        if (status == CL_SUCCESS) {
+            // Lock intentionally left held for the following stages.
+            return true;
+        }
+    }
+
+    dev->error = status;
+    dev->error_message = failure;
+    dev->device_lock.unlock();
+    return false;
+}
+
+// Fill stage callback handed to Argon2 (see opencl_hasher::buildThreadData).
+//
+// Binds this worker's seed/out buffers and the local scratch size to the
+// fill-blocks kernel and enqueues it. It does not take device_lock itself;
+// it releases it only on failure, so it expects the lock to be held on entry
+// (opencl_kernel_prehasher leaves it held on success).
+//
+// threads   - batch size; one work-group per hash
+// profile   - Argon2 profile; only thrCost is read here
+// user_data - opencl_gpumgmt_thread_data* of the calling worker
+//
+// Returns a non-NULL sentinel ((void *)1) on success, NULL on failure.
+void *opencl_kernel_filler(int threads, Argon2Profile *profile, void *user_data) {
+    opencl_gpumgmt_thread_data *worker = (opencl_gpumgmt_thread_data *)user_data;
+    opencl_device_info *dev = worker->device;
+    cl_mem *seed = &dev->arguments.seed_memory[worker->thread_id];
+    cl_mem *out = &dev->arguments.out_memory[worker->thread_id];
+
+    size_t global_size = threads * KERNEL_WORKGROUP_SIZE * profile->thrCost;
+    size_t group_size = KERNEL_WORKGROUP_SIZE * profile->thrCost;
+    size_t scratch_qwords = profile->thrCost * ARGON2_QWORDS_IN_BLOCK;
+
+    clSetKernelArg(dev->kernel_fill_blocks, 6, sizeof(cl_mem), seed);
+    clSetKernelArg(dev->kernel_fill_blocks, 7, sizeof(cl_mem), out);
+    clSetKernelArg(dev->kernel_fill_blocks, 16, sizeof(cl_ulong) * scratch_qwords, NULL);
+
+    cl_int status = clEnqueueNDRangeKernel(dev->queue, dev->kernel_fill_blocks, 1, NULL,
+                                           &global_size, &group_size, 0, NULL, NULL);
+    if (status == CL_SUCCESS) {
+        return (void *)1;
+    }
+
+    dev->error = status;
+    dev->error_message = "Error running the kernel.";
+    dev->device_lock.unlock();
+    return NULL;
+}
+
+// Post-hash stage callback handed to Argon2 (see opencl_hasher::buildThreadData).
+//
+// Enqueues the posthash kernel, reads threads * (ARGON2_HASHLEN + 4) bytes
+// of results back into `memory`, and waits for the queue to drain. It always
+// releases device->device_lock before returning, on success and on failure,
+// so it expects the lock to be held on entry.
+//
+// memory    - host buffer receiving the hash records
+// threads   - batch size
+// profile   - unused here
+// user_data - opencl_gpumgmt_thread_data* of the calling worker
+//
+// Returns true on success; on failure stores the OpenCL status and a
+// message on the device and returns false.
+bool opencl_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
+    opencl_gpumgmt_thread_data *worker = (opencl_gpumgmt_thread_data *)user_data;
+    opencl_device_info *dev = worker->device;
+    const int slot = worker->thread_id;
+
+    // Four work-items cooperate on each hash.
+    size_t group_size = 4;
+    size_t global_size = threads * 4;
+
+    clSetKernelArg(dev->kernel_posthash, 0, sizeof(cl_mem), &dev->arguments.hash_memory[slot]);
+    clSetKernelArg(dev->kernel_posthash, 1, sizeof(cl_mem), &dev->arguments.out_memory[slot]);
+    clSetKernelArg(dev->kernel_posthash, 2, sizeof(cl_mem), &dev->arguments.preseed_memory[slot]);
+    clSetKernelArg(dev->kernel_posthash, 3, sizeof(cl_ulong) * 60, NULL);
+
+    const char *failure = "Error running the kernel.";
+    cl_int status = clEnqueueNDRangeKernel(dev->queue, dev->kernel_posthash, 1, NULL,
+                                           &global_size, &group_size, 0, NULL, NULL);
+
+    if (status == CL_SUCCESS) {
+        failure = "Error reading gpu memory.";
+        status = clEnqueueReadBuffer(dev->queue, dev->arguments.hash_memory[slot], CL_FALSE, 0,
+                                     threads * (xmrig::ARGON2_HASHLEN + 4), memory, 0, NULL, NULL);
+    }
+
+    if (status == CL_SUCCESS) {
+        failure = "Error flushing GPU queue.";
+        status = clFinish(dev->queue);
+    }
+
+    const bool ok = (status == CL_SUCCESS);
+    if (!ok) {
+        dev->error = status;
+        dev->error_message = failure;
+    }
+
+    dev->device_lock.unlock();
+    return ok;
+}
+
+// Allocates and fills __thread_data: two consecutive entries per enabled
+// device (thread_id 0 and 1), so entry index = deviceIndex * 2 + thread_id.
+// Each entry gets its own Argon2 driver wired to the three OpenCL stage
+// callbacks, with the entry itself passed as the callbacks' user_data.
+void opencl_hasher::buildThreadData() {
+    const size_t deviceTotal = __enabledDevices.size();
+    __thread_data = new opencl_gpumgmt_thread_data[deviceTotal * 2];
+
+    opencl_gpumgmt_thread_data *slot = __thread_data;
+    for (size_t d = 0; d < deviceTotal; d++) {
+        opencl_device_info *device = __enabledDevices[d];
+
+        for (int threadId = 0; threadId < 2; threadId++, slot++) {
+            slot->device = device;
+            slot->thread_id = threadId;
+            slot->argon2 = new Argon2(opencl_kernel_prehasher, opencl_kernel_filler, opencl_kernel_posthasher,
+                                      nullptr, slot);
+            slot->argon2->setThreads(device->profile_info.threads);
+            // Each hash record is ARGON2_HASHLEN bytes plus 4 extra bytes.
+            slot->hashData.outSize = xmrig::ARGON2_HASHLEN + 4;
+        }
+    }
+}
+
+// Runs one batch of hashes for worker `threadIdx` and advances the nonce.
+//
+// threadIdx - index into __thread_data (deviceIndex * 2 + thread_id)
+// input     - input blob; its 32-bit nonce at byte offset 39 is modified in place
+// size      - length of `input` in bytes
+// output    - buffer receiving the hash records
+//
+// Returns the value reported by Argon2::generateHashes, or 0 if the device
+// has an OpenCL error recorded (the error is logged and the nonce is left
+// untouched).
+int opencl_hasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) {
+    // Byte offset of the 32-bit nonce inside the input blob.
+    const size_t kNonceOffset = 39;
+
+    opencl_gpumgmt_thread_data &threadData = __thread_data[threadIdx];
+    threadData.hashData.input = input;
+    threadData.hashData.inSize = size;
+    threadData.hashData.output = output;
+    int hashCount = threadData.argon2->generateHashes(*m_profile, threadData.hashData);
+    if(threadData.device->error != CL_SUCCESS) {
+        LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->error_message);
+        return 0;
+    }
+
+    // Offset 39 is not 4-byte aligned, so dereferencing a uint32_t* there is
+    // undefined behaviour and can fault on strict-alignment targets. Copy the
+    // nonce byte-wise (native byte order, same value as before) instead.
+    uint8_t *nonceBytes = ((uint8_t*)threadData.hashData.input) + kNonceOffset;
+    uint32_t nonce = 0;
+    uint8_t *nonceRaw = (uint8_t *)&nonce;
+    for (size_t b = 0; b < sizeof(nonce); b++) {
+        nonceRaw[b] = nonceBytes[b];
+    }
+
+    // Skip past the nonces consumed by this batch.
+    nonce += threadData.device->profile_info.threads;
+
+    for (size_t b = 0; b < sizeof(nonce); b++) {
+        nonceBytes[b] = nonceRaw[b];
+    }
+
+    return hashCount;
+}
+
+// Releases every OpenCL object owned by the devices in __devices, deletes
+// the device records and empties __devices.
+//
+// Buffers, kernels, program, queue and context are released only for
+// devices with profile_info.threads != 0; the cl_device_id is released for
+// every device.
+void opencl_hasher::cleanup() {
+    for (vector<opencl_device_info *>::iterator it = __devices.begin(); it != __devices.end(); ++it) {
+        opencl_device_info *device = *it;
+
+        if (device->profile_info.threads != 0) {
+            opencl_kernel_arguments &args = device->arguments;
+
+            clReleaseMemObject(args.memory_chunk_0);
+            clReleaseMemObject(args.memory_chunk_1);
+            clReleaseMemObject(args.memory_chunk_2);
+            clReleaseMemObject(args.memory_chunk_3);
+            clReleaseMemObject(args.memory_chunk_4);
+            clReleaseMemObject(args.memory_chunk_5);
+            clReleaseMemObject(args.refs);
+            // idxs is only allocated when the profile's succesiveIdxs is not 1
+            // (it is NULL otherwise); it was previously never released.
+            if (args.idxs != NULL) {
+                clReleaseMemObject(args.idxs);
+            }
+            clReleaseMemObject(args.segments);
+            clReleaseMemObject(args.preseed_memory[0]);
+            clReleaseMemObject(args.preseed_memory[1]);
+            clReleaseMemObject(args.seed_memory[0]);
+            clReleaseMemObject(args.seed_memory[1]);
+            clReleaseMemObject(args.out_memory[0]);
+            clReleaseMemObject(args.out_memory[1]);
+            clReleaseMemObject(args.hash_memory[0]);
+            clReleaseMemObject(args.hash_memory[1]);
+
+            clReleaseKernel(device->kernel_prehash);
+            clReleaseKernel(device->kernel_fill_blocks);
+            clReleaseKernel(device->kernel_posthash);
+            clReleaseProgram(device->program);
+            clReleaseCommandQueue(device->queue);
+            clReleaseContext(device->context);
+        }
+
+        clReleaseDevice(device->device);
+        delete device;
+    }
+    __devices.clear();
+}
+
+// Returns the number of hashes computed per batch by the device that serves
+// worker `workerIdx`, or 0 if the worker index does not map to an enabled
+// device.
+size_t opencl_hasher::parallelism(int workerIdx) {
+    // there are 2 computing threads per device, so divide by 2 to get device index
+    workerIdx /= 2;
+
+    // Valid indices are [0, size): `>=` is required, `>` would allow reading
+    // one element past the end of __enabledDevices.
+    if(workerIdx < 0 || (size_t)workerIdx >= __enabledDevices.size())
+        return 0;
+
+    return __enabledDevices[workerIdx]->profile_info.threads;
+}
+
+// Returns the number of devices currently held in __enabledDevices.
+size_t opencl_hasher::deviceCount() {
+    return __enabledDevices.size();
+}
+
+// Invokes the project's REGISTER_HASHER macro for opencl_hasher. The macro is
+// defined outside this file; presumably it adds the class to the hasher
+// registry at static-initialisation time — confirm against its definition.
+REGISTER_HASHER(opencl_hasher);
+
+#endif // WITH_OPENCL
diff --git a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h
new file mode 100755
index 00000000..ece7c971
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h
@@ -0,0 +1,110 @@
+//
+// Created by Haifa Bogdan Adnan on 03/08/2018.
+//
+
+#ifndef ARGON2_OPENCL_HASHER_H
+#define ARGON2_OPENCL_HASHER_H
+
+#if defined(WITH_OPENCL)
+
+#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
+
+#if defined(__APPLE__) || defined(__MACOSX)
+#include <OpenCL/opencl.h>
+#else
+#include <CL/opencl.h>
+#endif // !__APPLE__
+
+// Device-side buffers owned by one OpenCL device.
+// The [2] arrays are indexed by opencl_gpumgmt_thread_data::thread_id, giving
+// each of the two worker threads of a device its own set.
+struct opencl_kernel_arguments {
+    // Working memory for the fill-blocks kernel, split into up to six chunks
+    // (kernel arguments 0-5). Unused chunks are still created, with size 1.
+    cl_mem memory_chunk_0;
+    cl_mem memory_chunk_1;
+    cl_mem memory_chunk_2;
+    cl_mem memory_chunk_3;
+    cl_mem memory_chunk_4;
+    cl_mem memory_chunk_5;
+    // blockRefsSize uint32 entries taken from profile->blockRefs[i*3 + 1]
+    // (fill-blocks kernel argument 8).
+    cl_mem refs;
+    // blockRefsSize uint32 entries taken from profile->blockRefs[i*3], with bit
+    // 31 set when blockRefs[i*3 + 2] == 1. NULL when profile->succesiveIdxs == 1
+    // (fill-blocks kernel argument 9).
+    cl_mem idxs;
+    // segCount * 3 uint32 values copied from profile->segments
+    // (fill-blocks kernel argument 10).
+    cl_mem segments;
+    // Input uploaded by the prehash stage; also read by the posthash kernel.
+    cl_mem preseed_memory[2];
+    // Written by the prehash kernel, consumed by the fill-blocks kernel.
+    cl_mem seed_memory[2];
+    // Written by the fill-blocks kernel, consumed by the posthash kernel.
+    cl_mem out_memory[2];
+    // Final records, (ARGON2_HASHLEN + 4) bytes per hash, read back to the host.
+    cl_mem hash_memory[2];
+};
+
+// Per-device sizing for the active Argon2 profile.
+struct argon2profile_info {
+    // Starts with no work assigned and no profile selected. `profile` was
+    // previously left uninitialised; it is now a well-defined null pointer
+    // until the device is configured.
+    argon2profile_info()
+        : threads(0), threads_per_chunk(0), profile(nullptr) {
+    }
+
+    // Hashes computed per batch on this device; 0 means the device holds no
+    // OpenCL resources (see opencl_hasher::cleanup).
+    uint32_t threads;
+    // Passed to the fill-blocks kernel as argument 15.
+    uint32_t threads_per_chunk;
+    // Profile in use; not owned by this struct as far as this file shows.
+    Argon2Profile *profile;
+};
+
+// Everything the hasher tracks for one OpenCL device: handles, buffers,
+// sizing, description and the last error.
+struct opencl_device_info {
+    // Builds a record carrying the given status and message. All OpenCL
+    // handles, the buffer table and the size fields start out zeroed; they
+    // were previously left uninitialised, so a partially set-up device could
+    // expose garbage handles to later code such as opencl_hasher::cleanup().
+    opencl_device_info(cl_int err, const string &err_msg)
+        : platform(nullptr),
+          device(nullptr),
+          context(nullptr),
+          queue(nullptr),
+          program(nullptr),
+          kernel_prehash(nullptr),
+          kernel_fill_blocks(nullptr),
+          kernel_posthash(nullptr),
+          device_index(0),
+          arguments(),
+          max_mem_size(0),
+          max_allocable_mem_size(0),
+          error(err),
+          error_message(err_msg) {
+    }
+
+    cl_platform_id platform;
+    cl_device_id device;
+    cl_context context;
+    cl_command_queue queue;
+
+    cl_program program;
+    cl_kernel kernel_prehash;
+    cl_kernel kernel_fill_blocks;
+    cl_kernel kernel_posthash;
+
+    int device_index;
+
+    opencl_kernel_arguments arguments;
+    argon2profile_info profile_info;
+
+    string device_string;
+    uint64_t max_mem_size;
+    uint64_t max_allocable_mem_size;
+
+    // Last OpenCL status recorded for this device and its description.
+    cl_int error;
+    string error_message;
+
+    // Serialises the prehash -> fill -> posthash sequence between the two
+    // worker threads sharing the device: locked in opencl_kernel_prehasher,
+    // unlocked in opencl_kernel_posthasher or on the first failing stage.
+    mutex device_lock;
+};
+
+// State of one host worker thread driving a device; two of these exist per
+// enabled device (see opencl_hasher::buildThreadData). A pointer to this
+// struct is the `user_data` received by the opencl_kernel_* callbacks.
+struct opencl_gpumgmt_thread_data {
+    // 0 or 1; selects which element of the per-thread buffer pairs in
+    // opencl_kernel_arguments this worker uses.
+    int thread_id;
+    // Device served by this worker (shared with the sibling worker).
+    opencl_device_info *device;
+    // Argon2 driver created with `new` in buildThreadData.
+    Argon2 *argon2;
+    // Input/output description for the batch currently being hashed.
+    HashData hashData;
+};
+
+// Hasher implementation that computes Argon2 hashes on OpenCL devices,
+// using two host worker threads per enabled device.
+class opencl_hasher : public Hasher {
+public:
+    opencl_hasher();
+    ~opencl_hasher();
+
+    virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant);
+    virtual bool configure(xmrig::HasherConfig &config);
+    // Releases all OpenCL objects and deletes every entry of __devices.
+    virtual void cleanup();
+    // Hashes one batch for worker `threadIdx` and advances the nonce stored in
+    // `input`; returns the number of hashes produced, or 0 on device error.
+    virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output);
+    // Batch size of the device serving worker `workerIdx` (workerIdx / 2 is the
+    // device index).
+    virtual size_t parallelism(int workerIdx);
+    // Number of enabled devices.
+    virtual size_t deviceCount();
+
+private:
+    opencl_device_info *__get_device_info(cl_platform_id platform, cl_device_id device);
+    bool __setup_device_info(opencl_device_info *device, double intensity);
+    vector<opencl_device_info*> __query_opencl_devices(cl_int &error, string &error_message);
+    // Allocates __thread_data with two entries per enabled device.
+    void buildThreadData();
+
+    // All device records; deleted in cleanup().
+    vector<opencl_device_info*> __devices;
+    // Devices actually used for hashing; indexed by worker index / 2.
+    vector<opencl_device_info*> __enabledDevices;
+    // Array of __enabledDevices.size() * 2 worker states.
+    opencl_gpumgmt_thread_data *__thread_data;
+
+    // Profile passed to Argon2::generateHashes in compute().
+    Argon2Profile *m_profile;
+};
+
+#endif //WITH_OPENCL
+
+#endif //ARGON2_OPENCL_HASHER_H
diff --git a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp
new file mode 100644
index 00000000..b65539bc
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp
@@ -0,0 +1,1085 @@
+//
+// Created by Haifa Bogdan Adnan on 06/08/2018.
+//
+
+#include "../../../common/common.h"
+
+#include "OpenCLKernel.h"
+
+string OpenCLKernel = R"OCL(
+// Compile-time constants for the kernels below.
+// An Argon2 block is 1024 bytes = 128 ulongs = 256 uints.
+#define THREADS_PER_LANE               32
+#define BLOCK_SIZE_ULONG                128
+#define BLOCK_SIZE_UINT                 256
+#define ARGON2_PREHASH_DIGEST_LENGTH_UINT   16
+#define ARGON2_PREHASH_SEED_LENGTH_UINT     18
+
+#define ARGON2_BLOCK_SIZE 1024
+#define ARGON2_DWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 4)
+
+#define BLAKE_SHARED_MEM_ULONG       76
+
+#define ARGON2_RAW_LENGTH           8
+
+#define ARGON2_TYPE_VALUE               2
+#define ARGON2_VERSION                  0x13
+
+// NOTE: despite their names, BLOCK_BYTES and OUT_BYTES are used below as
+// counts of 32-bit words (they offset `uint` pointers and are multiplied by 4
+// wherever a byte count is needed): 32 words = 128 bytes, 16 words = 64 bytes.
+#define BLOCK_BYTES	32
+#define OUT_BYTES	16
+
+// BLAKE2b G mixing step on the working words (a, b, c, d), updated in place.
+// The two message words are m[blake2b_sigma[r][2*i]] and
+// m[blake2b_sigma[r][2*i + 1]]; the right-rotation amounts are 32, 24, 16, 63.
+#define G(m, r, i, a, b, c, d) \
+do { \
+	a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
+	d = rotr64(d ^ a, 32); \
+	c = c + d; \
+	b = rotr64(b ^ c, 24); \
+	a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
+	d = rotr64(d ^ a, 16); \
+	c = c + d; \
+	b = rotr64(b ^ c, 63); \
+} while ((void)0, 0)
+
+// Variant of G for a message block in which every 64-bit word has the same
+// value: `m` is that scalar value and is added in both halves of the step, so
+// no blake2b_sigma lookup is needed.
+#define G_S(m, a, b, c, d) \
+do { \
+	a = a + b + m; \
+	d = rotr64(d ^ a, 32); \
+	c = c + d; \
+	b = rotr64(b ^ c, 24); \
+	a = a + b + m; \
+	d = rotr64(d ^ a, 16); \
+	c = c + d; \
+	b = rotr64(b ^ c, 63); \
+} while ((void)0, 0)
+
+// One BLAKE2b round, cooperatively executed by the work-items that share
+// `shfl` (lane index t; the `% 4` arithmetic implies t in 0..3).
+// Requires v0..v3 to be declared by the caller.
+//   1. G on this lane's own (v0, v1, v2, v3) with sigma index t.
+//   2. Publish v1/v2/v3 to shfl[4..15], barrier, then pick up the values of
+//      lanes (t+1)%4, (t+2)%4 and (t+3)%4.
+//   3. G on the picked-up values with sigma index t + 4.
+//   4. Store the results back to the slots they came from, barrier, and
+//      reload this lane's own v1/v2/v3.
+// Contains barriers, so it must not be placed in divergent control flow.
+#define ROUND(m, t, r, shfl) \
+do { \
+	G(m, r, t, v0, v1, v2, v3); \
+    shfl[t + 4] = v1; \
+    shfl[t + 8] = v2; \
+    shfl[t + 12] = v3; \
+    barrier(CLK_LOCAL_MEM_FENCE); \
+    v1 = shfl[((t + 1) % 4)+ 4]; \
+    v2 = shfl[((t + 2) % 4)+ 8]; \
+    v3 = shfl[((t + 3) % 4)+ 12]; \
+	G(m, r, (t + 4), v0, v1, v2, v3); \
+    shfl[((t + 1) % 4)+ 4] = v1; \
+    shfl[((t + 2) % 4)+ 8] = v2; \
+    shfl[((t + 3) % 4)+ 12] = v3; \
+    barrier(CLK_LOCAL_MEM_FENCE); \
+    v1 = shfl[t + 4]; \
+    v2 = shfl[t + 8]; \
+    v3 = shfl[t + 12]; \
+} while ((void)0, 0)
+
+// Same exchange pattern as ROUND, but using G_S: `m` is a single 64-bit value
+// standing in for every message word, so the round number is irrelevant.
+// Requires v0..v3 to be declared by the caller; contains barriers.
+#define ROUND_S(m, t, shfl) \
+do { \
+	G_S(m, v0, v1, v2, v3); \
+    shfl[t + 4] = v1; \
+    shfl[t + 8] = v2; \
+    shfl[t + 12] = v3; \
+    barrier(CLK_LOCAL_MEM_FENCE); \
+    v1 = shfl[((t + 1) % 4)+ 4]; \
+    v2 = shfl[((t + 2) % 4)+ 8]; \
+    v3 = shfl[((t + 3) % 4)+ 12]; \
+	G_S(m, v0, v1, v2, v3); \
+    shfl[((t + 1) % 4)+ 4] = v1; \
+    shfl[((t + 2) % 4)+ 8] = v2; \
+    shfl[((t + 3) % 4)+ 12] = v3; \
+    barrier(CLK_LOCAL_MEM_FENCE); \
+    v1 = shfl[t + 4]; \
+    v2 = shfl[t + 8]; \
+    v3 = shfl[t + 12]; \
+} while ((void)0, 0)
+
+// Rotates the 64-bit value x right by n bits. OpenCL's rotate() rotates
+// left, so a left rotation by (64 - n) is used.
+ulong rotr64(ulong x, ulong n)
+{
+	return rotate(x, 64 - n);
+}
+
+// BLAKE2b initialisation vector (eight 64-bit words).
+__constant ulong blake2b_IV[8] = {
+        0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
+        0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
+        0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
+        0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
+};
+
+// Message word schedule: row r gives the order in which the 16 message words
+// are consumed in round r. There are 12 rows; rows 10 and 11 repeat rows 0
+// and 1.
+__constant uint blake2b_sigma[12][16] = {
+        {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+        {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+        {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
+        {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
+        {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
+        {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
+        {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
+        {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
+        {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
+        {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
+        {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+        {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+};
+
+// BLAKE2b compression of one message block, shared between cooperating
+// work-items (thr_id indexes h[thr_id] and h[thr_id + 4], i.e. 0..3).
+//
+// h      - state: h[0..7] chaining value, h[8]/h[9] low/high byte counter
+// m      - the 16 message words of the block
+// f0     - finalisation word XORed into the state (0 for a non-final block)
+// shfl   - local scratch used by ROUND to exchange words between lanes
+// thr_id - lane index of the calling work-item
+//
+// Starts with a barrier so that earlier local-memory writes to h and m are
+// visible; must be called by all cooperating work-items.
+void blake2b_compress(__local ulong *h, __local ulong *m, ulong f0, __local ulong *shfl, int thr_id)
+{
+    ulong v0, v1, v2, v3;
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Each lane holds one column of the 4x4 working matrix.
+    v0 = h[thr_id];
+    v1 = h[thr_id + 4];
+    v2 = blake2b_IV[thr_id];
+    v3 = blake2b_IV[thr_id + 4];
+
+    // Fold the counter (lanes 0 and 1) and the finalisation word (lane 2)
+    // into the last row.
+    if(thr_id == 0) v3 ^= h[8];
+    if(thr_id == 1) v3 ^= h[9];
+    if(thr_id == 2) v3 ^= f0;
+
+    ROUND(m, thr_id, 0, shfl);
+    ROUND(m, thr_id, 1, shfl);
+    ROUND(m, thr_id, 2, shfl);
+    ROUND(m, thr_id, 3, shfl);
+    ROUND(m, thr_id, 4, shfl);
+    ROUND(m, thr_id, 5, shfl);
+    ROUND(m, thr_id, 6, shfl);
+    ROUND(m, thr_id, 7, shfl);
+    ROUND(m, thr_id, 8, shfl);
+    ROUND(m, thr_id, 9, shfl);
+    ROUND(m, thr_id, 10, shfl);
+    ROUND(m, thr_id, 11, shfl);
+
+    // Feed-forward into the chaining value.
+    h[thr_id] ^= v0 ^ v2;
+    h[thr_id + 4] ^= v1 ^ v3;
+}
+
+// Same as blake2b_compress, but for a message block whose 16 words all equal
+// the scalar `m`; no message buffer is read. Twelve ROUND_S rounds are
+// applied.
+//
+// h      - state: h[0..7] chaining value, h[8]/h[9] low/high byte counter
+// m      - value of every 64-bit message word
+// f0     - finalisation word XORed into the state (0 for a non-final block)
+// shfl   - local scratch used by ROUND_S to exchange words between lanes
+// thr_id - lane index of the calling work-item
+void blake2b_compress_static(__local ulong *h, ulong m, ulong f0, __local ulong *shfl, int thr_id)
+{
+    ulong v0, v1, v2, v3;
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    v0 = h[thr_id];
+    v1 = h[thr_id + 4];
+    v2 = blake2b_IV[thr_id];
+    v3 = blake2b_IV[thr_id + 4];
+
+    if(thr_id == 0) v3 ^= h[8];
+    if(thr_id == 1) v3 ^= h[9];
+    if(thr_id == 2) v3 ^= f0;
+
+    ROUND_S(m, thr_id, shfl);
+    ROUND_S(m, thr_id, shfl);
+    ROUND_S(m, thr_id, shfl);
+    ROUND_S(m, thr_id, shfl);
+    ROUND_S(m, thr_id, shfl);
+    ROUND_S(m, thr_id, shfl);
+    ROUND_S(m, thr_id, shfl);
+    ROUND_S(m, thr_id, shfl);
+    ROUND_S(m, thr_id, shfl);
+    ROUND_S(m, thr_id, shfl);
+    ROUND_S(m, thr_id, shfl);
+    ROUND_S(m, thr_id, shfl);
+
+    h[thr_id] ^= v0 ^ v2;
+    h[thr_id + 4] ^= v1 ^ v3;
+}
+
+// Adds `inc` 32-bit words (inc * 4 bytes) to the byte counter stored in
+// h[8] (low word) and h[9] (high word), carrying into h[9] when h[8] wraps.
+// Callers in this file invoke it from lane 0 only.
+void blake2b_incrementCounter(__local ulong *h, int inc)
+{
+    h[8] += (inc * 4);
+    h[9] += (h[8] < (inc * 4));
+}
+
+// Finalises a BLAKE2b computation and writes the digest to global memory.
+// All lengths are counts of 32-bit words.
+//
+// out     - destination for the digest
+// out_len - number of words to copy from the state to `out`
+// h       - hash state (see blake2b_compress)
+// buf     - pending message block
+// buf_len - number of valid words in `buf`
+// shfl    - local scratch for the compression
+// thr_id  - lane index; copies are striped across lanes in groups of 4 words
+void blake2b_final_global(__global uint *out, int out_len, __local ulong *h, __local uint *buf, int buf_len, __local ulong *shfl, int thr_id)
+{
+    int left = BLOCK_BYTES - buf_len;
+    __local uint *cursor_out_local = buf + buf_len;
+
+    // Zero-pad the rest of the block, 4 words per iteration (one per lane).
+    for(int i=0; i < (left >> 2); i++, cursor_out_local += 4) {
+        cursor_out_local[thr_id] = 0;
+    }
+
+    // Lane 0 pads the remaining (left % 4) words and accounts for the
+    // buffered words in the counter.
+    if(thr_id == 0) {
+        for (int i = 0; i < (left % 4); i++) {
+            cursor_out_local[i] = 0;
+        }
+        blake2b_incrementCounter(h, buf_len);
+    }
+
+    // Final block: finalisation word is all ones.
+    blake2b_compress(h, (__local ulong *)buf, 0xFFFFFFFFFFFFFFFF, shfl, thr_id);
+
+    __local uint *cursor_in = (__local uint *)h;
+    __global uint *cursor_out_global = out;
+
+    // Copy the digest out of the state, striped across lanes.
+    for(int i=0; i < (out_len >> 2); i++, cursor_in += 4, cursor_out_global += 4) {
+        cursor_out_global[thr_id] = cursor_in[thr_id];
+    }
+
+    if(thr_id == 0) {
+        for (int i = 0; i < (out_len % 4); i++) {
+            cursor_out_global[i] = cursor_in[i];
+        }
+    }
+}
+
+// Same as blake2b_final_global, but the digest is written to local memory.
+// All lengths are counts of 32-bit words.
+//
+// out     - local destination for the digest
+// out_len - number of words to copy from the state to `out`
+// h       - hash state (see blake2b_compress)
+// buf     - pending message block
+// buf_len - number of valid words in `buf`
+// shfl    - local scratch for the compression
+// thr_id  - lane index; copies are striped across lanes in groups of 4 words
+void blake2b_final_local(__local uint *out, int out_len, __local ulong *h, __local uint *buf, int buf_len, __local ulong *shfl, int thr_id)
+{
+    int left = BLOCK_BYTES - buf_len;
+    __local uint *cursor_out = buf + buf_len;
+
+    // Zero-pad the rest of the block, 4 words per iteration (one per lane).
+    for(int i=0; i < (left >> 2); i++, cursor_out += 4) {
+        cursor_out[thr_id] = 0;
+    }
+
+    if(thr_id == 0) {
+        for (int i = 0; i < (left % 4); i++) {
+            cursor_out[i] = 0;
+        }
+        blake2b_incrementCounter(h, buf_len);
+    }
+
+    // Final block: finalisation word is all ones.
+    blake2b_compress(h, (__local ulong *)buf, 0xFFFFFFFFFFFFFFFF, shfl, thr_id);
+
+    __local uint *cursor_in = (__local uint *)h;
+    cursor_out = out;
+
+    // Copy the digest out of the state, striped across lanes.
+    for(int i=0; i < (out_len >> 2); i++, cursor_in += 4, cursor_out += 4) {
+        cursor_out[thr_id] = cursor_in[thr_id];
+    }
+
+    if(thr_id == 0) {
+        for (int i = 0; i < (out_len % 4); i++) {
+            cursor_out[i] = cursor_in[i];
+        }
+    }
+}
+
+// Absorbs `in_len` 32-bit words from global memory into the hash.
+// All lengths are counts of 32-bit words.
+//
+// in      - input words
+// in_len  - number of input words
+// h       - hash state (see blake2b_compress)
+// buf     - pending message block (BLOCK_BYTES words)
+// buf_len - number of words already buffered
+// shfl    - local scratch for the compression
+// thr_id  - lane index; copies are striped across lanes in groups of 4 words
+//
+// Returns the new number of buffered words. A block is only compressed when
+// more input follows it, so the last (possibly full) block stays in `buf`
+// for the final call.
+int blake2b_update_global(__global uint *in, int in_len, __local ulong *h, __local uint *buf, int buf_len, __local ulong *shfl, int thr_id)
+{
+    __global uint *cursor_in = in;
+    __local uint *cursor_out = buf + buf_len;
+
+    if (buf_len + in_len > BLOCK_BYTES) {
+        // Top up the partially filled buffer and compress it.
+        int left = BLOCK_BYTES - buf_len;
+
+        for(int i=0; i < (left >> 2); i++, cursor_in += 4, cursor_out += 4) {
+            cursor_out[thr_id] = cursor_in[thr_id];
+        }
+
+        if(thr_id == 0) {
+            for (int i = 0; i < (left % 4); i++) {
+                cursor_out[i] = cursor_in[i];
+            }
+            blake2b_incrementCounter(h, BLOCK_BYTES);
+        }
+
+        blake2b_compress(h, (__local ulong *)buf, 0, shfl, thr_id);
+
+        buf_len = 0;
+
+        in_len -= left;
+        in += left;
+
+        // Compress every further full block that is followed by more input.
+        while (in_len > BLOCK_BYTES) {
+            if(thr_id == 0)
+                blake2b_incrementCounter(h, BLOCK_BYTES);
+
+            cursor_in = in;
+            cursor_out = buf;
+
+            for(int i=0; i < (BLOCK_BYTES / 4); i++, cursor_in += 4, cursor_out += 4) {
+                cursor_out[thr_id] = cursor_in[thr_id];
+            }
+
+            blake2b_compress(h, (__local ulong *)buf, 0, shfl, thr_id);
+
+            in_len -= BLOCK_BYTES;
+            in += BLOCK_BYTES;
+        }
+    }
+
+    // Buffer whatever is left.
+    cursor_in = in;
+    cursor_out = buf + buf_len;
+
+    for(int i=0; i < (in_len >> 2); i++, cursor_in += 4, cursor_out += 4) {
+        cursor_out[thr_id] = cursor_in[thr_id];
+    }
+
+    if(thr_id == 0) {
+        for (int i = 0; i < (in_len % 4); i++) {
+            cursor_out[i] = cursor_in[i];
+        }
+    }
+
+    return buf_len + in_len;
+}
+
+// Absorbs `in_len` 32-bit words that all have the value `in`, without
+// reading any input buffer. All lengths are counts of 32-bit words.
+//
+// in      - value of every input word
+// in_len  - number of input words
+// h       - hash state (see blake2b_compress)
+// buf     - pending message block (BLOCK_BYTES words)
+// buf_len - number of words already buffered
+// shfl    - local scratch for the compression
+// thr_id  - lane index; fills are striped across lanes in groups of 4 words
+//
+// Returns the new number of buffered words.
+int blake2b_update_static(uint in, int in_len, __local ulong *h, __local uint *buf, int buf_len, __local ulong *shfl, int thr_id)
+{
+    // 64-bit message word made of `in` in both 32-bit halves.
+    ulong in64 = in;
+    in64 = in64 << 32;
+    in64 = in64 | in;
+
+    __local uint *cursor_out = buf + buf_len;
+
+    if (buf_len + in_len > BLOCK_BYTES) {
+        // Top up the partially filled buffer with `in` and compress it.
+        int left = BLOCK_BYTES - buf_len;
+
+        for(int i=0; i < (left >> 2); i++, cursor_out += 4) {
+            cursor_out[thr_id] = in;
+        }
+
+        if(thr_id == 0) {
+            for (int i = 0; i < (left % 4); i++) {
+                cursor_out[i] = in;
+            }
+            blake2b_incrementCounter(h, BLOCK_BYTES);
+        }
+
+        blake2b_compress(h, (__local ulong *)buf, 0, shfl, thr_id);
+
+        buf_len = 0;
+
+        in_len -= left;
+
+        // Further full blocks consist only of `in`, so the scalar variant of
+        // the compression is used and `buf` is not refilled.
+        while (in_len > BLOCK_BYTES) {
+            if(thr_id == 0)
+                blake2b_incrementCounter(h, BLOCK_BYTES);
+
+            blake2b_compress_static(h, in64, 0, shfl, thr_id);
+
+            in_len -= BLOCK_BYTES;
+        }
+    }
+
+    // Buffer whatever is left.
+    cursor_out = buf + buf_len;
+
+    for(int i=0; i < (in_len >> 2); i++, cursor_out += 4) {
+        cursor_out[thr_id] = in;
+    }
+
+    if(thr_id == 0) {
+        for (int i = 0; i < (in_len % 4); i++) {
+            cursor_out[i] = in;
+        }
+    }
+
+    return buf_len + in_len;
+}
+
+// Same as blake2b_update_global, but the input lives in local memory.
+// All lengths are counts of 32-bit words.
+//
+// in      - input words (local memory)
+// in_len  - number of input words
+// h       - hash state (see blake2b_compress)
+// buf     - pending message block (BLOCK_BYTES words)
+// buf_len - number of words already buffered
+// shfl    - local scratch for the compression
+// thr_id  - lane index; copies are striped across lanes in groups of 4 words
+//
+// Returns the new number of buffered words.
+int blake2b_update_local(__local uint *in, int in_len, __local ulong *h, __local uint *buf, int buf_len, __local ulong *shfl, int thr_id)
+{
+    __local uint *cursor_in = in;
+    __local uint *cursor_out = buf + buf_len;
+
+    if (buf_len + in_len > BLOCK_BYTES) {
+        // Top up the partially filled buffer and compress it.
+        int left = BLOCK_BYTES - buf_len;
+
+        for(int i=0; i < (left >> 2); i++, cursor_in += 4, cursor_out += 4) {
+            cursor_out[thr_id] = cursor_in[thr_id];
+        }
+
+        if(thr_id == 0) {
+            for (int i = 0; i < (left % 4); i++) {
+                cursor_out[i] = cursor_in[i];
+            }
+            blake2b_incrementCounter(h, BLOCK_BYTES);
+        }
+
+        blake2b_compress(h, (__local ulong *)buf, 0, shfl, thr_id);
+
+        buf_len = 0;
+
+        in_len -= left;
+        in += left;
+
+        // Compress every further full block that is followed by more input.
+        while (in_len > BLOCK_BYTES) {
+            if(thr_id == 0)
+                blake2b_incrementCounter(h, BLOCK_BYTES);
+
+            cursor_in = in;
+            cursor_out = buf;
+
+            for(int i=0; i < (BLOCK_BYTES / 4); i++, cursor_in += 4, cursor_out += 4) {
+                cursor_out[thr_id] = cursor_in[thr_id];
+            }
+
+            blake2b_compress(h, (__local ulong *)buf, 0, shfl, thr_id);
+
+            in_len -= BLOCK_BYTES;
+            in += BLOCK_BYTES;
+        }
+    }
+
+    // Buffer whatever is left.
+    cursor_in = in;
+    cursor_out = buf + buf_len;
+
+    for(int i=0; i < (in_len >> 2); i++, cursor_in += 4, cursor_out += 4) {
+        cursor_out[thr_id] = cursor_in[thr_id];
+    }
+
+    if(thr_id == 0) {
+        for (int i = 0; i < (in_len % 4); i++) {
+            cursor_out[i] = cursor_in[i];
+        }
+    }
+
+    return buf_len + in_len;
+}
+
+// Resets the hash state `h` for a new digest of out_len elements.
+// Every work-item copies two consecutive words of blake2b_IV (thr_id 0..3
+// therefore cover h[0..7]); work-item 0 additionally clears h[8] and h[9] and
+// overwrites h[0] with a constant XORed with the parameter word
+// (out_len * 4) | 1 << 16 | 1 << 24.
+// Always returns 0, which callers reuse as the initial buffer length.
+int blake2b_init(__local ulong *h, int out_len, int thr_id)
+{
+    int lo_word = thr_id * 2;
+    int hi_word = lo_word + 1;
+
+    h[lo_word] = blake2b_IV[lo_word];
+    h[hi_word] = blake2b_IV[hi_word];
+
+    if (thr_id != 0)
+        return 0;
+
+    int param_word = (out_len * 4) | (1 << 16) | (1 << 24);
+
+    h[9] = 0;
+    h[8] = 0;
+    h[0] = 0x6A09E667F3BCC908 ^ param_word;
+
+    return 0;
+}
+
+// Hashes in_len elements from __global memory `in` into out_len elements at
+// __global `out`, producing outputs longer than OUT_BYTES by chaining digests.
+// The structure appears to follow Argon2's variable-length hash H'
+// (RFC 9106, section 3.3) -- confirm against the host implementation.
+//
+// `shared` is carved up as: h = shared[0..9], shfl = shared[10..25], then a
+// uint view with buf = 32 elements followed by out_buffer.
+// thr_id is forwarded to the blake2b_* helpers and selects which element of
+// every group of four this work-item copies.
+void blake2b_digestLong_global(__global uint *out, int out_len,
+                       __global uint *in, int in_len,
+                       int thr_id, __local ulong* shared)
+{
+    __local ulong *h = shared;
+	__local ulong *shfl = &h[10];
+    __local uint *buf = (__local uint *)&shfl[16];
+    __local uint *out_buffer = &buf[32];
+    int buf_len;
+
+    // The first hashed element is the requested output length times four.
+    if(thr_id == 0) buf[0] = (out_len * 4);
+    buf_len = 1;
+
+    if (out_len <= OUT_BYTES) {
+        // Short output: a single digest written straight to `out`.
+        blake2b_init(h, out_len, thr_id);
+        buf_len = blake2b_update_global(in, in_len, h, buf, buf_len, shfl, thr_id);
+        blake2b_final_global(out, out_len, h, buf, buf_len, shfl, thr_id);
+    } else {
+        __local uint *cursor_in = out_buffer;
+        __global uint *cursor_out = out;
+
+        // First link of the chain: full-size digest of the input, kept in local memory.
+        blake2b_init(h, OUT_BYTES, thr_id);
+        buf_len = blake2b_update_global(in, in_len, h, buf, buf_len, shfl, thr_id);
+        blake2b_final_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id);
+
+        // OUT_BYTES / 8 iterations of four elements: emit the first half of the digest.
+        for(int i=0; i < (OUT_BYTES / 8); i++, cursor_in += 4, cursor_out += 4) {
+            cursor_out[thr_id] = cursor_in[thr_id];
+        }
+
+        out += OUT_BYTES / 2;
+
+        int to_produce = out_len - OUT_BYTES / 2;
+        // Middle links: hash the previous digest and emit half of the result each time.
+        while (to_produce > OUT_BYTES) {
+            buf_len = blake2b_init(h, OUT_BYTES, thr_id);
+            buf_len = blake2b_update_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id);
+            blake2b_final_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id);
+
+            cursor_out = out;
+            cursor_in = out_buffer;
+            for(int i=0; i < (OUT_BYTES / 8); i++, cursor_in += 4, cursor_out += 4) {
+                cursor_out[thr_id] = cursor_in[thr_id];
+            }
+
+            out += OUT_BYTES / 2;
+            to_produce -= OUT_BYTES / 2;
+        }
+
+        // Last link: digest of exactly the remaining length, written in full.
+        buf_len = blake2b_init(h, to_produce, thr_id);
+        buf_len = blake2b_update_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id);
+        blake2b_final_global(out, to_produce, h, buf, buf_len, shfl, thr_id);
+    }
+}
+
+// Same chained long-digest routine as blake2b_digestLong_global, except that
+// the input `in` lives in __local memory and is absorbed with
+// blake2b_update_local. The result is still written to __global `out`.
+//
+// `shared` is carved up as: h = shared[0..9], shfl = shared[10..25], then a
+// uint view with buf = 32 elements followed by out_buffer.
+void blake2b_digestLong_local(__global uint *out, int out_len,
+                        __local uint *in, int in_len,
+                        int thr_id, __local ulong* shared)
+{
+    __local ulong *h = shared;
+    __local ulong *shfl = &h[10];
+    __local uint *buf = (__local uint *)&shfl[16];
+    __local uint *out_buffer = &buf[32];
+    int buf_len;
+
+    // The first hashed element is the requested output length times four.
+    if(thr_id == 0) buf[0] = (out_len * 4);
+    buf_len = 1;
+
+    if (out_len <= OUT_BYTES) {
+        // Short output: a single digest written straight to `out`.
+        blake2b_init(h, out_len, thr_id);
+        buf_len = blake2b_update_local(in, in_len, h, buf, buf_len, shfl, thr_id);
+        blake2b_final_global(out, out_len, h, buf, buf_len, shfl, thr_id);
+    } else {
+        __local uint *cursor_in = out_buffer;
+        __global uint *cursor_out = out;
+
+        // First link of the chain: full-size digest of the input, kept in local memory.
+        blake2b_init(h, OUT_BYTES, thr_id);
+        buf_len = blake2b_update_local(in, in_len, h, buf, buf_len, shfl, thr_id);
+        blake2b_final_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id);
+
+        // OUT_BYTES / 8 iterations of four elements: emit the first half of the digest.
+        for(int i=0; i < (OUT_BYTES / 8); i++, cursor_in += 4, cursor_out += 4) {
+            cursor_out[thr_id] = cursor_in[thr_id];
+        }
+
+        out += OUT_BYTES / 2;
+
+        int to_produce = out_len - OUT_BYTES / 2;
+        // Middle links: hash the previous digest and emit half of the result each time.
+        while (to_produce > OUT_BYTES) {
+            buf_len = blake2b_init(h, OUT_BYTES, thr_id);
+            buf_len = blake2b_update_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id);
+            blake2b_final_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id);
+
+            cursor_out = out;
+            cursor_in = out_buffer;
+            for(int i=0; i < (OUT_BYTES / 8); i++, cursor_in += 4, cursor_out += 4) {
+                cursor_out[thr_id] = cursor_in[thr_id];
+            }
+
+            out += OUT_BYTES / 2;
+            to_produce -= OUT_BYTES / 2;
+        }
+
+        // Last link: digest of exactly the remaining length, written in full.
+        buf_len = blake2b_init(h, to_produce, thr_id);
+        buf_len = blake2b_update_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id);
+        blake2b_final_global(out, to_produce, h, buf, buf_len, shfl, thr_id);
+    }
+}
+
+// x + y + 2 * lo32(x) * lo32(y): mul_hi gives the upper and the plain uint product the lower 32 bits of the 64-bit product, upsample joins them. Every use of an argument is parenthesised so compound expressions expand safely.
+#define fBlaMka(x, y) ((x) + (y) + 2 * upsample(mul_hi((uint)(x), (uint)(y)), (uint)(x) * (uint)(y)))
+
+// One mixing step over the ulong variables a, b, c, d, which must be in scope
+// where the macro is expanded. OpenCL rotate() rotates left, so the amounts
+// 32 / 40 / 48 / 1 are the same as right rotations by 32 / 24 / 16 / 63.
+// The expansion already ends in ';', so it is used without a trailing one.
+#define COMPUTE \
+    a = fBlaMka(a, b);          \
+    d = rotate(d ^ a, (ulong)32);      \
+    c = fBlaMka(c, d);          \
+    b = rotate(b ^ c, (ulong)40);      \
+    a = fBlaMka(a, b);          \
+    d = rotate(d ^ a, (ulong)48);      \
+    c = fBlaMka(c, d);          \
+    b = rotate(b ^ c, (ulong)1);
+
+// Word indices (0..127, into a block viewed as ulongs) used by G1.
+// Row `id` holds the a, b, c, d positions for work-item `id` of a lane.
+// Pattern: inside each run of 16 consecutive words, row k takes
+// { k, k + 4, k + 8, k + 12 }, i.e. the columns of a 4x4 arrangement.
+__constant char offsets_round_1[32][4] = {
+        { 0, 4, 8, 12 },
+        { 1, 5, 9, 13 },
+        { 2, 6, 10, 14 },
+        { 3, 7, 11, 15 },
+        { 16, 20, 24, 28 },
+        { 17, 21, 25, 29 },
+        { 18, 22, 26, 30 },
+        { 19, 23, 27, 31 },
+        { 32, 36, 40, 44 },
+        { 33, 37, 41, 45 },
+        { 34, 38, 42, 46 },
+        { 35, 39, 43, 47 },
+        { 48, 52, 56, 60 },
+        { 49, 53, 57, 61 },
+        { 50, 54, 58, 62 },
+        { 51, 55, 59, 63 },
+        { 64, 68, 72, 76 },
+        { 65, 69, 73, 77 },
+        { 66, 70, 74, 78 },
+        { 67, 71, 75, 79 },
+        { 80, 84, 88, 92 },
+        { 81, 85, 89, 93 },
+        { 82, 86, 90, 94 },
+        { 83, 87, 91, 95 },
+        { 96, 100, 104, 108 },
+        { 97, 101, 105, 109 },
+        { 98, 102, 106, 110 },
+        { 99, 103, 107, 111 },
+        { 112, 116, 120, 124 },
+        { 113, 117, 121, 125 },
+        { 114, 118, 122, 126 },
+        { 115, 119, 123, 127 },
+};
+
+// Word indices used by G2: the diagonals of the same 4x4 arrangement that
+// offsets_round_1 walks by column. The first entry of every row equals the
+// first entry of the matching offsets_round_1 row, which is what lets G2 reuse
+// the `a` value G1 left in a register.
+__constant char offsets_round_2[32][4] = {
+        { 0, 5, 10, 15 },
+        { 1, 6, 11, 12 },
+        { 2, 7, 8, 13 },
+        { 3, 4, 9, 14 },
+        { 16, 21, 26, 31 },
+        { 17, 22, 27, 28 },
+        { 18, 23, 24, 29 },
+        { 19, 20, 25, 30 },
+        { 32, 37, 42, 47 },
+        { 33, 38, 43, 44 },
+        { 34, 39, 40, 45 },
+        { 35, 36, 41, 46 },
+        { 48, 53, 58, 63 },
+        { 49, 54, 59, 60 },
+        { 50, 55, 56, 61 },
+        { 51, 52, 57, 62 },
+        { 64, 69, 74, 79 },
+        { 65, 70, 75, 76 },
+        { 66, 71, 72, 77 },
+        { 67, 68, 73, 78 },
+        { 80, 85, 90, 95 },
+        { 81, 86, 91, 92 },
+        { 82, 87, 88, 93 },
+        { 83, 84, 89, 94 },
+        { 96, 101, 106, 111 },
+        { 97, 102, 107, 108 },
+        { 98, 103, 104, 109 },
+        { 99, 100, 105, 110 },
+        { 112, 117, 122, 127 },
+        { 113, 118, 123, 124 },
+        { 114, 119, 120, 125 },
+        { 115, 116, 121, 126 },
+};
+
+// Word indices used by G3. Each group of four rows covers the 16 words
+// { 2c, 2c + 1, 2c + 16, 2c + 17, ..., 2c + 112, 2c + 113 } for c = 0..7 and
+// takes them column-wise, i.e. with a stride of 32 words inside a row.
+__constant char offsets_round_3[32][4] = {
+        { 0, 32, 64, 96 },
+        { 1, 33, 65, 97 },
+        { 16, 48, 80, 112 },
+        { 17, 49, 81, 113 },
+        { 2, 34, 66, 98 },
+        { 3, 35, 67, 99 },
+        { 18, 50, 82, 114 },
+        { 19, 51, 83, 115 },
+        { 4, 36, 68, 100 },
+        { 5, 37, 69, 101 },
+        { 20, 52, 84, 116 },
+        { 21, 53, 85, 117 },
+        { 6, 38, 70, 102 },
+        { 7, 39, 71, 103 },
+        { 22, 54, 86, 118 },
+        { 23, 55, 87, 119 },
+        { 8, 40, 72, 104 },
+        { 9, 41, 73, 105 },
+        { 24, 56, 88, 120 },
+        { 25, 57, 89, 121 },
+        { 10, 42, 74, 106 },
+        { 11, 43, 75, 107 },
+        { 26, 58, 90, 122 },
+        { 27, 59, 91, 123 },
+        { 12, 44, 76, 108 },
+        { 13, 45, 77, 109 },
+        { 28, 60, 92, 124 },
+        { 29, 61, 93, 125 },
+        { 14, 46, 78, 110 },
+        { 15, 47, 79, 111 },
+        { 30, 62, 94, 126 },
+        { 31, 63, 95, 127 },
+};
+
+// Word indices used by G4: the diagonals of the 16-word groups that
+// offsets_round_3 walks by column. The first entry of every row equals the
+// first entry of the matching offsets_round_3 row, which is what lets G4 reuse
+// the `a` value G3 left in a register.
+__constant char offsets_round_4[32][4] = {
+        { 0, 33, 80, 113 },
+        { 1, 48, 81, 96 },
+        { 16, 49, 64, 97 },
+        { 17, 32, 65, 112 },
+        { 2, 35, 82, 115 },
+        { 3, 50, 83, 98 },
+        { 18, 51, 66, 99 },
+        { 19, 34, 67, 114 },
+        { 4, 37, 84, 117 },
+        { 5, 52, 85, 100 },
+        { 20, 53, 68, 101 },
+        { 21, 36, 69, 116 },
+        { 6, 39, 86, 119 },
+        { 7, 54, 87, 102 },
+        { 22, 55, 70, 103 },
+        { 23, 38, 71, 118 },
+        { 8, 41, 88, 121 },
+        { 9, 56, 89, 104 },
+        { 24, 57, 72, 105 },
+        { 25, 40, 73, 120 },
+        { 10, 43, 90, 123 },
+        { 11, 58, 91, 106 },
+        { 26, 59, 74, 107 },
+        { 27, 42, 75, 122 },
+        { 12, 45, 92, 125 },
+        { 13, 60, 93, 108 },
+        { 28, 61, 76, 109 },
+        { 29, 44, 77, 124 },
+        { 14, 47, 94, 127 },
+        { 15, 62, 95, 110 },
+        { 30, 63, 78, 111 },
+        { 31, 46, 79, 126 },
+};
+
+// First of four mixing rounds over `data`. Requires a, b, c, d and i1_0..i1_3
+// in the expanding scope. Waits for the block to be fully written, loads all
+// four words, mixes them, then stores b, c, d only: `a` stays in its register
+// for G2. Ends with a barrier so the next round sees every store.
+#define G1(data) \
+{ \
+	barrier(CLK_LOCAL_MEM_FENCE); \
+	a = data[i1_0]; \
+	b = data[i1_1]; \
+	c = data[i1_2]; \
+	d = data[i1_3]; \
+	COMPUTE \
+	data[i1_1] = b; \
+    data[i1_2] = c; \
+    data[i1_3] = d; \
+    barrier(CLK_LOCAL_MEM_FENCE); \
+}
+
+// Second mixing round. Reuses the `a` left in a register by G1 (it is not
+// reloaded), loads b, c, d through i2_1..i2_3, mixes, and writes all four
+// words back, `a` going to data[i2_0]. Ends with a barrier.
+#define G2(data) \
+{ \
+	b = data[i2_1]; \
+	c = data[i2_2]; \
+	d = data[i2_3]; \
+	COMPUTE \
+	data[i2_0] = a; \
+	data[i2_1] = b; \
+    data[i2_2] = c; \
+    data[i2_3] = d; \
+    barrier(CLK_LOCAL_MEM_FENCE); \
+}
+
+// Third mixing round. Loads all four words through i3_0..i3_3, mixes, and
+// stores b, c, d only: `a` stays in its register for G4. Relies on the
+// barrier at the end of G2 for its inputs and ends with a barrier of its own.
+#define G3(data) \
+{ \
+	a = data[i3_0]; \
+	b = data[i3_1]; \
+	c = data[i3_2]; \
+	d = data[i3_3]; \
+	COMPUTE \
+	data[i3_1] = b; \
+    data[i3_2] = c; \
+    data[i3_3] = d; \
+    barrier(CLK_LOCAL_MEM_FENCE); \
+}
+
+// Fourth mixing round. Reuses the `a` left in a register by G3, loads b, c, d
+// through i4_1..i4_3, mixes, and writes all four words back, `a` going to
+// data[i4_0]. The closing barrier makes the finished block visible to every
+// work-item.
+#define G4(data) \
+{ \
+	b = data[i4_1]; \
+	c = data[i4_2]; \
+	d = data[i4_3]; \
+	COMPUTE \
+	data[i4_0] = a; \
+	data[i4_1] = b; \
+    data[i4_2] = c; \
+    data[i4_3] = d; \
+    barrier(CLK_LOCAL_MEM_FENCE); \
+}
+
+// Fills the memory matrix of one hash per work-group and writes the XOR of
+// the last block of every lane to `out`.
+//
+// Work distribution: get_group_id(0) selects the hash; inside the group,
+// local_id / THREADS_PER_LANE is the lane and local_id % THREADS_PER_LANE is
+// the work-item's slot `id`, which owns the ulong4 at position `id` of every
+// block it touches.
+//
+//   chunk_0..chunk_5   device buffers holding the per-hash memory; a hash uses
+//                      chunk (hash / threads_per_chunk) at offset
+//                      (hash % threads_per_chunk) * (memsize / 8) ulongs
+//   seed               two initial blocks per lane and hash
+//   out                one result block per hash
+//   refs               precomputed reference block indices (seg_type == 0)
+//   idxs               optional precomputed destination indices for
+//                      seg_type == 0; 0 when unused. Bit 31 of an entry flags
+//                      that the resulting block has to be written back.
+//   segments           three values per (segment, lane): first block index,
+//                      previous block index, segment type
+//   memsize            per-hash memory size; divided by 8 to get ulongs, so
+//                      presumably bytes -- TODO confirm with the host code
+//   lanes, seg_length, seg_count, threads_per_chunk
+//                      geometry; a lane holds 4 * seg_length blocks
+//   scratchpad         __local memory, lanes * BLOCK_SIZE_ULONG ulongs
+__kernel void fill_blocks(__global ulong *chunk_0,
+						__global ulong *chunk_1,
+						__global ulong *chunk_2,
+						__global ulong *chunk_3,
+						__global ulong *chunk_4,
+						__global ulong *chunk_5,
+						__global ulong *seed,
+						__global ulong *out,
+						__global uint *refs,
+						__global uint *idxs,
+						__global uint *segments,
+                        int memsize,
+                        int lanes,
+                        int seg_length,
+                        int seg_count,
+						int threads_per_chunk,
+                        __local ulong *scratchpad) { // lanes * BLOCK_SIZE_ULONG
+    ulong4 tmp;
+	ulong a, b, c, d;
+
+	int hash = get_group_id(0);
+	int local_id = get_local_id(0);
+
+	int id = local_id % THREADS_PER_LANE;
+	int lane = local_id / THREADS_PER_LANE;
+	int lane_length = seg_length * 4;
+
+	// Pick the chunk buffer that holds this hash and the hash's slot inside it.
+	ulong chunks[6];
+	chunks[0] = (ulong)chunk_0;
+	chunks[1] = (ulong)chunk_1;
+	chunks[2] = (ulong)chunk_2;
+	chunks[3] = (ulong)chunk_3;
+	chunks[4] = (ulong)chunk_4;
+	chunks[5] = (ulong)chunk_5;
+	int chunk_index = hash / threads_per_chunk;
+	int chunk_offset = hash - chunk_index * threads_per_chunk;
+	__global ulong *memory = (__global ulong *)chunks[chunk_index] + chunk_offset * (memsize / 8);
+
+	// Word indices this work-item mixes in each of the four rounds (see G1..G4).
+	int i1_0 = offsets_round_1[id][0];
+	int i1_1 = offsets_round_1[id][1];
+	int i1_2 = offsets_round_1[id][2];
+	int i1_3 = offsets_round_1[id][3];
+
+	int i2_0 = offsets_round_2[id][0];
+	int i2_1 = offsets_round_2[id][1];
+	int i2_2 = offsets_round_2[id][2];
+	int i2_3 = offsets_round_2[id][3];
+
+	int i3_0 = offsets_round_3[id][0];
+	int i3_1 = offsets_round_3[id][1];
+	int i3_2 = offsets_round_3[id][2];
+	int i3_3 = offsets_round_3[id][3];
+
+	int i4_0 = offsets_round_4[id][0];
+	int i4_1 = offsets_round_4[id][1];
+	int i4_2 = offsets_round_4[id][2];
+	int i4_3 = offsets_round_4[id][3];
+
+	__global ulong *out_mem = out + hash * BLOCK_SIZE_ULONG;
+	__global ulong *seed_mem = seed + hash * lanes * 2 * BLOCK_SIZE_ULONG + lane * 2 * BLOCK_SIZE_ULONG;
+
+	__global ulong *seed_dst = memory + lane * lane_length * BLOCK_SIZE_ULONG;
+
+	// Copy the lane's two seed blocks to the start of the lane.
+	vstore4(vload4(id, seed_mem), id, seed_dst);
+
+	seed_mem += BLOCK_SIZE_ULONG;
+	seed_dst += BLOCK_SIZE_ULONG;
+
+	vstore4(vload4(id, seed_mem), id, seed_dst);
+
+	__global ulong *next_block;
+	__global ulong *prev_block;
+    __global uint *seg_refs;
+    __global uint *seg_idxs;
+
+	__local ulong *state = scratchpad + lane * BLOCK_SIZE_ULONG;
+
+	segments += (lane * 3);
+
+	for(int s=0; s < (seg_count / lanes); s++) {
+		int idx = ((s == 0) ? 2 : 0); // index for first slice in each lane is 2
+
+		int with_xor = ((s >= 4) ? 1 : 0);
+		int keep = 1;
+		int slice = s % 4;
+		int pass = s / 4;
+		// Same element type as `segments`; the values are converted to int below.
+		__global uint *cur_seg = &segments[s * lanes * 3];
+
+		int cur_idx = cur_seg[0];
+        int prev_idx = cur_seg[1];
+        int seg_type = cur_seg[2];
+        int ref_idx = 0;
+        ulong4 ref = 0, next = 0;
+
+		prev_block = memory + prev_idx * BLOCK_SIZE_ULONG;
+
+		tmp = vload4(id, prev_block);
+
+        if(seg_type == 0) {
+            // Precomputed addressing. The first segment of every lane has two
+            // entries fewer (its idx starts at 2), hence the correction term.
+            seg_refs = refs + ((s * lanes + lane) * seg_length - ((s > 0) ? lanes : lane) * 2);
+            ref_idx = seg_refs[0];
+
+            if(idxs != 0) {
+                seg_idxs = idxs + ((s * lanes + lane) * seg_length - ((s > 0) ? lanes : lane) * 2);
+                cur_idx = seg_idxs[0];
+            }
+
+            ulong4 nextref = vload4(id, memory + ref_idx * BLOCK_SIZE_ULONG);
+
+            for (int i=0;idx < seg_length;i++, idx++) {
+    			next_block = memory + (cur_idx & 0x7FFFFFFF) * BLOCK_SIZE_ULONG;
+
+                if(with_xor == 1)
+                    next = vload4(id, next_block);
+
+                ref = nextref;
+
+                // Fetch the reference block of the following iteration early.
+                if (idx < seg_length - 1) {
+                    ref_idx = seg_refs[i + 1];
+
+                    if(idxs != 0) {
+                        // Flag of the block being computed now; read before
+                        // cur_idx moves on to the next entry.
+                        keep = cur_idx & 0x80000000;
+                        cur_idx = seg_idxs[i + 1];
+                    }
+                    else
+                        cur_idx++;
+
+                    nextref = vload4(id, memory + ref_idx * BLOCK_SIZE_ULONG);
+                }
+
+                tmp ^= ref;
+
+                vstore4(tmp, id, state);
+
+                G1(state);
+                G2(state);
+                G3(state);
+                G4(state);
+
+                if(with_xor == 1)
+                    tmp ^= next;
+
+                tmp ^= vload4(id, state);
+
+                // The flag is bit 31, so a set flag makes the signed `keep`
+                // negative: compare against zero, not with '>'.
+                if(keep != 0) {
+                    vstore4(tmp, id, next_block);
+                }
+                // `keep` can differ between lanes of the work-group, and a
+                // barrier must be reached by every work-item, so it is
+                // executed unconditionally.
+                barrier(CLK_GLOBAL_MEM_FENCE);
+            }
+        }
+        else {
+            // Data-dependent addressing: the reference index is derived from
+            // the first word of the previous block.
+            vstore4(tmp, id, state);
+            barrier(CLK_LOCAL_MEM_FENCE);
+
+            for (int i=0;idx < seg_length;i++, idx++, cur_idx++) {
+                ulong pseudo_rand = state[0];
+
+                ulong ref_lane = ((pseudo_rand >> 32)) % lanes; // thr_cost
+                uint reference_area_size = 0;
+
+				if(pass > 0) {
+					if (lane == ref_lane) {
+						reference_area_size = lane_length - seg_length + idx - 1;
+					} else {
+						reference_area_size = lane_length - seg_length + ((idx == 0) ? (-1) : 0);
+					}
+				}
+				else {
+					if (lane == ref_lane) {
+						reference_area_size = slice * seg_length + idx - 1; // seg_length
+					} else {
+						reference_area_size = slice * seg_length + ((idx == 0) ? (-1) : 0);
+					}
+				}
+
+                // Map the low 32 bits of pseudo_rand non-uniformly onto the
+                // reference area (two multiply-and-shift steps).
+                ulong relative_position = pseudo_rand & 0xFFFFFFFF;
+                relative_position = (relative_position * relative_position) >> 32;
+
+                relative_position = reference_area_size - 1 -
+                                    ((reference_area_size * relative_position) >> 32);
+
+				ref_idx = ref_lane * lane_length + (((pass > 0 && slice < 3) ? ((slice + 1) * seg_length) : 0) + relative_position) % lane_length;
+
+        		ref = vload4(id, memory + ref_idx * BLOCK_SIZE_ULONG);
+
+    			next_block = memory + cur_idx * BLOCK_SIZE_ULONG;
+
+                if(with_xor == 1)
+                    next = vload4(id, next_block);
+
+                tmp ^= ref;
+
+                vstore4(tmp, id, state);
+
+                G1(state);
+                G2(state);
+                G3(state);
+                G4(state);
+
+                if(with_xor == 1)
+                    tmp ^= next;
+
+                tmp ^= vload4(id, state);
+
+                // Keep the new block in `state` too: its first word drives the
+                // addressing of the next iteration.
+                vstore4(tmp, id, state);
+                vstore4(tmp, id, next_block);
+                barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
+            }
+        }
+    }
+
+    // Publish every lane's final block through local memory.
+    vstore4(tmp, id, state);
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+	if(lane == 0) { // first lane needs to accumulate results
+		for(int l=1; l<lanes; l++)
+            tmp ^= vload4(id, scratchpad + l * BLOCK_SIZE_ULONG);
+
+        vstore4(tmp, id, out_mem);
+	}
+}
+
+// Builds the initial seed blocks: for every hash, one block per (lane, idx)
+// pair with idx = 0 or 1.
+//
+// Four consecutive work-items form one hashing "session" (thr_id = 0..3), and
+// each session produces one seed block. A work-group therefore covers
+// get_local_size(0) / 4 sessions, i.e. that many divided by (lanes * 2) hashes.
+//
+//   preseed       input words shared by all hashes; a nonce straddling
+//                 words 9 and 10 is offset by the hash number
+//   seed          output, BLOCK_SIZE_UINT words per (hash, lane, idx)
+//   memsz, lanes, passes
+//                 hashed as-is as parameters of the initial digest
+//   pwdlen, saltlen
+//                 lengths in uint elements (they are hashed multiplied by 4)
+//   threads       number of hashes; sessions past it do nothing
+//   blake_shared  __local memory, BLAKE_SHARED_MEM_ULONG ulongs per session
+__kernel void prehash (
+        __global uint *preseed,
+        __global uint *seed,
+		int memsz,
+		int lanes,
+		int passes,
+		int pwdlen,
+		int saltlen,
+        int threads,
+        __local ulong *blake_shared) {
+	int seeds_batch_size = get_local_size(0) / 4; // number of seeds per block
+	int hash_batch_size = seeds_batch_size / (lanes * 2); // number of hashes per block
+
+	int id = get_local_id(0); // minimum 64 threads
+	int thr_id = id % 4; // thread id in session
+	int session = id / 4; // blake2b hashing session
+
+    int hash = get_group_id(0) * hash_batch_size;
+    int hash_idx = session / (lanes * 2);
+
+    hash += hash_idx;
+
+    if(hash < threads) {
+        int hash_session = session % (lanes * 2); // session in hash
+
+        int lane = hash_session / 2;  // 2 seeds per lane
+        int idx = hash_session % 2; // seed idx in lane
+
+        // Per-session slice of local memory, viewed as uints:
+        //   [0, 20)  digest followed by idx and lane (input of the long digest)
+        //   then     h (10 ulongs), shfl (16 ulongs), buf (32 uints),
+        //            value (1 uint) and the private copy of preseed.
+        __local uint *local_mem = (__local uint *)&blake_shared[session * BLAKE_SHARED_MEM_ULONG];
+        __global uint *local_seed = seed + (hash * lanes * 2 + hash_session) * BLOCK_SIZE_UINT;
+
+        __local ulong *h = (__local ulong *)&local_mem[20];
+        __local ulong *shfl = &h[10];
+        __local uint *buf = (__local uint *)&shfl[16];
+        __local uint *value = &buf[32];
+        __local uint *local_preseed = &value[1];
+
+        __global uint *cursor_in = preseed;
+        __local uint *cursor_out = local_preseed;
+
+        // Copy pwdlen words of preseed, four per iteration (one per thr_id).
+        for(int i=0; i < (pwdlen >> 2); i++, cursor_in += 4, cursor_out += 4) {
+            cursor_out[thr_id] = cursor_in[thr_id];
+        }
+
+        if(thr_id == 0) {
+            for (int i = 0; i < (pwdlen % 4); i++) {
+                cursor_out[i] = cursor_in[i];
+            }
+
+            // The 32-bit nonce occupies the top byte of word 9 and the low
+            // three bytes of word 10; add the hash number and write it back
+            // into the private copy only.
+            uint nonce = (preseed[9] >> 24) | (preseed[10] << 8);
+            nonce += hash;
+            local_preseed[9] = (preseed[9] & 0x00FFFFFF) | (nonce << 24);
+            local_preseed[10] = (preseed[10] & 0xFF000000) | (nonce >> 8);
+        }
+
+        // Initial digest: each parameter is staged in *value and absorbed as
+        // a single word, in the order below.
+        int buf_len = blake2b_init(h, ARGON2_PREHASH_DIGEST_LENGTH_UINT, thr_id);
+        *value = lanes; //lanes
+        buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id);
+        *value = 32; //outlen
+        buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id);
+        *value = memsz; //m_cost
+        buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id);
+        *value = passes; //t_cost
+        buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id);
+        *value = ARGON2_VERSION; //version
+        buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id);
+        *value = ARGON2_TYPE_VALUE; //type
+        buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id);
+        *value = pwdlen * 4; //pw_len
+        buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id);
+        buf_len = blake2b_update_local(local_preseed, pwdlen, h, buf, buf_len, shfl, thr_id);
+        *value = saltlen * 4; //salt_len
+        buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id);
+        // The salt is read from the start of the same buffer as the password.
+        buf_len = blake2b_update_local(local_preseed, saltlen, h, buf, buf_len, shfl, thr_id);
+        *value = 0; //secret_len
+        buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id);
+        buf_len = blake2b_update_local(0, 0, h, buf, buf_len, shfl, thr_id);
+        *value = 0; //ad_len
+        buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id);
+        buf_len = blake2b_update_local(0, 0, h, buf, buf_len, shfl, thr_id);
+
+        blake2b_final_local(local_mem, ARGON2_PREHASH_DIGEST_LENGTH_UINT, h, buf, buf_len, shfl, thr_id);
+
+        // Append the block position (idx, then lane) right after the digest.
+        if (thr_id == 0) {
+            local_mem[ARGON2_PREHASH_DIGEST_LENGTH_UINT] = idx;
+            local_mem[ARGON2_PREHASH_DIGEST_LENGTH_UINT + 1] = lane;
+        }
+
+        // Expand digest + position into a full seed block in global memory.
+        blake2b_digestLong_local(local_seed, ARGON2_DWORDS_IN_BLOCK, local_mem, ARGON2_PREHASH_SEED_LENGTH_UINT, thr_id, (__local ulong *)&local_mem[20]);
+    }
+}
+
+// Final digest: one work-group per hash turns the block in `out` into
+// ARGON2_RAW_LENGTH words in `hash`, and work-item 0 stores the nonce that
+// was used (nonce from preseed words 9/10 plus the group id) right behind it.
+__kernel void posthash (
+        __global uint *hash,
+        __global uint *out,
+        __global uint *preseed,
+        __local ulong *blake_shared) {
+
+    const int group = get_group_id(0);
+    const int lid = get_local_id(0);
+
+    // Each hash owns ARGON2_RAW_LENGTH result words plus one word for the nonce.
+    __global uint *result = &hash[group * (ARGON2_RAW_LENGTH + 1)];
+    __global uint *final_block = &out[group * BLOCK_SIZE_UINT];
+
+    blake2b_digestLong_global(result, ARGON2_RAW_LENGTH, final_block, ARGON2_DWORDS_IN_BLOCK, lid, blake_shared);
+
+    if (lid != 0)
+        return;
+
+    // The nonce straddles two words: top byte of word 9, low three bytes of word 10.
+    uint nonce_low = preseed[9] >> 24;
+    uint nonce_high = preseed[10] << 8;
+    result[ARGON2_RAW_LENGTH] = (nonce_low | nonce_high) + group;
+}
+
+)OCL";
diff --git a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.h b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.h
new file mode 100644
index 00000000..386659f8
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.h
@@ -0,0 +1,10 @@
+//
+// Created by Haifa Bogdan Adnan on 06/08/2018.
+//
+
+#ifndef ARGON2_OPENCL_KERNEL_H
+#define ARGON2_OPENCL_KERNEL_H
+#include <string> // makes this header self-contained
+// OpenCL C source text of the Argon2 kernels; defined in a separate .cpp file.
+extern std::string OpenCLKernel;
+#endif //ARGON2_OPENCL_KERNEL_H
diff --git a/src/crypto/asm/CryptonightR_soft_aes_template.inc b/src/crypto/asm/CryptonightR_soft_aes_template.inc
deleted file mode 100644
index e9e1bb4f..00000000
--- a/src/crypto/asm/CryptonightR_soft_aes_template.inc
+++ /dev/null
@@ -1,281 +0,0 @@
-PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part1)
-PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
-PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part2)
-PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part3)
-PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_end)
-
-ALIGN(64)
-FN_PREFIX(CryptonightR_soft_aes_template_part1):
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+8], rcx
-	push	rbx
-	push	rbp
-	push	rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 232
-
-	mov	eax, [rcx+96]
-	mov	ebx, [rcx+100]
-	mov	esi, [rcx+104]
-	mov	edx, [rcx+108]
-	mov [rsp+144], eax
-	mov [rsp+148], ebx
-	mov [rsp+152], esi
-	mov [rsp+156], edx
-
-	mov	rax, QWORD PTR [rcx+48]
-	mov	r10, rcx
-	xor	rax, QWORD PTR [rcx+16]
-	mov	r8, QWORD PTR [rcx+32]
-	xor	r8, QWORD PTR [rcx]
-	mov	r9, QWORD PTR [rcx+40]
-	xor	r9, QWORD PTR [rcx+8]
-	movq	xmm4, rax
-	mov	rdx, QWORD PTR [rcx+56]
-	xor	rdx, QWORD PTR [rcx+24]
-	mov	r11, QWORD PTR [rcx+224]
-	mov	rcx, QWORD PTR [rcx+88]
-	xor	rcx, QWORD PTR [r10+72]
-	mov	rax, QWORD PTR [r10+80]
-	movq	xmm0, rdx
-	xor	rax, QWORD PTR [r10+64]
-
-	movaps	XMMWORD PTR [rsp+16], xmm6
-	movaps	XMMWORD PTR [rsp+32], xmm7
-	movaps	XMMWORD PTR [rsp+48], xmm8
-	movaps	XMMWORD PTR [rsp+64], xmm9
-	movaps	XMMWORD PTR [rsp+80], xmm10
-	movaps	XMMWORD PTR [rsp+96], xmm11
-	movaps	XMMWORD PTR [rsp+112], xmm12
-	movaps	XMMWORD PTR [rsp+128], xmm13
-
-	movq	xmm5, rax
-
-	mov	rax, r8
-	punpcklqdq xmm4, xmm0
-	and	eax, 2097136
-	movq	xmm10, QWORD PTR [r10+96]
-	movq	xmm0, rcx
-	mov	rcx, QWORD PTR [r10+104]
-	xorps	xmm9, xmm9
-	mov	QWORD PTR [rsp+328], rax
-	movq	xmm12, r11
-	mov	QWORD PTR [rsp+320], r9
-	punpcklqdq xmm5, xmm0
-	movq xmm13, rcx
-	mov r12d, 524288
-
-	ALIGN(64)
-FN_PREFIX(CryptonightR_soft_aes_template_mainloop):
-	movd xmm11, r12d
-	mov	r12, QWORD PTR [r10+272]
-	lea	r13, QWORD PTR [rax+r11]
-	mov	esi, DWORD PTR [r13]
-	movq	xmm0, r9
-	mov	r10d, DWORD PTR [r13+4]
-	movq	xmm7, r8
-	mov	ebp, DWORD PTR [r13+12]
-	mov	r14d, DWORD PTR [r13+8]
-	mov	rdx, QWORD PTR [rsp+328]
-	movzx	ecx, sil
-	shr	esi, 8
-	punpcklqdq xmm7, xmm0
-	mov	r15d, DWORD PTR [r12+rcx*4]
-	movzx	ecx, r10b
-	shr	r10d, 8
-	mov	edi, DWORD PTR [r12+rcx*4]
-	movzx	ecx, r14b
-	shr	r14d, 8
-	mov	ebx, DWORD PTR [r12+rcx*4]
-	movzx	ecx, bpl
-	shr	ebp, 8
-	mov	r9d, DWORD PTR [r12+rcx*4]
-	movzx	ecx, r10b
-	shr	r10d, 8
-	xor	r15d, DWORD PTR [r12+rcx*4+1024]
-	movzx	ecx, r14b
-	shr	r14d, 8
-	mov	eax, r14d
-	shr	eax, 8
-	xor	edi, DWORD PTR [r12+rcx*4+1024]
-	add	eax, 256
-	movzx	ecx, bpl
-	shr	ebp, 8
-	xor	ebx, DWORD PTR [r12+rcx*4+1024]
-	movzx	ecx, sil
-	shr	esi, 8
-	xor	r9d, DWORD PTR [r12+rcx*4+1024]
-	add	r12, 2048
-	movzx	ecx, r10b
-	shr	r10d, 8
-	add	r10d, 256
-	mov	r11d, DWORD PTR [r12+rax*4]
-	xor	r11d, DWORD PTR [r12+rcx*4]
-	xor	r11d, r9d
-	movzx	ecx, sil
-	mov	r10d, DWORD PTR [r12+r10*4]
-	shr	esi, 8
-	add	esi, 256
-	xor	r10d, DWORD PTR [r12+rcx*4]
-	movzx	ecx, bpl
-	xor	r10d, ebx
-	shr	ebp, 8
-	movd	xmm1, r11d
-	add	ebp, 256
-	movq	r11, xmm12
-	mov	r9d, DWORD PTR [r12+rcx*4]
-	xor	r9d, DWORD PTR [r12+rsi*4]
-	mov	eax, DWORD PTR [r12+rbp*4]
-	xor	r9d, edi
-	movzx	ecx, r14b
-	movd	xmm0, r10d
-	movd	xmm2, r9d
-	xor	eax, DWORD PTR [r12+rcx*4]
-	mov	rcx, rdx
-	xor	eax, r15d
-	punpckldq xmm2, xmm1
-	xor	rcx, 16
-	movd	xmm6, eax
-	mov	rax, rdx
-	punpckldq xmm6, xmm0
-	xor	rax, 32
-	punpckldq xmm6, xmm2
-	xor	rdx, 48
-	movdqu	xmm2, XMMWORD PTR [rcx+r11]
-	pxor xmm6, xmm2
-	pxor	xmm6, xmm7
-	paddq	xmm2, xmm4
-	movdqu	xmm1, XMMWORD PTR [rax+r11]
-	movdqu	xmm0, XMMWORD PTR [rdx+r11]
-	pxor xmm6, xmm1
-	pxor xmm6, xmm0
-	paddq	xmm0, xmm5
-	movdqu	XMMWORD PTR [rcx+r11], xmm0
-	movdqu	XMMWORD PTR [rax+r11], xmm2
-	movq rcx, xmm13
-	paddq	xmm1, xmm7
-	movdqu	XMMWORD PTR [rdx+r11], xmm1
-	movq	rdi, xmm6
-	mov	r10, rdi
-	and	r10d, 2097136
-	movdqa	xmm0, xmm6
-	pxor	xmm0, xmm4
-	movdqu	XMMWORD PTR [r13], xmm0
-
-	mov ebx, [rsp+144]
-	mov ebp, [rsp+152]
-	add ebx, [rsp+148]
-	add ebp, [rsp+156]
-	shl rbp, 32
-	or rbx, rbp
-
-	xor rbx, QWORD PTR [r10+r11]
-	lea	r14, QWORD PTR [r10+r11]
-	mov	rbp, QWORD PTR [r14+8]
-
-	mov [rsp+160], rbx
-	mov [rsp+168], rdi
-	mov [rsp+176], rbp
-	mov [rsp+184], r10
-	mov r10, rsp
-
-	mov ebx, [rsp+144]
-	mov esi, [rsp+148]
-	mov edi, [rsp+152]
-	mov ebp, [rsp+156]
-
-	movd esp, xmm7
-	movaps xmm0, xmm7
-	psrldq xmm0, 8
-	movd r15d, xmm0
-	movd eax, xmm4
-	movd edx, xmm5
-	movaps xmm0, xmm5
-	psrldq xmm0, 8
-	movd r9d, xmm0
-
-FN_PREFIX(CryptonightR_soft_aes_template_part2):
-	mov rsp, r10
-	mov [rsp+144], ebx
-	mov [rsp+148], esi
-	mov [rsp+152], edi
-	mov [rsp+156], ebp
-
-	mov edi, edi
-	shl rbp, 32
-	or rbp, rdi
-	xor r8, rbp
-
-	mov ebx, ebx
-	shl rsi, 32
-	or rsi, rbx
-	xor QWORD PTR [rsp+320], rsi
-
-	mov rbx, [rsp+160]
-	mov rdi, [rsp+168]
-	mov rbp, [rsp+176]
-	mov r10, [rsp+184]
-
-	mov	r9, r10
-	xor	r9, 16
-	mov	rcx, r10
-	xor	rcx, 32
-	xor	r10, 48
-	mov	rax, rbx
-	mul	rdi
-	movdqu	xmm2, XMMWORD PTR [r9+r11]
-	movdqu	xmm1, XMMWORD PTR [rcx+r11]
-	pxor xmm6, xmm2
-	pxor xmm6, xmm1
-	paddq	xmm1, xmm7
-	add	r8, rdx
-	movdqu	xmm0, XMMWORD PTR [r10+r11]
-	pxor xmm6, xmm0
-	paddq	xmm0, xmm5
-	paddq	xmm2, xmm4
-	movdqu	XMMWORD PTR [r9+r11], xmm0
-	movdqa	xmm5, xmm4
-	mov	r9, QWORD PTR [rsp+320]
-	movdqa	xmm4, xmm6
-	add	r9, rax
-	movdqu	XMMWORD PTR [rcx+r11], xmm2
-	movdqu	XMMWORD PTR [r10+r11], xmm1
-	mov	r10, QWORD PTR [rsp+304]
-	movd r12d, xmm11
-	mov	QWORD PTR [r14], r8
-	xor	r8, rbx
-	mov	rax, r8
-	mov	QWORD PTR [r14+8], r9
-	and	eax, 2097136
-	xor	r9, rbp
-	mov	QWORD PTR [rsp+320], r9
-	mov	QWORD PTR [rsp+328], rax
-	sub	r12d, 1
-	jne	FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
-
-FN_PREFIX(CryptonightR_soft_aes_template_part3):
-	movaps	xmm6, XMMWORD PTR [rsp+16]
-	movaps	xmm7, XMMWORD PTR [rsp+32]
-	movaps	xmm8, XMMWORD PTR [rsp+48]
-	movaps	xmm9, XMMWORD PTR [rsp+64]
-	movaps	xmm10, XMMWORD PTR [rsp+80]
-	movaps	xmm11, XMMWORD PTR [rsp+96]
-	movaps	xmm12, XMMWORD PTR [rsp+112]
-	movaps	xmm13, XMMWORD PTR [rsp+128]
-
-	add	rsp, 232
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	pop	rsi
-	pop	rbp
-	pop	rbx
-	ret
-FN_PREFIX(CryptonightR_soft_aes_template_end):
diff --git a/src/crypto/asm/CryptonightR_soft_aes_template_win.inc b/src/crypto/asm/CryptonightR_soft_aes_template_win.inc
deleted file mode 100644
index 589192ca..00000000
--- a/src/crypto/asm/CryptonightR_soft_aes_template_win.inc
+++ /dev/null
@@ -1,281 +0,0 @@
-PUBLIC CryptonightR_soft_aes_template_part1
-PUBLIC CryptonightR_soft_aes_template_mainloop
-PUBLIC CryptonightR_soft_aes_template_part2
-PUBLIC CryptonightR_soft_aes_template_part3
-PUBLIC CryptonightR_soft_aes_template_end
-
-ALIGN(64)
-CryptonightR_soft_aes_template_part1:
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+8], rcx
-	push	rbx
-	push	rbp
-	push	rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 232
-
-	mov	eax, [rcx+96]
-	mov	ebx, [rcx+100]
-	mov	esi, [rcx+104]
-	mov	edx, [rcx+108]
-	mov [rsp+144], eax
-	mov [rsp+148], ebx
-	mov [rsp+152], esi
-	mov [rsp+156], edx
-
-	mov	rax, QWORD PTR [rcx+48]
-	mov	r10, rcx
-	xor	rax, QWORD PTR [rcx+16]
-	mov	r8, QWORD PTR [rcx+32]
-	xor	r8, QWORD PTR [rcx]
-	mov	r9, QWORD PTR [rcx+40]
-	xor	r9, QWORD PTR [rcx+8]
-	movq	xmm4, rax
-	mov	rdx, QWORD PTR [rcx+56]
-	xor	rdx, QWORD PTR [rcx+24]
-	mov	r11, QWORD PTR [rcx+224]
-	mov	rcx, QWORD PTR [rcx+88]
-	xor	rcx, QWORD PTR [r10+72]
-	mov	rax, QWORD PTR [r10+80]
-	movq	xmm0, rdx
-	xor	rax, QWORD PTR [r10+64]
-
-	movaps	XMMWORD PTR [rsp+16], xmm6
-	movaps	XMMWORD PTR [rsp+32], xmm7
-	movaps	XMMWORD PTR [rsp+48], xmm8
-	movaps	XMMWORD PTR [rsp+64], xmm9
-	movaps	XMMWORD PTR [rsp+80], xmm10
-	movaps	XMMWORD PTR [rsp+96], xmm11
-	movaps	XMMWORD PTR [rsp+112], xmm12
-	movaps	XMMWORD PTR [rsp+128], xmm13
-
-	movq	xmm5, rax
-
-	mov	rax, r8
-	punpcklqdq xmm4, xmm0
-	and	eax, 2097136
-	movq	xmm10, QWORD PTR [r10+96]
-	movq	xmm0, rcx
-	mov	rcx, QWORD PTR [r10+104]
-	xorps	xmm9, xmm9
-	mov	QWORD PTR [rsp+328], rax
-	movq	xmm12, r11
-	mov	QWORD PTR [rsp+320], r9
-	punpcklqdq xmm5, xmm0
-	movq xmm13, rcx
-	mov r12d, 524288
-
-	ALIGN(64)
-CryptonightR_soft_aes_template_mainloop:
-	movd xmm11, r12d
-	mov	r12, QWORD PTR [r10+272]
-	lea	r13, QWORD PTR [rax+r11]
-	mov	esi, DWORD PTR [r13]
-	movq	xmm0, r9
-	mov	r10d, DWORD PTR [r13+4]
-	movq	xmm7, r8
-	mov	ebp, DWORD PTR [r13+12]
-	mov	r14d, DWORD PTR [r13+8]
-	mov	rdx, QWORD PTR [rsp+328]
-	movzx	ecx, sil
-	shr	esi, 8
-	punpcklqdq xmm7, xmm0
-	mov	r15d, DWORD PTR [r12+rcx*4]
-	movzx	ecx, r10b
-	shr	r10d, 8
-	mov	edi, DWORD PTR [r12+rcx*4]
-	movzx	ecx, r14b
-	shr	r14d, 8
-	mov	ebx, DWORD PTR [r12+rcx*4]
-	movzx	ecx, bpl
-	shr	ebp, 8
-	mov	r9d, DWORD PTR [r12+rcx*4]
-	movzx	ecx, r10b
-	shr	r10d, 8
-	xor	r15d, DWORD PTR [r12+rcx*4+1024]
-	movzx	ecx, r14b
-	shr	r14d, 8
-	mov	eax, r14d
-	shr	eax, 8
-	xor	edi, DWORD PTR [r12+rcx*4+1024]
-	add	eax, 256
-	movzx	ecx, bpl
-	shr	ebp, 8
-	xor	ebx, DWORD PTR [r12+rcx*4+1024]
-	movzx	ecx, sil
-	shr	esi, 8
-	xor	r9d, DWORD PTR [r12+rcx*4+1024]
-	add	r12, 2048
-	movzx	ecx, r10b
-	shr	r10d, 8
-	add	r10d, 256
-	mov	r11d, DWORD PTR [r12+rax*4]
-	xor	r11d, DWORD PTR [r12+rcx*4]
-	xor	r11d, r9d
-	movzx	ecx, sil
-	mov	r10d, DWORD PTR [r12+r10*4]
-	shr	esi, 8
-	add	esi, 256
-	xor	r10d, DWORD PTR [r12+rcx*4]
-	movzx	ecx, bpl
-	xor	r10d, ebx
-	shr	ebp, 8
-	movd	xmm1, r11d
-	add	ebp, 256
-	movq	r11, xmm12
-	mov	r9d, DWORD PTR [r12+rcx*4]
-	xor	r9d, DWORD PTR [r12+rsi*4]
-	mov	eax, DWORD PTR [r12+rbp*4]
-	xor	r9d, edi
-	movzx	ecx, r14b
-	movd	xmm0, r10d
-	movd	xmm2, r9d
-	xor	eax, DWORD PTR [r12+rcx*4]
-	mov	rcx, rdx
-	xor	eax, r15d
-	punpckldq xmm2, xmm1
-	xor	rcx, 16
-	movd	xmm6, eax
-	mov	rax, rdx
-	punpckldq xmm6, xmm0
-	xor	rax, 32
-	punpckldq xmm6, xmm2
-	xor	rdx, 48
-	movdqu	xmm2, XMMWORD PTR [rcx+r11]
-	pxor xmm6, xmm2
-	pxor	xmm6, xmm7
-	paddq	xmm2, xmm4
-	movdqu	xmm1, XMMWORD PTR [rax+r11]
-	movdqu	xmm0, XMMWORD PTR [rdx+r11]
-	pxor xmm6, xmm1
-	pxor xmm6, xmm0
-	paddq	xmm0, xmm5
-	movdqu	XMMWORD PTR [rcx+r11], xmm0
-	movdqu	XMMWORD PTR [rax+r11], xmm2
-	movq rcx, xmm13
-	paddq	xmm1, xmm7
-	movdqu	XMMWORD PTR [rdx+r11], xmm1
-	movq	rdi, xmm6
-	mov	r10, rdi
-	and	r10d, 2097136
-	movdqa	xmm0, xmm6
-	pxor	xmm0, xmm4
-	movdqu	XMMWORD PTR [r13], xmm0
-
-	mov ebx, [rsp+144]
-	mov ebp, [rsp+152]
-	add ebx, [rsp+148]
-	add ebp, [rsp+156]
-	shl rbp, 32
-	or rbx, rbp
-
-	xor rbx, QWORD PTR [r10+r11]
-	lea	r14, QWORD PTR [r10+r11]
-	mov	rbp, QWORD PTR [r14+8]
-
-	mov [rsp+160], rbx
-	mov [rsp+168], rdi
-	mov [rsp+176], rbp
-	mov [rsp+184], r10
-	mov r10, rsp
-
-	mov ebx, [rsp+144]
-	mov esi, [rsp+148]
-	mov edi, [rsp+152]
-	mov ebp, [rsp+156]
-
-	movd esp, xmm7
-	movaps xmm0, xmm7
-	psrldq xmm0, 8
-	movd r15d, xmm0
-	movd eax, xmm4
-	movd edx, xmm5
-	movaps xmm0, xmm5
-	psrldq xmm0, 8
-	movd r9d, xmm0
-
-CryptonightR_soft_aes_template_part2:
-	mov rsp, r10
-	mov [rsp+144], ebx
-	mov [rsp+148], esi
-	mov [rsp+152], edi
-	mov [rsp+156], ebp
-
-	mov edi, edi
-	shl rbp, 32
-	or rbp, rdi
-	xor r8, rbp
-
-	mov ebx, ebx
-	shl rsi, 32
-	or rsi, rbx
-	xor QWORD PTR [rsp+320], rsi
-
-	mov rbx, [rsp+160]
-	mov rdi, [rsp+168]
-	mov rbp, [rsp+176]
-	mov r10, [rsp+184]
-
-	mov	r9, r10
-	xor	r9, 16
-	mov	rcx, r10
-	xor	rcx, 32
-	xor	r10, 48
-	mov	rax, rbx
-	mul	rdi
-	movdqu	xmm2, XMMWORD PTR [r9+r11]
-	movdqu	xmm1, XMMWORD PTR [rcx+r11]
-	pxor xmm6, xmm2
-	pxor xmm6, xmm1
-	paddq	xmm1, xmm7
-	add	r8, rdx
-	movdqu	xmm0, XMMWORD PTR [r10+r11]
-	pxor xmm6, xmm0
-	paddq	xmm0, xmm5
-	paddq	xmm2, xmm4
-	movdqu	XMMWORD PTR [r9+r11], xmm0
-	movdqa	xmm5, xmm4
-	mov	r9, QWORD PTR [rsp+320]
-	movdqa	xmm4, xmm6
-	add	r9, rax
-	movdqu	XMMWORD PTR [rcx+r11], xmm2
-	movdqu	XMMWORD PTR [r10+r11], xmm1
-	mov	r10, QWORD PTR [rsp+304]
-	movd r12d, xmm11
-	mov	QWORD PTR [r14], r8
-	xor	r8, rbx
-	mov	rax, r8
-	mov	QWORD PTR [r14+8], r9
-	and	eax, 2097136
-	xor	r9, rbp
-	mov	QWORD PTR [rsp+320], r9
-	mov	QWORD PTR [rsp+328], rax
-	sub	r12d, 1
-	jne	CryptonightR_soft_aes_template_mainloop
-
-CryptonightR_soft_aes_template_part3:
-	movaps	xmm6, XMMWORD PTR [rsp+16]
-	movaps	xmm7, XMMWORD PTR [rsp+32]
-	movaps	xmm8, XMMWORD PTR [rsp+48]
-	movaps	xmm9, XMMWORD PTR [rsp+64]
-	movaps	xmm10, XMMWORD PTR [rsp+80]
-	movaps	xmm11, XMMWORD PTR [rsp+96]
-	movaps	xmm12, XMMWORD PTR [rsp+112]
-	movaps	xmm13, XMMWORD PTR [rsp+128]
-
-	add	rsp, 232
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	pop	rsi
-	pop	rbp
-	pop	rbx
-	ret
-CryptonightR_soft_aes_template_end:
diff --git a/src/crypto/asm/CryptonightR_template.S b/src/crypto/asm/CryptonightR_template.S
deleted file mode 100644
index d2974d16..00000000
--- a/src/crypto/asm/CryptonightR_template.S
+++ /dev/null
@@ -1,1595 +0,0 @@
-#ifdef __APPLE__
-#   define ALIGN(x) .align 6
-#else
-#   define ALIGN(x) .align 64
-#endif
-.intel_syntax noprefix
-#ifdef __APPLE__
-#   define FN_PREFIX(fn) _ ## fn
-.text
-#else
-#   define FN_PREFIX(fn) fn
-.section .text
-#endif
-
-#define PUBLIC .global
-
-PUBLIC FN_PREFIX(CryptonightR_instruction0)
-PUBLIC FN_PREFIX(CryptonightR_instruction1)
-PUBLIC FN_PREFIX(CryptonightR_instruction2)
-PUBLIC FN_PREFIX(CryptonightR_instruction3)
-PUBLIC FN_PREFIX(CryptonightR_instruction4)
-PUBLIC FN_PREFIX(CryptonightR_instruction5)
-PUBLIC FN_PREFIX(CryptonightR_instruction6)
-PUBLIC FN_PREFIX(CryptonightR_instruction7)
-PUBLIC FN_PREFIX(CryptonightR_instruction8)
-PUBLIC FN_PREFIX(CryptonightR_instruction9)
-PUBLIC FN_PREFIX(CryptonightR_instruction10)
-PUBLIC FN_PREFIX(CryptonightR_instruction11)
-PUBLIC FN_PREFIX(CryptonightR_instruction12)
-PUBLIC FN_PREFIX(CryptonightR_instruction13)
-PUBLIC FN_PREFIX(CryptonightR_instruction14)
-PUBLIC FN_PREFIX(CryptonightR_instruction15)
-PUBLIC FN_PREFIX(CryptonightR_instruction16)
-PUBLIC FN_PREFIX(CryptonightR_instruction17)
-PUBLIC FN_PREFIX(CryptonightR_instruction18)
-PUBLIC FN_PREFIX(CryptonightR_instruction19)
-PUBLIC FN_PREFIX(CryptonightR_instruction20)
-PUBLIC FN_PREFIX(CryptonightR_instruction21)
-PUBLIC FN_PREFIX(CryptonightR_instruction22)
-PUBLIC FN_PREFIX(CryptonightR_instruction23)
-PUBLIC FN_PREFIX(CryptonightR_instruction24)
-PUBLIC FN_PREFIX(CryptonightR_instruction25)
-PUBLIC FN_PREFIX(CryptonightR_instruction26)
-PUBLIC FN_PREFIX(CryptonightR_instruction27)
-PUBLIC FN_PREFIX(CryptonightR_instruction28)
-PUBLIC FN_PREFIX(CryptonightR_instruction29)
-PUBLIC FN_PREFIX(CryptonightR_instruction30)
-PUBLIC FN_PREFIX(CryptonightR_instruction31)
-PUBLIC FN_PREFIX(CryptonightR_instruction32)
-PUBLIC FN_PREFIX(CryptonightR_instruction33)
-PUBLIC FN_PREFIX(CryptonightR_instruction34)
-PUBLIC FN_PREFIX(CryptonightR_instruction35)
-PUBLIC FN_PREFIX(CryptonightR_instruction36)
-PUBLIC FN_PREFIX(CryptonightR_instruction37)
-PUBLIC FN_PREFIX(CryptonightR_instruction38)
-PUBLIC FN_PREFIX(CryptonightR_instruction39)
-PUBLIC FN_PREFIX(CryptonightR_instruction40)
-PUBLIC FN_PREFIX(CryptonightR_instruction41)
-PUBLIC FN_PREFIX(CryptonightR_instruction42)
-PUBLIC FN_PREFIX(CryptonightR_instruction43)
-PUBLIC FN_PREFIX(CryptonightR_instruction44)
-PUBLIC FN_PREFIX(CryptonightR_instruction45)
-PUBLIC FN_PREFIX(CryptonightR_instruction46)
-PUBLIC FN_PREFIX(CryptonightR_instruction47)
-PUBLIC FN_PREFIX(CryptonightR_instruction48)
-PUBLIC FN_PREFIX(CryptonightR_instruction49)
-PUBLIC FN_PREFIX(CryptonightR_instruction50)
-PUBLIC FN_PREFIX(CryptonightR_instruction51)
-PUBLIC FN_PREFIX(CryptonightR_instruction52)
-PUBLIC FN_PREFIX(CryptonightR_instruction53)
-PUBLIC FN_PREFIX(CryptonightR_instruction54)
-PUBLIC FN_PREFIX(CryptonightR_instruction55)
-PUBLIC FN_PREFIX(CryptonightR_instruction56)
-PUBLIC FN_PREFIX(CryptonightR_instruction57)
-PUBLIC FN_PREFIX(CryptonightR_instruction58)
-PUBLIC FN_PREFIX(CryptonightR_instruction59)
-PUBLIC FN_PREFIX(CryptonightR_instruction60)
-PUBLIC FN_PREFIX(CryptonightR_instruction61)
-PUBLIC FN_PREFIX(CryptonightR_instruction62)
-PUBLIC FN_PREFIX(CryptonightR_instruction63)
-PUBLIC FN_PREFIX(CryptonightR_instruction64)
-PUBLIC FN_PREFIX(CryptonightR_instruction65)
-PUBLIC FN_PREFIX(CryptonightR_instruction66)
-PUBLIC FN_PREFIX(CryptonightR_instruction67)
-PUBLIC FN_PREFIX(CryptonightR_instruction68)
-PUBLIC FN_PREFIX(CryptonightR_instruction69)
-PUBLIC FN_PREFIX(CryptonightR_instruction70)
-PUBLIC FN_PREFIX(CryptonightR_instruction71)
-PUBLIC FN_PREFIX(CryptonightR_instruction72)
-PUBLIC FN_PREFIX(CryptonightR_instruction73)
-PUBLIC FN_PREFIX(CryptonightR_instruction74)
-PUBLIC FN_PREFIX(CryptonightR_instruction75)
-PUBLIC FN_PREFIX(CryptonightR_instruction76)
-PUBLIC FN_PREFIX(CryptonightR_instruction77)
-PUBLIC FN_PREFIX(CryptonightR_instruction78)
-PUBLIC FN_PREFIX(CryptonightR_instruction79)
-PUBLIC FN_PREFIX(CryptonightR_instruction80)
-PUBLIC FN_PREFIX(CryptonightR_instruction81)
-PUBLIC FN_PREFIX(CryptonightR_instruction82)
-PUBLIC FN_PREFIX(CryptonightR_instruction83)
-PUBLIC FN_PREFIX(CryptonightR_instruction84)
-PUBLIC FN_PREFIX(CryptonightR_instruction85)
-PUBLIC FN_PREFIX(CryptonightR_instruction86)
-PUBLIC FN_PREFIX(CryptonightR_instruction87)
-PUBLIC FN_PREFIX(CryptonightR_instruction88)
-PUBLIC FN_PREFIX(CryptonightR_instruction89)
-PUBLIC FN_PREFIX(CryptonightR_instruction90)
-PUBLIC FN_PREFIX(CryptonightR_instruction91)
-PUBLIC FN_PREFIX(CryptonightR_instruction92)
-PUBLIC FN_PREFIX(CryptonightR_instruction93)
-PUBLIC FN_PREFIX(CryptonightR_instruction94)
-PUBLIC FN_PREFIX(CryptonightR_instruction95)
-PUBLIC FN_PREFIX(CryptonightR_instruction96)
-PUBLIC FN_PREFIX(CryptonightR_instruction97)
-PUBLIC FN_PREFIX(CryptonightR_instruction98)
-PUBLIC FN_PREFIX(CryptonightR_instruction99)
-PUBLIC FN_PREFIX(CryptonightR_instruction100)
-PUBLIC FN_PREFIX(CryptonightR_instruction101)
-PUBLIC FN_PREFIX(CryptonightR_instruction102)
-PUBLIC FN_PREFIX(CryptonightR_instruction103)
-PUBLIC FN_PREFIX(CryptonightR_instruction104)
-PUBLIC FN_PREFIX(CryptonightR_instruction105)
-PUBLIC FN_PREFIX(CryptonightR_instruction106)
-PUBLIC FN_PREFIX(CryptonightR_instruction107)
-PUBLIC FN_PREFIX(CryptonightR_instruction108)
-PUBLIC FN_PREFIX(CryptonightR_instruction109)
-PUBLIC FN_PREFIX(CryptonightR_instruction110)
-PUBLIC FN_PREFIX(CryptonightR_instruction111)
-PUBLIC FN_PREFIX(CryptonightR_instruction112)
-PUBLIC FN_PREFIX(CryptonightR_instruction113)
-PUBLIC FN_PREFIX(CryptonightR_instruction114)
-PUBLIC FN_PREFIX(CryptonightR_instruction115)
-PUBLIC FN_PREFIX(CryptonightR_instruction116)
-PUBLIC FN_PREFIX(CryptonightR_instruction117)
-PUBLIC FN_PREFIX(CryptonightR_instruction118)
-PUBLIC FN_PREFIX(CryptonightR_instruction119)
-PUBLIC FN_PREFIX(CryptonightR_instruction120)
-PUBLIC FN_PREFIX(CryptonightR_instruction121)
-PUBLIC FN_PREFIX(CryptonightR_instruction122)
-PUBLIC FN_PREFIX(CryptonightR_instruction123)
-PUBLIC FN_PREFIX(CryptonightR_instruction124)
-PUBLIC FN_PREFIX(CryptonightR_instruction125)
-PUBLIC FN_PREFIX(CryptonightR_instruction126)
-PUBLIC FN_PREFIX(CryptonightR_instruction127)
-PUBLIC FN_PREFIX(CryptonightR_instruction128)
-PUBLIC FN_PREFIX(CryptonightR_instruction129)
-PUBLIC FN_PREFIX(CryptonightR_instruction130)
-PUBLIC FN_PREFIX(CryptonightR_instruction131)
-PUBLIC FN_PREFIX(CryptonightR_instruction132)
-PUBLIC FN_PREFIX(CryptonightR_instruction133)
-PUBLIC FN_PREFIX(CryptonightR_instruction134)
-PUBLIC FN_PREFIX(CryptonightR_instruction135)
-PUBLIC FN_PREFIX(CryptonightR_instruction136)
-PUBLIC FN_PREFIX(CryptonightR_instruction137)
-PUBLIC FN_PREFIX(CryptonightR_instruction138)
-PUBLIC FN_PREFIX(CryptonightR_instruction139)
-PUBLIC FN_PREFIX(CryptonightR_instruction140)
-PUBLIC FN_PREFIX(CryptonightR_instruction141)
-PUBLIC FN_PREFIX(CryptonightR_instruction142)
-PUBLIC FN_PREFIX(CryptonightR_instruction143)
-PUBLIC FN_PREFIX(CryptonightR_instruction144)
-PUBLIC FN_PREFIX(CryptonightR_instruction145)
-PUBLIC FN_PREFIX(CryptonightR_instruction146)
-PUBLIC FN_PREFIX(CryptonightR_instruction147)
-PUBLIC FN_PREFIX(CryptonightR_instruction148)
-PUBLIC FN_PREFIX(CryptonightR_instruction149)
-PUBLIC FN_PREFIX(CryptonightR_instruction150)
-PUBLIC FN_PREFIX(CryptonightR_instruction151)
-PUBLIC FN_PREFIX(CryptonightR_instruction152)
-PUBLIC FN_PREFIX(CryptonightR_instruction153)
-PUBLIC FN_PREFIX(CryptonightR_instruction154)
-PUBLIC FN_PREFIX(CryptonightR_instruction155)
-PUBLIC FN_PREFIX(CryptonightR_instruction156)
-PUBLIC FN_PREFIX(CryptonightR_instruction157)
-PUBLIC FN_PREFIX(CryptonightR_instruction158)
-PUBLIC FN_PREFIX(CryptonightR_instruction159)
-PUBLIC FN_PREFIX(CryptonightR_instruction160)
-PUBLIC FN_PREFIX(CryptonightR_instruction161)
-PUBLIC FN_PREFIX(CryptonightR_instruction162)
-PUBLIC FN_PREFIX(CryptonightR_instruction163)
-PUBLIC FN_PREFIX(CryptonightR_instruction164)
-PUBLIC FN_PREFIX(CryptonightR_instruction165)
-PUBLIC FN_PREFIX(CryptonightR_instruction166)
-PUBLIC FN_PREFIX(CryptonightR_instruction167)
-PUBLIC FN_PREFIX(CryptonightR_instruction168)
-PUBLIC FN_PREFIX(CryptonightR_instruction169)
-PUBLIC FN_PREFIX(CryptonightR_instruction170)
-PUBLIC FN_PREFIX(CryptonightR_instruction171)
-PUBLIC FN_PREFIX(CryptonightR_instruction172)
-PUBLIC FN_PREFIX(CryptonightR_instruction173)
-PUBLIC FN_PREFIX(CryptonightR_instruction174)
-PUBLIC FN_PREFIX(CryptonightR_instruction175)
-PUBLIC FN_PREFIX(CryptonightR_instruction176)
-PUBLIC FN_PREFIX(CryptonightR_instruction177)
-PUBLIC FN_PREFIX(CryptonightR_instruction178)
-PUBLIC FN_PREFIX(CryptonightR_instruction179)
-PUBLIC FN_PREFIX(CryptonightR_instruction180)
-PUBLIC FN_PREFIX(CryptonightR_instruction181)
-PUBLIC FN_PREFIX(CryptonightR_instruction182)
-PUBLIC FN_PREFIX(CryptonightR_instruction183)
-PUBLIC FN_PREFIX(CryptonightR_instruction184)
-PUBLIC FN_PREFIX(CryptonightR_instruction185)
-PUBLIC FN_PREFIX(CryptonightR_instruction186)
-PUBLIC FN_PREFIX(CryptonightR_instruction187)
-PUBLIC FN_PREFIX(CryptonightR_instruction188)
-PUBLIC FN_PREFIX(CryptonightR_instruction189)
-PUBLIC FN_PREFIX(CryptonightR_instruction190)
-PUBLIC FN_PREFIX(CryptonightR_instruction191)
-PUBLIC FN_PREFIX(CryptonightR_instruction192)
-PUBLIC FN_PREFIX(CryptonightR_instruction193)
-PUBLIC FN_PREFIX(CryptonightR_instruction194)
-PUBLIC FN_PREFIX(CryptonightR_instruction195)
-PUBLIC FN_PREFIX(CryptonightR_instruction196)
-PUBLIC FN_PREFIX(CryptonightR_instruction197)
-PUBLIC FN_PREFIX(CryptonightR_instruction198)
-PUBLIC FN_PREFIX(CryptonightR_instruction199)
-PUBLIC FN_PREFIX(CryptonightR_instruction200)
-PUBLIC FN_PREFIX(CryptonightR_instruction201)
-PUBLIC FN_PREFIX(CryptonightR_instruction202)
-PUBLIC FN_PREFIX(CryptonightR_instruction203)
-PUBLIC FN_PREFIX(CryptonightR_instruction204)
-PUBLIC FN_PREFIX(CryptonightR_instruction205)
-PUBLIC FN_PREFIX(CryptonightR_instruction206)
-PUBLIC FN_PREFIX(CryptonightR_instruction207)
-PUBLIC FN_PREFIX(CryptonightR_instruction208)
-PUBLIC FN_PREFIX(CryptonightR_instruction209)
-PUBLIC FN_PREFIX(CryptonightR_instruction210)
-PUBLIC FN_PREFIX(CryptonightR_instruction211)
-PUBLIC FN_PREFIX(CryptonightR_instruction212)
-PUBLIC FN_PREFIX(CryptonightR_instruction213)
-PUBLIC FN_PREFIX(CryptonightR_instruction214)
-PUBLIC FN_PREFIX(CryptonightR_instruction215)
-PUBLIC FN_PREFIX(CryptonightR_instruction216)
-PUBLIC FN_PREFIX(CryptonightR_instruction217)
-PUBLIC FN_PREFIX(CryptonightR_instruction218)
-PUBLIC FN_PREFIX(CryptonightR_instruction219)
-PUBLIC FN_PREFIX(CryptonightR_instruction220)
-PUBLIC FN_PREFIX(CryptonightR_instruction221)
-PUBLIC FN_PREFIX(CryptonightR_instruction222)
-PUBLIC FN_PREFIX(CryptonightR_instruction223)
-PUBLIC FN_PREFIX(CryptonightR_instruction224)
-PUBLIC FN_PREFIX(CryptonightR_instruction225)
-PUBLIC FN_PREFIX(CryptonightR_instruction226)
-PUBLIC FN_PREFIX(CryptonightR_instruction227)
-PUBLIC FN_PREFIX(CryptonightR_instruction228)
-PUBLIC FN_PREFIX(CryptonightR_instruction229)
-PUBLIC FN_PREFIX(CryptonightR_instruction230)
-PUBLIC FN_PREFIX(CryptonightR_instruction231)
-PUBLIC FN_PREFIX(CryptonightR_instruction232)
-PUBLIC FN_PREFIX(CryptonightR_instruction233)
-PUBLIC FN_PREFIX(CryptonightR_instruction234)
-PUBLIC FN_PREFIX(CryptonightR_instruction235)
-PUBLIC FN_PREFIX(CryptonightR_instruction236)
-PUBLIC FN_PREFIX(CryptonightR_instruction237)
-PUBLIC FN_PREFIX(CryptonightR_instruction238)
-PUBLIC FN_PREFIX(CryptonightR_instruction239)
-PUBLIC FN_PREFIX(CryptonightR_instruction240)
-PUBLIC FN_PREFIX(CryptonightR_instruction241)
-PUBLIC FN_PREFIX(CryptonightR_instruction242)
-PUBLIC FN_PREFIX(CryptonightR_instruction243)
-PUBLIC FN_PREFIX(CryptonightR_instruction244)
-PUBLIC FN_PREFIX(CryptonightR_instruction245)
-PUBLIC FN_PREFIX(CryptonightR_instruction246)
-PUBLIC FN_PREFIX(CryptonightR_instruction247)
-PUBLIC FN_PREFIX(CryptonightR_instruction248)
-PUBLIC FN_PREFIX(CryptonightR_instruction249)
-PUBLIC FN_PREFIX(CryptonightR_instruction250)
-PUBLIC FN_PREFIX(CryptonightR_instruction251)
-PUBLIC FN_PREFIX(CryptonightR_instruction252)
-PUBLIC FN_PREFIX(CryptonightR_instruction253)
-PUBLIC FN_PREFIX(CryptonightR_instruction254)
-PUBLIC FN_PREFIX(CryptonightR_instruction255)
-PUBLIC FN_PREFIX(CryptonightR_instruction256)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov0)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov1)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov2)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov3)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov4)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov5)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov6)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov7)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov8)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov9)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov10)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov11)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov12)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov13)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov14)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov15)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov16)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov17)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov18)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov19)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov20)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov21)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov22)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov23)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov24)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov25)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov26)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov27)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov28)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov29)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov30)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov31)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov32)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov33)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov34)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov35)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov36)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov37)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov38)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov39)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov40)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov41)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov42)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov43)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov44)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov45)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov46)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov47)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov48)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov49)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov50)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov51)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov52)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov53)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov54)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov55)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov56)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov57)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov58)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov59)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov60)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov61)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov62)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov63)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov64)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov65)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov66)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov67)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov68)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov69)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov70)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov71)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov72)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov73)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov74)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov75)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov76)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov77)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov78)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov79)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov80)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov81)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov82)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov83)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov84)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov85)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov86)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov87)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov88)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov89)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov90)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov91)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov92)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov93)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov94)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov95)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov96)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov97)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov98)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov99)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov100)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov101)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov102)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov103)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov104)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov105)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov106)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov107)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov108)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov109)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov110)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov111)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov112)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov113)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov114)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov115)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov116)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov117)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov118)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov119)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov120)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov121)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov122)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov123)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov124)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov125)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov126)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov127)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov128)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov129)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov130)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov131)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov132)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov133)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov134)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov135)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov136)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov137)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov138)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov139)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov140)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov141)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov142)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov143)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov144)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov145)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov146)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov147)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov148)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov149)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov150)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov151)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov152)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov153)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov154)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov155)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov156)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov157)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov158)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov159)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov160)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov161)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov162)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov163)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov164)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov165)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov166)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov167)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov168)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov169)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov170)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov171)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov172)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov173)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov174)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov175)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov176)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov177)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov178)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov179)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov180)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov181)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov182)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov183)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov184)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov185)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov186)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov187)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov188)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov189)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov190)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov191)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov192)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov193)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov194)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov195)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov196)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov197)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov198)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov199)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov200)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov201)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov202)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov203)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov204)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov205)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov206)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov207)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov208)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov209)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov210)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov211)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov212)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov213)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov214)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov215)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov216)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov217)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov218)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov219)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov220)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov221)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov222)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov223)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov224)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov225)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov226)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov227)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov228)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov229)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov230)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov231)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov232)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov233)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov234)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov235)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov236)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov237)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov238)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov239)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov240)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov241)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov242)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov243)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov244)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov245)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov246)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov247)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov248)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov249)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov250)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov251)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov252)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov253)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov254)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov255)
-PUBLIC FN_PREFIX(CryptonightR_instruction_mov256)
-
-#include "CryptonightWOW_template.inc"
-#include "CryptonightR_template.inc"
-#include "CryptonightWOW_soft_aes_template.inc"
-#include "CryptonightR_soft_aes_template.inc"
-
-FN_PREFIX(CryptonightR_instruction0):
-	imul	rbx, rbx
-FN_PREFIX(CryptonightR_instruction1):
-	imul	rbx, rbx
-FN_PREFIX(CryptonightR_instruction2):
-	imul	rbx, rbx
-FN_PREFIX(CryptonightR_instruction3):
-	add	rbx, r9
-	add	rbx, 2147483647
-FN_PREFIX(CryptonightR_instruction4):
-	sub	rbx, r9
-FN_PREFIX(CryptonightR_instruction5):
-	ror	ebx, cl
-FN_PREFIX(CryptonightR_instruction6):
-	rol	ebx, cl
-FN_PREFIX(CryptonightR_instruction7):
-	xor	rbx, r9
-FN_PREFIX(CryptonightR_instruction8):
-	imul	rsi, rbx
-FN_PREFIX(CryptonightR_instruction9):
-	imul	rsi, rbx
-FN_PREFIX(CryptonightR_instruction10):
-	imul	rsi, rbx
-FN_PREFIX(CryptonightR_instruction11):
-	add	rsi, rbx
-	add	rsi, 2147483647
-FN_PREFIX(CryptonightR_instruction12):
-	sub	rsi, rbx
-FN_PREFIX(CryptonightR_instruction13):
-	ror	esi, cl
-FN_PREFIX(CryptonightR_instruction14):
-	rol	esi, cl
-FN_PREFIX(CryptonightR_instruction15):
-	xor	rsi, rbx
-FN_PREFIX(CryptonightR_instruction16):
-	imul	rdi, rbx
-FN_PREFIX(CryptonightR_instruction17):
-	imul	rdi, rbx
-FN_PREFIX(CryptonightR_instruction18):
-	imul	rdi, rbx
-FN_PREFIX(CryptonightR_instruction19):
-	add	rdi, rbx
-	add	rdi, 2147483647
-FN_PREFIX(CryptonightR_instruction20):
-	sub	rdi, rbx
-FN_PREFIX(CryptonightR_instruction21):
-	ror	edi, cl
-FN_PREFIX(CryptonightR_instruction22):
-	rol	edi, cl
-FN_PREFIX(CryptonightR_instruction23):
-	xor	rdi, rbx
-FN_PREFIX(CryptonightR_instruction24):
-	imul	rbp, rbx
-FN_PREFIX(CryptonightR_instruction25):
-	imul	rbp, rbx
-FN_PREFIX(CryptonightR_instruction26):
-	imul	rbp, rbx
-FN_PREFIX(CryptonightR_instruction27):
-	add	rbp, rbx
-	add	rbp, 2147483647
-FN_PREFIX(CryptonightR_instruction28):
-	sub	rbp, rbx
-FN_PREFIX(CryptonightR_instruction29):
-	ror	ebp, cl
-FN_PREFIX(CryptonightR_instruction30):
-	rol	ebp, cl
-FN_PREFIX(CryptonightR_instruction31):
-	xor	rbp, rbx
-FN_PREFIX(CryptonightR_instruction32):
-	imul	rbx, rsi
-FN_PREFIX(CryptonightR_instruction33):
-	imul	rbx, rsi
-FN_PREFIX(CryptonightR_instruction34):
-	imul	rbx, rsi
-FN_PREFIX(CryptonightR_instruction35):
-	add	rbx, rsi
-	add	rbx, 2147483647
-FN_PREFIX(CryptonightR_instruction36):
-	sub	rbx, rsi
-FN_PREFIX(CryptonightR_instruction37):
-	ror	ebx, cl
-FN_PREFIX(CryptonightR_instruction38):
-	rol	ebx, cl
-FN_PREFIX(CryptonightR_instruction39):
-	xor	rbx, rsi
-FN_PREFIX(CryptonightR_instruction40):
-	imul	rsi, rsi
-FN_PREFIX(CryptonightR_instruction41):
-	imul	rsi, rsi
-FN_PREFIX(CryptonightR_instruction42):
-	imul	rsi, rsi
-FN_PREFIX(CryptonightR_instruction43):
-	add	rsi, r9
-	add	rsi, 2147483647
-FN_PREFIX(CryptonightR_instruction44):
-	sub	rsi, r9
-FN_PREFIX(CryptonightR_instruction45):
-	ror	esi, cl
-FN_PREFIX(CryptonightR_instruction46):
-	rol	esi, cl
-FN_PREFIX(CryptonightR_instruction47):
-	xor	rsi, r9
-FN_PREFIX(CryptonightR_instruction48):
-	imul	rdi, rsi
-FN_PREFIX(CryptonightR_instruction49):
-	imul	rdi, rsi
-FN_PREFIX(CryptonightR_instruction50):
-	imul	rdi, rsi
-FN_PREFIX(CryptonightR_instruction51):
-	add	rdi, rsi
-	add	rdi, 2147483647
-FN_PREFIX(CryptonightR_instruction52):
-	sub	rdi, rsi
-FN_PREFIX(CryptonightR_instruction53):
-	ror	edi, cl
-FN_PREFIX(CryptonightR_instruction54):
-	rol	edi, cl
-FN_PREFIX(CryptonightR_instruction55):
-	xor	rdi, rsi
-FN_PREFIX(CryptonightR_instruction56):
-	imul	rbp, rsi
-FN_PREFIX(CryptonightR_instruction57):
-	imul	rbp, rsi
-FN_PREFIX(CryptonightR_instruction58):
-	imul	rbp, rsi
-FN_PREFIX(CryptonightR_instruction59):
-	add	rbp, rsi
-	add	rbp, 2147483647
-FN_PREFIX(CryptonightR_instruction60):
-	sub	rbp, rsi
-FN_PREFIX(CryptonightR_instruction61):
-	ror	ebp, cl
-FN_PREFIX(CryptonightR_instruction62):
-	rol	ebp, cl
-FN_PREFIX(CryptonightR_instruction63):
-	xor	rbp, rsi
-FN_PREFIX(CryptonightR_instruction64):
-	imul	rbx, rdi
-FN_PREFIX(CryptonightR_instruction65):
-	imul	rbx, rdi
-FN_PREFIX(CryptonightR_instruction66):
-	imul	rbx, rdi
-FN_PREFIX(CryptonightR_instruction67):
-	add	rbx, rdi
-	add	rbx, 2147483647
-FN_PREFIX(CryptonightR_instruction68):
-	sub	rbx, rdi
-FN_PREFIX(CryptonightR_instruction69):
-	ror	ebx, cl
-FN_PREFIX(CryptonightR_instruction70):
-	rol	ebx, cl
-FN_PREFIX(CryptonightR_instruction71):
-	xor	rbx, rdi
-FN_PREFIX(CryptonightR_instruction72):
-	imul	rsi, rdi
-FN_PREFIX(CryptonightR_instruction73):
-	imul	rsi, rdi
-FN_PREFIX(CryptonightR_instruction74):
-	imul	rsi, rdi
-FN_PREFIX(CryptonightR_instruction75):
-	add	rsi, rdi
-	add	rsi, 2147483647
-FN_PREFIX(CryptonightR_instruction76):
-	sub	rsi, rdi
-FN_PREFIX(CryptonightR_instruction77):
-	ror	esi, cl
-FN_PREFIX(CryptonightR_instruction78):
-	rol	esi, cl
-FN_PREFIX(CryptonightR_instruction79):
-	xor	rsi, rdi
-FN_PREFIX(CryptonightR_instruction80):
-	imul	rdi, rdi
-FN_PREFIX(CryptonightR_instruction81):
-	imul	rdi, rdi
-FN_PREFIX(CryptonightR_instruction82):
-	imul	rdi, rdi
-FN_PREFIX(CryptonightR_instruction83):
-	add	rdi, r9
-	add	rdi, 2147483647
-FN_PREFIX(CryptonightR_instruction84):
-	sub	rdi, r9
-FN_PREFIX(CryptonightR_instruction85):
-	ror	edi, cl
-FN_PREFIX(CryptonightR_instruction86):
-	rol	edi, cl
-FN_PREFIX(CryptonightR_instruction87):
-	xor	rdi, r9
-FN_PREFIX(CryptonightR_instruction88):
-	imul	rbp, rdi
-FN_PREFIX(CryptonightR_instruction89):
-	imul	rbp, rdi
-FN_PREFIX(CryptonightR_instruction90):
-	imul	rbp, rdi
-FN_PREFIX(CryptonightR_instruction91):
-	add	rbp, rdi
-	add	rbp, 2147483647
-FN_PREFIX(CryptonightR_instruction92):
-	sub	rbp, rdi
-FN_PREFIX(CryptonightR_instruction93):
-	ror	ebp, cl
-FN_PREFIX(CryptonightR_instruction94):
-	rol	ebp, cl
-FN_PREFIX(CryptonightR_instruction95):
-	xor	rbp, rdi
-FN_PREFIX(CryptonightR_instruction96):
-	imul	rbx, rbp
-FN_PREFIX(CryptonightR_instruction97):
-	imul	rbx, rbp
-FN_PREFIX(CryptonightR_instruction98):
-	imul	rbx, rbp
-FN_PREFIX(CryptonightR_instruction99):
-	add	rbx, rbp
-	add	rbx, 2147483647
-FN_PREFIX(CryptonightR_instruction100):
-	sub	rbx, rbp
-FN_PREFIX(CryptonightR_instruction101):
-	ror	ebx, cl
-FN_PREFIX(CryptonightR_instruction102):
-	rol	ebx, cl
-FN_PREFIX(CryptonightR_instruction103):
-	xor	rbx, rbp
-FN_PREFIX(CryptonightR_instruction104):
-	imul	rsi, rbp
-FN_PREFIX(CryptonightR_instruction105):
-	imul	rsi, rbp
-FN_PREFIX(CryptonightR_instruction106):
-	imul	rsi, rbp
-FN_PREFIX(CryptonightR_instruction107):
-	add	rsi, rbp
-	add	rsi, 2147483647
-FN_PREFIX(CryptonightR_instruction108):
-	sub	rsi, rbp
-FN_PREFIX(CryptonightR_instruction109):
-	ror	esi, cl
-FN_PREFIX(CryptonightR_instruction110):
-	rol	esi, cl
-FN_PREFIX(CryptonightR_instruction111):
-	xor	rsi, rbp
-FN_PREFIX(CryptonightR_instruction112):
-	imul	rdi, rbp
-FN_PREFIX(CryptonightR_instruction113):
-	imul	rdi, rbp
-FN_PREFIX(CryptonightR_instruction114):
-	imul	rdi, rbp
-FN_PREFIX(CryptonightR_instruction115):
-	add	rdi, rbp
-	add	rdi, 2147483647
-FN_PREFIX(CryptonightR_instruction116):
-	sub	rdi, rbp
-FN_PREFIX(CryptonightR_instruction117):
-	ror	edi, cl
-FN_PREFIX(CryptonightR_instruction118):
-	rol	edi, cl
-FN_PREFIX(CryptonightR_instruction119):
-	xor	rdi, rbp
-FN_PREFIX(CryptonightR_instruction120):
-	imul	rbp, rbp
-FN_PREFIX(CryptonightR_instruction121):
-	imul	rbp, rbp
-FN_PREFIX(CryptonightR_instruction122):
-	imul	rbp, rbp
-FN_PREFIX(CryptonightR_instruction123):
-	add	rbp, r9
-	add	rbp, 2147483647
-FN_PREFIX(CryptonightR_instruction124):
-	sub	rbp, r9
-FN_PREFIX(CryptonightR_instruction125):
-	ror	ebp, cl
-FN_PREFIX(CryptonightR_instruction126):
-	rol	ebp, cl
-FN_PREFIX(CryptonightR_instruction127):
-	xor	rbp, r9
-FN_PREFIX(CryptonightR_instruction128):
-	imul	rbx, rsp
-FN_PREFIX(CryptonightR_instruction129):
-	imul	rbx, rsp
-FN_PREFIX(CryptonightR_instruction130):
-	imul	rbx, rsp
-FN_PREFIX(CryptonightR_instruction131):
-	add	rbx, rsp
-	add	rbx, 2147483647
-FN_PREFIX(CryptonightR_instruction132):
-	sub	rbx, rsp
-FN_PREFIX(CryptonightR_instruction133):
-	ror	ebx, cl
-FN_PREFIX(CryptonightR_instruction134):
-	rol	ebx, cl
-FN_PREFIX(CryptonightR_instruction135):
-	xor	rbx, rsp
-FN_PREFIX(CryptonightR_instruction136):
-	imul	rsi, rsp
-FN_PREFIX(CryptonightR_instruction137):
-	imul	rsi, rsp
-FN_PREFIX(CryptonightR_instruction138):
-	imul	rsi, rsp
-FN_PREFIX(CryptonightR_instruction139):
-	add	rsi, rsp
-	add	rsi, 2147483647
-FN_PREFIX(CryptonightR_instruction140):
-	sub	rsi, rsp
-FN_PREFIX(CryptonightR_instruction141):
-	ror	esi, cl
-FN_PREFIX(CryptonightR_instruction142):
-	rol	esi, cl
-FN_PREFIX(CryptonightR_instruction143):
-	xor	rsi, rsp
-FN_PREFIX(CryptonightR_instruction144):
-	imul	rdi, rsp
-FN_PREFIX(CryptonightR_instruction145):
-	imul	rdi, rsp
-FN_PREFIX(CryptonightR_instruction146):
-	imul	rdi, rsp
-FN_PREFIX(CryptonightR_instruction147):
-	add	rdi, rsp
-	add	rdi, 2147483647
-FN_PREFIX(CryptonightR_instruction148):
-	sub	rdi, rsp
-FN_PREFIX(CryptonightR_instruction149):
-	ror	edi, cl
-FN_PREFIX(CryptonightR_instruction150):
-	rol	edi, cl
-FN_PREFIX(CryptonightR_instruction151):
-	xor	rdi, rsp
-FN_PREFIX(CryptonightR_instruction152):
-	imul	rbp, rsp
-FN_PREFIX(CryptonightR_instruction153):
-	imul	rbp, rsp
-FN_PREFIX(CryptonightR_instruction154):
-	imul	rbp, rsp
-FN_PREFIX(CryptonightR_instruction155):
-	add	rbp, rsp
-	add	rbp, 2147483647
-FN_PREFIX(CryptonightR_instruction156):
-	sub	rbp, rsp
-FN_PREFIX(CryptonightR_instruction157):
-	ror	ebp, cl
-FN_PREFIX(CryptonightR_instruction158):
-	rol	ebp, cl
-FN_PREFIX(CryptonightR_instruction159):
-	xor	rbp, rsp
-FN_PREFIX(CryptonightR_instruction160):
-	imul	rbx, r15
-FN_PREFIX(CryptonightR_instruction161):
-	imul	rbx, r15
-FN_PREFIX(CryptonightR_instruction162):
-	imul	rbx, r15
-FN_PREFIX(CryptonightR_instruction163):
-	add	rbx, r15
-	add	rbx, 2147483647
-FN_PREFIX(CryptonightR_instruction164):
-	sub	rbx, r15
-FN_PREFIX(CryptonightR_instruction165):
-	ror	ebx, cl
-FN_PREFIX(CryptonightR_instruction166):
-	rol	ebx, cl
-FN_PREFIX(CryptonightR_instruction167):
-	xor	rbx, r15
-FN_PREFIX(CryptonightR_instruction168):
-	imul	rsi, r15
-FN_PREFIX(CryptonightR_instruction169):
-	imul	rsi, r15
-FN_PREFIX(CryptonightR_instruction170):
-	imul	rsi, r15
-FN_PREFIX(CryptonightR_instruction171):
-	add	rsi, r15
-	add	rsi, 2147483647
-FN_PREFIX(CryptonightR_instruction172):
-	sub	rsi, r15
-FN_PREFIX(CryptonightR_instruction173):
-	ror	esi, cl
-FN_PREFIX(CryptonightR_instruction174):
-	rol	esi, cl
-FN_PREFIX(CryptonightR_instruction175):
-	xor	rsi, r15
-FN_PREFIX(CryptonightR_instruction176):
-	imul	rdi, r15
-FN_PREFIX(CryptonightR_instruction177):
-	imul	rdi, r15
-FN_PREFIX(CryptonightR_instruction178):
-	imul	rdi, r15
-FN_PREFIX(CryptonightR_instruction179):
-	add	rdi, r15
-	add	rdi, 2147483647
-FN_PREFIX(CryptonightR_instruction180):
-	sub	rdi, r15
-FN_PREFIX(CryptonightR_instruction181):
-	ror	edi, cl
-FN_PREFIX(CryptonightR_instruction182):
-	rol	edi, cl
-FN_PREFIX(CryptonightR_instruction183):
-	xor	rdi, r15
-FN_PREFIX(CryptonightR_instruction184):
-	imul	rbp, r15
-FN_PREFIX(CryptonightR_instruction185):
-	imul	rbp, r15
-FN_PREFIX(CryptonightR_instruction186):
-	imul	rbp, r15
-FN_PREFIX(CryptonightR_instruction187):
-	add	rbp, r15
-	add	rbp, 2147483647
-FN_PREFIX(CryptonightR_instruction188):
-	sub	rbp, r15
-FN_PREFIX(CryptonightR_instruction189):
-	ror	ebp, cl
-FN_PREFIX(CryptonightR_instruction190):
-	rol	ebp, cl
-FN_PREFIX(CryptonightR_instruction191):
-	xor	rbp, r15
-FN_PREFIX(CryptonightR_instruction192):
-	imul	rbx, rax
-FN_PREFIX(CryptonightR_instruction193):
-	imul	rbx, rax
-FN_PREFIX(CryptonightR_instruction194):
-	imul	rbx, rax
-FN_PREFIX(CryptonightR_instruction195):
-	add	rbx, rax
-	add	rbx, 2147483647
-FN_PREFIX(CryptonightR_instruction196):
-	sub	rbx, rax
-FN_PREFIX(CryptonightR_instruction197):
-	ror	ebx, cl
-FN_PREFIX(CryptonightR_instruction198):
-	rol	ebx, cl
-FN_PREFIX(CryptonightR_instruction199):
-	xor	rbx, rax
-FN_PREFIX(CryptonightR_instruction200):
-	imul	rsi, rax
-FN_PREFIX(CryptonightR_instruction201):
-	imul	rsi, rax
-FN_PREFIX(CryptonightR_instruction202):
-	imul	rsi, rax
-FN_PREFIX(CryptonightR_instruction203):
-	add	rsi, rax
-	add	rsi, 2147483647
-FN_PREFIX(CryptonightR_instruction204):
-	sub	rsi, rax
-FN_PREFIX(CryptonightR_instruction205):
-	ror	esi, cl
-FN_PREFIX(CryptonightR_instruction206):
-	rol	esi, cl
-FN_PREFIX(CryptonightR_instruction207):
-	xor	rsi, rax
-FN_PREFIX(CryptonightR_instruction208):
-	imul	rdi, rax
-FN_PREFIX(CryptonightR_instruction209):
-	imul	rdi, rax
-FN_PREFIX(CryptonightR_instruction210):
-	imul	rdi, rax
-FN_PREFIX(CryptonightR_instruction211):
-	add	rdi, rax
-	add	rdi, 2147483647
-FN_PREFIX(CryptonightR_instruction212):
-	sub	rdi, rax
-FN_PREFIX(CryptonightR_instruction213):
-	ror	edi, cl
-FN_PREFIX(CryptonightR_instruction214):
-	rol	edi, cl
-FN_PREFIX(CryptonightR_instruction215):
-	xor	rdi, rax
-FN_PREFIX(CryptonightR_instruction216):
-	imul	rbp, rax
-FN_PREFIX(CryptonightR_instruction217):
-	imul	rbp, rax
-FN_PREFIX(CryptonightR_instruction218):
-	imul	rbp, rax
-FN_PREFIX(CryptonightR_instruction219):
-	add	rbp, rax
-	add	rbp, 2147483647
-FN_PREFIX(CryptonightR_instruction220):
-	sub	rbp, rax
-FN_PREFIX(CryptonightR_instruction221):
-	ror	ebp, cl
-FN_PREFIX(CryptonightR_instruction222):
-	rol	ebp, cl
-FN_PREFIX(CryptonightR_instruction223):
-	xor	rbp, rax
-FN_PREFIX(CryptonightR_instruction224):
-	imul	rbx, rdx
-FN_PREFIX(CryptonightR_instruction225):
-	imul	rbx, rdx
-FN_PREFIX(CryptonightR_instruction226):
-	imul	rbx, rdx
-FN_PREFIX(CryptonightR_instruction227):
-	add	rbx, rdx
-	add	rbx, 2147483647
-FN_PREFIX(CryptonightR_instruction228):
-	sub	rbx, rdx
-FN_PREFIX(CryptonightR_instruction229):
-	ror	ebx, cl
-FN_PREFIX(CryptonightR_instruction230):
-	rol	ebx, cl
-FN_PREFIX(CryptonightR_instruction231):
-	xor	rbx, rdx
-FN_PREFIX(CryptonightR_instruction232):
-	imul	rsi, rdx
-FN_PREFIX(CryptonightR_instruction233):
-	imul	rsi, rdx
-FN_PREFIX(CryptonightR_instruction234):
-	imul	rsi, rdx
-FN_PREFIX(CryptonightR_instruction235):
-	add	rsi, rdx
-	add	rsi, 2147483647
-FN_PREFIX(CryptonightR_instruction236):
-	sub	rsi, rdx
-FN_PREFIX(CryptonightR_instruction237):
-	ror	esi, cl
-FN_PREFIX(CryptonightR_instruction238):
-	rol	esi, cl
-FN_PREFIX(CryptonightR_instruction239):
-	xor	rsi, rdx
-FN_PREFIX(CryptonightR_instruction240):
-	imul	rdi, rdx
-FN_PREFIX(CryptonightR_instruction241):
-	imul	rdi, rdx
-FN_PREFIX(CryptonightR_instruction242):
-	imul	rdi, rdx
-FN_PREFIX(CryptonightR_instruction243):
-	add	rdi, rdx
-	add	rdi, 2147483647
-FN_PREFIX(CryptonightR_instruction244):
-	sub	rdi, rdx
-FN_PREFIX(CryptonightR_instruction245):
-	ror	edi, cl
-FN_PREFIX(CryptonightR_instruction246):
-	rol	edi, cl
-FN_PREFIX(CryptonightR_instruction247):
-	xor	rdi, rdx
-FN_PREFIX(CryptonightR_instruction248):
-	imul	rbp, rdx
-FN_PREFIX(CryptonightR_instruction249):
-	imul	rbp, rdx
-FN_PREFIX(CryptonightR_instruction250):
-	imul	rbp, rdx
-FN_PREFIX(CryptonightR_instruction251):
-	add	rbp, rdx
-	add	rbp, 2147483647
-FN_PREFIX(CryptonightR_instruction252):
-	sub	rbp, rdx
-FN_PREFIX(CryptonightR_instruction253):
-	ror	ebp, cl
-FN_PREFIX(CryptonightR_instruction254):
-	rol	ebp, cl
-FN_PREFIX(CryptonightR_instruction255):
-	xor	rbp, rdx
-FN_PREFIX(CryptonightR_instruction256):
-	imul	rbx, rbx
-FN_PREFIX(CryptonightR_instruction_mov0):
-
-FN_PREFIX(CryptonightR_instruction_mov1):
-
-FN_PREFIX(CryptonightR_instruction_mov2):
-
-FN_PREFIX(CryptonightR_instruction_mov3):
-
-FN_PREFIX(CryptonightR_instruction_mov4):
-
-FN_PREFIX(CryptonightR_instruction_mov5):
-	mov	rcx, rbx
-FN_PREFIX(CryptonightR_instruction_mov6):
-	mov	rcx, rbx
-FN_PREFIX(CryptonightR_instruction_mov7):
-
-FN_PREFIX(CryptonightR_instruction_mov8):
-
-FN_PREFIX(CryptonightR_instruction_mov9):
-
-FN_PREFIX(CryptonightR_instruction_mov10):
-
-FN_PREFIX(CryptonightR_instruction_mov11):
-
-FN_PREFIX(CryptonightR_instruction_mov12):
-
-FN_PREFIX(CryptonightR_instruction_mov13):
-	mov	rcx, rbx
-FN_PREFIX(CryptonightR_instruction_mov14):
-	mov	rcx, rbx
-FN_PREFIX(CryptonightR_instruction_mov15):
-
-FN_PREFIX(CryptonightR_instruction_mov16):
-
-FN_PREFIX(CryptonightR_instruction_mov17):
-
-FN_PREFIX(CryptonightR_instruction_mov18):
-
-FN_PREFIX(CryptonightR_instruction_mov19):
-
-FN_PREFIX(CryptonightR_instruction_mov20):
-
-FN_PREFIX(CryptonightR_instruction_mov21):
-	mov	rcx, rbx
-FN_PREFIX(CryptonightR_instruction_mov22):
-	mov	rcx, rbx
-FN_PREFIX(CryptonightR_instruction_mov23):
-
-FN_PREFIX(CryptonightR_instruction_mov24):
-
-FN_PREFIX(CryptonightR_instruction_mov25):
-
-FN_PREFIX(CryptonightR_instruction_mov26):
-
-FN_PREFIX(CryptonightR_instruction_mov27):
-
-FN_PREFIX(CryptonightR_instruction_mov28):
-
-FN_PREFIX(CryptonightR_instruction_mov29):
-	mov	rcx, rbx
-FN_PREFIX(CryptonightR_instruction_mov30):
-	mov	rcx, rbx
-FN_PREFIX(CryptonightR_instruction_mov31):
-
-FN_PREFIX(CryptonightR_instruction_mov32):
-
-FN_PREFIX(CryptonightR_instruction_mov33):
-
-FN_PREFIX(CryptonightR_instruction_mov34):
-
-FN_PREFIX(CryptonightR_instruction_mov35):
-
-FN_PREFIX(CryptonightR_instruction_mov36):
-
-FN_PREFIX(CryptonightR_instruction_mov37):
-	mov	rcx, rsi
-FN_PREFIX(CryptonightR_instruction_mov38):
-	mov	rcx, rsi
-FN_PREFIX(CryptonightR_instruction_mov39):
-
-FN_PREFIX(CryptonightR_instruction_mov40):
-
-FN_PREFIX(CryptonightR_instruction_mov41):
-
-FN_PREFIX(CryptonightR_instruction_mov42):
-
-FN_PREFIX(CryptonightR_instruction_mov43):
-
-FN_PREFIX(CryptonightR_instruction_mov44):
-
-FN_PREFIX(CryptonightR_instruction_mov45):
-	mov	rcx, rsi
-FN_PREFIX(CryptonightR_instruction_mov46):
-	mov	rcx, rsi
-FN_PREFIX(CryptonightR_instruction_mov47):
-
-FN_PREFIX(CryptonightR_instruction_mov48):
-
-FN_PREFIX(CryptonightR_instruction_mov49):
-
-FN_PREFIX(CryptonightR_instruction_mov50):
-
-FN_PREFIX(CryptonightR_instruction_mov51):
-
-FN_PREFIX(CryptonightR_instruction_mov52):
-
-FN_PREFIX(CryptonightR_instruction_mov53):
-	mov	rcx, rsi
-FN_PREFIX(CryptonightR_instruction_mov54):
-	mov	rcx, rsi
-FN_PREFIX(CryptonightR_instruction_mov55):
-
-FN_PREFIX(CryptonightR_instruction_mov56):
-
-FN_PREFIX(CryptonightR_instruction_mov57):
-
-FN_PREFIX(CryptonightR_instruction_mov58):
-
-FN_PREFIX(CryptonightR_instruction_mov59):
-
-FN_PREFIX(CryptonightR_instruction_mov60):
-
-FN_PREFIX(CryptonightR_instruction_mov61):
-	mov	rcx, rsi
-FN_PREFIX(CryptonightR_instruction_mov62):
-	mov	rcx, rsi
-FN_PREFIX(CryptonightR_instruction_mov63):
-
-FN_PREFIX(CryptonightR_instruction_mov64):
-
-FN_PREFIX(CryptonightR_instruction_mov65):
-
-FN_PREFIX(CryptonightR_instruction_mov66):
-
-FN_PREFIX(CryptonightR_instruction_mov67):
-
-FN_PREFIX(CryptonightR_instruction_mov68):
-
-FN_PREFIX(CryptonightR_instruction_mov69):
-	mov	rcx, rdi
-FN_PREFIX(CryptonightR_instruction_mov70):
-	mov	rcx, rdi
-FN_PREFIX(CryptonightR_instruction_mov71):
-
-FN_PREFIX(CryptonightR_instruction_mov72):
-
-FN_PREFIX(CryptonightR_instruction_mov73):
-
-FN_PREFIX(CryptonightR_instruction_mov74):
-
-FN_PREFIX(CryptonightR_instruction_mov75):
-
-FN_PREFIX(CryptonightR_instruction_mov76):
-
-FN_PREFIX(CryptonightR_instruction_mov77):
-	mov	rcx, rdi
-FN_PREFIX(CryptonightR_instruction_mov78):
-	mov	rcx, rdi
-FN_PREFIX(CryptonightR_instruction_mov79):
-
-FN_PREFIX(CryptonightR_instruction_mov80):
-
-FN_PREFIX(CryptonightR_instruction_mov81):
-
-FN_PREFIX(CryptonightR_instruction_mov82):
-
-FN_PREFIX(CryptonightR_instruction_mov83):
-
-FN_PREFIX(CryptonightR_instruction_mov84):
-
-FN_PREFIX(CryptonightR_instruction_mov85):
-	mov	rcx, rdi
-FN_PREFIX(CryptonightR_instruction_mov86):
-	mov	rcx, rdi
-FN_PREFIX(CryptonightR_instruction_mov87):
-
-FN_PREFIX(CryptonightR_instruction_mov88):
-
-FN_PREFIX(CryptonightR_instruction_mov89):
-
-FN_PREFIX(CryptonightR_instruction_mov90):
-
-FN_PREFIX(CryptonightR_instruction_mov91):
-
-FN_PREFIX(CryptonightR_instruction_mov92):
-
-FN_PREFIX(CryptonightR_instruction_mov93):
-	mov	rcx, rdi
-FN_PREFIX(CryptonightR_instruction_mov94):
-	mov	rcx, rdi
-FN_PREFIX(CryptonightR_instruction_mov95):
-
-FN_PREFIX(CryptonightR_instruction_mov96):
-
-FN_PREFIX(CryptonightR_instruction_mov97):
-
-FN_PREFIX(CryptonightR_instruction_mov98):
-
-FN_PREFIX(CryptonightR_instruction_mov99):
-
-FN_PREFIX(CryptonightR_instruction_mov100):
-
-FN_PREFIX(CryptonightR_instruction_mov101):
-	mov	rcx, rbp
-FN_PREFIX(CryptonightR_instruction_mov102):
-	mov	rcx, rbp
-FN_PREFIX(CryptonightR_instruction_mov103):
-
-FN_PREFIX(CryptonightR_instruction_mov104):
-
-FN_PREFIX(CryptonightR_instruction_mov105):
-
-FN_PREFIX(CryptonightR_instruction_mov106):
-
-FN_PREFIX(CryptonightR_instruction_mov107):
-
-FN_PREFIX(CryptonightR_instruction_mov108):
-
-FN_PREFIX(CryptonightR_instruction_mov109):
-	mov	rcx, rbp
-FN_PREFIX(CryptonightR_instruction_mov110):
-	mov	rcx, rbp
-FN_PREFIX(CryptonightR_instruction_mov111):
-
-FN_PREFIX(CryptonightR_instruction_mov112):
-
-FN_PREFIX(CryptonightR_instruction_mov113):
-
-FN_PREFIX(CryptonightR_instruction_mov114):
-
-FN_PREFIX(CryptonightR_instruction_mov115):
-
-FN_PREFIX(CryptonightR_instruction_mov116):
-
-FN_PREFIX(CryptonightR_instruction_mov117):
-	mov	rcx, rbp
-FN_PREFIX(CryptonightR_instruction_mov118):
-	mov	rcx, rbp
-FN_PREFIX(CryptonightR_instruction_mov119):
-
-FN_PREFIX(CryptonightR_instruction_mov120):
-
-FN_PREFIX(CryptonightR_instruction_mov121):
-
-FN_PREFIX(CryptonightR_instruction_mov122):
-
-FN_PREFIX(CryptonightR_instruction_mov123):
-
-FN_PREFIX(CryptonightR_instruction_mov124):
-
-FN_PREFIX(CryptonightR_instruction_mov125):
-	mov	rcx, rbp
-FN_PREFIX(CryptonightR_instruction_mov126):
-	mov	rcx, rbp
-FN_PREFIX(CryptonightR_instruction_mov127):
-
-FN_PREFIX(CryptonightR_instruction_mov128):
-
-FN_PREFIX(CryptonightR_instruction_mov129):
-
-FN_PREFIX(CryptonightR_instruction_mov130):
-
-FN_PREFIX(CryptonightR_instruction_mov131):
-
-FN_PREFIX(CryptonightR_instruction_mov132):
-
-FN_PREFIX(CryptonightR_instruction_mov133):
-	mov	rcx, rsp
-FN_PREFIX(CryptonightR_instruction_mov134):
-	mov	rcx, rsp
-FN_PREFIX(CryptonightR_instruction_mov135):
-
-FN_PREFIX(CryptonightR_instruction_mov136):
-
-FN_PREFIX(CryptonightR_instruction_mov137):
-
-FN_PREFIX(CryptonightR_instruction_mov138):
-
-FN_PREFIX(CryptonightR_instruction_mov139):
-
-FN_PREFIX(CryptonightR_instruction_mov140):
-
-FN_PREFIX(CryptonightR_instruction_mov141):
-	mov	rcx, rsp
-FN_PREFIX(CryptonightR_instruction_mov142):
-	mov	rcx, rsp
-FN_PREFIX(CryptonightR_instruction_mov143):
-
-FN_PREFIX(CryptonightR_instruction_mov144):
-
-FN_PREFIX(CryptonightR_instruction_mov145):
-
-FN_PREFIX(CryptonightR_instruction_mov146):
-
-FN_PREFIX(CryptonightR_instruction_mov147):
-
-FN_PREFIX(CryptonightR_instruction_mov148):
-
-FN_PREFIX(CryptonightR_instruction_mov149):
-	mov	rcx, rsp
-FN_PREFIX(CryptonightR_instruction_mov150):
-	mov	rcx, rsp
-FN_PREFIX(CryptonightR_instruction_mov151):
-
-FN_PREFIX(CryptonightR_instruction_mov152):
-
-FN_PREFIX(CryptonightR_instruction_mov153):
-
-FN_PREFIX(CryptonightR_instruction_mov154):
-
-FN_PREFIX(CryptonightR_instruction_mov155):
-
-FN_PREFIX(CryptonightR_instruction_mov156):
-
-FN_PREFIX(CryptonightR_instruction_mov157):
-	mov	rcx, rsp
-FN_PREFIX(CryptonightR_instruction_mov158):
-	mov	rcx, rsp
-FN_PREFIX(CryptonightR_instruction_mov159):
-
-FN_PREFIX(CryptonightR_instruction_mov160):
-
-FN_PREFIX(CryptonightR_instruction_mov161):
-
-FN_PREFIX(CryptonightR_instruction_mov162):
-
-FN_PREFIX(CryptonightR_instruction_mov163):
-
-FN_PREFIX(CryptonightR_instruction_mov164):
-
-FN_PREFIX(CryptonightR_instruction_mov165):
-	mov	rcx, r15
-FN_PREFIX(CryptonightR_instruction_mov166):
-	mov	rcx, r15
-FN_PREFIX(CryptonightR_instruction_mov167):
-
-FN_PREFIX(CryptonightR_instruction_mov168):
-
-FN_PREFIX(CryptonightR_instruction_mov169):
-
-FN_PREFIX(CryptonightR_instruction_mov170):
-
-FN_PREFIX(CryptonightR_instruction_mov171):
-
-FN_PREFIX(CryptonightR_instruction_mov172):
-
-FN_PREFIX(CryptonightR_instruction_mov173):
-	mov	rcx, r15
-FN_PREFIX(CryptonightR_instruction_mov174):
-	mov	rcx, r15
-FN_PREFIX(CryptonightR_instruction_mov175):
-
-FN_PREFIX(CryptonightR_instruction_mov176):
-
-FN_PREFIX(CryptonightR_instruction_mov177):
-
-FN_PREFIX(CryptonightR_instruction_mov178):
-
-FN_PREFIX(CryptonightR_instruction_mov179):
-
-FN_PREFIX(CryptonightR_instruction_mov180):
-
-FN_PREFIX(CryptonightR_instruction_mov181):
-	mov	rcx, r15
-FN_PREFIX(CryptonightR_instruction_mov182):
-	mov	rcx, r15
-FN_PREFIX(CryptonightR_instruction_mov183):
-
-FN_PREFIX(CryptonightR_instruction_mov184):
-
-FN_PREFIX(CryptonightR_instruction_mov185):
-
-FN_PREFIX(CryptonightR_instruction_mov186):
-
-FN_PREFIX(CryptonightR_instruction_mov187):
-
-FN_PREFIX(CryptonightR_instruction_mov188):
-
-FN_PREFIX(CryptonightR_instruction_mov189):
-	mov	rcx, r15
-FN_PREFIX(CryptonightR_instruction_mov190):
-	mov	rcx, r15
-FN_PREFIX(CryptonightR_instruction_mov191):
-
-FN_PREFIX(CryptonightR_instruction_mov192):
-
-FN_PREFIX(CryptonightR_instruction_mov193):
-
-FN_PREFIX(CryptonightR_instruction_mov194):
-
-FN_PREFIX(CryptonightR_instruction_mov195):
-
-FN_PREFIX(CryptonightR_instruction_mov196):
-
-FN_PREFIX(CryptonightR_instruction_mov197):
-	mov	rcx, rax
-FN_PREFIX(CryptonightR_instruction_mov198):
-	mov	rcx, rax
-FN_PREFIX(CryptonightR_instruction_mov199):
-
-FN_PREFIX(CryptonightR_instruction_mov200):
-
-FN_PREFIX(CryptonightR_instruction_mov201):
-
-FN_PREFIX(CryptonightR_instruction_mov202):
-
-FN_PREFIX(CryptonightR_instruction_mov203):
-
-FN_PREFIX(CryptonightR_instruction_mov204):
-
-FN_PREFIX(CryptonightR_instruction_mov205):
-	mov	rcx, rax
-FN_PREFIX(CryptonightR_instruction_mov206):
-	mov	rcx, rax
-FN_PREFIX(CryptonightR_instruction_mov207):
-
-FN_PREFIX(CryptonightR_instruction_mov208):
-
-FN_PREFIX(CryptonightR_instruction_mov209):
-
-FN_PREFIX(CryptonightR_instruction_mov210):
-
-FN_PREFIX(CryptonightR_instruction_mov211):
-
-FN_PREFIX(CryptonightR_instruction_mov212):
-
-FN_PREFIX(CryptonightR_instruction_mov213):
-	mov	rcx, rax
-FN_PREFIX(CryptonightR_instruction_mov214):
-	mov	rcx, rax
-FN_PREFIX(CryptonightR_instruction_mov215):
-
-FN_PREFIX(CryptonightR_instruction_mov216):
-
-FN_PREFIX(CryptonightR_instruction_mov217):
-
-FN_PREFIX(CryptonightR_instruction_mov218):
-
-FN_PREFIX(CryptonightR_instruction_mov219):
-
-FN_PREFIX(CryptonightR_instruction_mov220):
-
-FN_PREFIX(CryptonightR_instruction_mov221):
-	mov	rcx, rax
-FN_PREFIX(CryptonightR_instruction_mov222):
-	mov	rcx, rax
-FN_PREFIX(CryptonightR_instruction_mov223):
-
-FN_PREFIX(CryptonightR_instruction_mov224):
-
-FN_PREFIX(CryptonightR_instruction_mov225):
-
-FN_PREFIX(CryptonightR_instruction_mov226):
-
-FN_PREFIX(CryptonightR_instruction_mov227):
-
-FN_PREFIX(CryptonightR_instruction_mov228):
-
-FN_PREFIX(CryptonightR_instruction_mov229):
-	mov	rcx, rdx
-FN_PREFIX(CryptonightR_instruction_mov230):
-	mov	rcx, rdx
-FN_PREFIX(CryptonightR_instruction_mov231):
-
-FN_PREFIX(CryptonightR_instruction_mov232):
-
-FN_PREFIX(CryptonightR_instruction_mov233):
-
-FN_PREFIX(CryptonightR_instruction_mov234):
-
-FN_PREFIX(CryptonightR_instruction_mov235):
-
-FN_PREFIX(CryptonightR_instruction_mov236):
-
-FN_PREFIX(CryptonightR_instruction_mov237):
-	mov	rcx, rdx
-FN_PREFIX(CryptonightR_instruction_mov238):
-	mov	rcx, rdx
-FN_PREFIX(CryptonightR_instruction_mov239):
-
-FN_PREFIX(CryptonightR_instruction_mov240):
-
-FN_PREFIX(CryptonightR_instruction_mov241):
-
-FN_PREFIX(CryptonightR_instruction_mov242):
-
-FN_PREFIX(CryptonightR_instruction_mov243):
-
-FN_PREFIX(CryptonightR_instruction_mov244):
-
-FN_PREFIX(CryptonightR_instruction_mov245):
-	mov	rcx, rdx
-FN_PREFIX(CryptonightR_instruction_mov246):
-	mov	rcx, rdx
-FN_PREFIX(CryptonightR_instruction_mov247):
-
-FN_PREFIX(CryptonightR_instruction_mov248):
-
-FN_PREFIX(CryptonightR_instruction_mov249):
-
-FN_PREFIX(CryptonightR_instruction_mov250):
-
-FN_PREFIX(CryptonightR_instruction_mov251):
-
-FN_PREFIX(CryptonightR_instruction_mov252):
-
-FN_PREFIX(CryptonightR_instruction_mov253):
-	mov	rcx, rdx
-FN_PREFIX(CryptonightR_instruction_mov254):
-	mov	rcx, rdx
-FN_PREFIX(CryptonightR_instruction_mov255):
-
-FN_PREFIX(CryptonightR_instruction_mov256):
diff --git a/src/crypto/asm/CryptonightR_template.asm b/src/crypto/asm/CryptonightR_template.asm
deleted file mode 100644
index 250eca3d..00000000
--- a/src/crypto/asm/CryptonightR_template.asm
+++ /dev/null
@@ -1,1585 +0,0 @@
-; Auto-generated file, do not edit
-
-_TEXT_CN_TEMPLATE SEGMENT PAGE READ EXECUTE
-PUBLIC CryptonightR_instruction0
-PUBLIC CryptonightR_instruction1
-PUBLIC CryptonightR_instruction2
-PUBLIC CryptonightR_instruction3
-PUBLIC CryptonightR_instruction4
-PUBLIC CryptonightR_instruction5
-PUBLIC CryptonightR_instruction6
-PUBLIC CryptonightR_instruction7
-PUBLIC CryptonightR_instruction8
-PUBLIC CryptonightR_instruction9
-PUBLIC CryptonightR_instruction10
-PUBLIC CryptonightR_instruction11
-PUBLIC CryptonightR_instruction12
-PUBLIC CryptonightR_instruction13
-PUBLIC CryptonightR_instruction14
-PUBLIC CryptonightR_instruction15
-PUBLIC CryptonightR_instruction16
-PUBLIC CryptonightR_instruction17
-PUBLIC CryptonightR_instruction18
-PUBLIC CryptonightR_instruction19
-PUBLIC CryptonightR_instruction20
-PUBLIC CryptonightR_instruction21
-PUBLIC CryptonightR_instruction22
-PUBLIC CryptonightR_instruction23
-PUBLIC CryptonightR_instruction24
-PUBLIC CryptonightR_instruction25
-PUBLIC CryptonightR_instruction26
-PUBLIC CryptonightR_instruction27
-PUBLIC CryptonightR_instruction28
-PUBLIC CryptonightR_instruction29
-PUBLIC CryptonightR_instruction30
-PUBLIC CryptonightR_instruction31
-PUBLIC CryptonightR_instruction32
-PUBLIC CryptonightR_instruction33
-PUBLIC CryptonightR_instruction34
-PUBLIC CryptonightR_instruction35
-PUBLIC CryptonightR_instruction36
-PUBLIC CryptonightR_instruction37
-PUBLIC CryptonightR_instruction38
-PUBLIC CryptonightR_instruction39
-PUBLIC CryptonightR_instruction40
-PUBLIC CryptonightR_instruction41
-PUBLIC CryptonightR_instruction42
-PUBLIC CryptonightR_instruction43
-PUBLIC CryptonightR_instruction44
-PUBLIC CryptonightR_instruction45
-PUBLIC CryptonightR_instruction46
-PUBLIC CryptonightR_instruction47
-PUBLIC CryptonightR_instruction48
-PUBLIC CryptonightR_instruction49
-PUBLIC CryptonightR_instruction50
-PUBLIC CryptonightR_instruction51
-PUBLIC CryptonightR_instruction52
-PUBLIC CryptonightR_instruction53
-PUBLIC CryptonightR_instruction54
-PUBLIC CryptonightR_instruction55
-PUBLIC CryptonightR_instruction56
-PUBLIC CryptonightR_instruction57
-PUBLIC CryptonightR_instruction58
-PUBLIC CryptonightR_instruction59
-PUBLIC CryptonightR_instruction60
-PUBLIC CryptonightR_instruction61
-PUBLIC CryptonightR_instruction62
-PUBLIC CryptonightR_instruction63
-PUBLIC CryptonightR_instruction64
-PUBLIC CryptonightR_instruction65
-PUBLIC CryptonightR_instruction66
-PUBLIC CryptonightR_instruction67
-PUBLIC CryptonightR_instruction68
-PUBLIC CryptonightR_instruction69
-PUBLIC CryptonightR_instruction70
-PUBLIC CryptonightR_instruction71
-PUBLIC CryptonightR_instruction72
-PUBLIC CryptonightR_instruction73
-PUBLIC CryptonightR_instruction74
-PUBLIC CryptonightR_instruction75
-PUBLIC CryptonightR_instruction76
-PUBLIC CryptonightR_instruction77
-PUBLIC CryptonightR_instruction78
-PUBLIC CryptonightR_instruction79
-PUBLIC CryptonightR_instruction80
-PUBLIC CryptonightR_instruction81
-PUBLIC CryptonightR_instruction82
-PUBLIC CryptonightR_instruction83
-PUBLIC CryptonightR_instruction84
-PUBLIC CryptonightR_instruction85
-PUBLIC CryptonightR_instruction86
-PUBLIC CryptonightR_instruction87
-PUBLIC CryptonightR_instruction88
-PUBLIC CryptonightR_instruction89
-PUBLIC CryptonightR_instruction90
-PUBLIC CryptonightR_instruction91
-PUBLIC CryptonightR_instruction92
-PUBLIC CryptonightR_instruction93
-PUBLIC CryptonightR_instruction94
-PUBLIC CryptonightR_instruction95
-PUBLIC CryptonightR_instruction96
-PUBLIC CryptonightR_instruction97
-PUBLIC CryptonightR_instruction98
-PUBLIC CryptonightR_instruction99
-PUBLIC CryptonightR_instruction100
-PUBLIC CryptonightR_instruction101
-PUBLIC CryptonightR_instruction102
-PUBLIC CryptonightR_instruction103
-PUBLIC CryptonightR_instruction104
-PUBLIC CryptonightR_instruction105
-PUBLIC CryptonightR_instruction106
-PUBLIC CryptonightR_instruction107
-PUBLIC CryptonightR_instruction108
-PUBLIC CryptonightR_instruction109
-PUBLIC CryptonightR_instruction110
-PUBLIC CryptonightR_instruction111
-PUBLIC CryptonightR_instruction112
-PUBLIC CryptonightR_instruction113
-PUBLIC CryptonightR_instruction114
-PUBLIC CryptonightR_instruction115
-PUBLIC CryptonightR_instruction116
-PUBLIC CryptonightR_instruction117
-PUBLIC CryptonightR_instruction118
-PUBLIC CryptonightR_instruction119
-PUBLIC CryptonightR_instruction120
-PUBLIC CryptonightR_instruction121
-PUBLIC CryptonightR_instruction122
-PUBLIC CryptonightR_instruction123
-PUBLIC CryptonightR_instruction124
-PUBLIC CryptonightR_instruction125
-PUBLIC CryptonightR_instruction126
-PUBLIC CryptonightR_instruction127
-PUBLIC CryptonightR_instruction128
-PUBLIC CryptonightR_instruction129
-PUBLIC CryptonightR_instruction130
-PUBLIC CryptonightR_instruction131
-PUBLIC CryptonightR_instruction132
-PUBLIC CryptonightR_instruction133
-PUBLIC CryptonightR_instruction134
-PUBLIC CryptonightR_instruction135
-PUBLIC CryptonightR_instruction136
-PUBLIC CryptonightR_instruction137
-PUBLIC CryptonightR_instruction138
-PUBLIC CryptonightR_instruction139
-PUBLIC CryptonightR_instruction140
-PUBLIC CryptonightR_instruction141
-PUBLIC CryptonightR_instruction142
-PUBLIC CryptonightR_instruction143
-PUBLIC CryptonightR_instruction144
-PUBLIC CryptonightR_instruction145
-PUBLIC CryptonightR_instruction146
-PUBLIC CryptonightR_instruction147
-PUBLIC CryptonightR_instruction148
-PUBLIC CryptonightR_instruction149
-PUBLIC CryptonightR_instruction150
-PUBLIC CryptonightR_instruction151
-PUBLIC CryptonightR_instruction152
-PUBLIC CryptonightR_instruction153
-PUBLIC CryptonightR_instruction154
-PUBLIC CryptonightR_instruction155
-PUBLIC CryptonightR_instruction156
-PUBLIC CryptonightR_instruction157
-PUBLIC CryptonightR_instruction158
-PUBLIC CryptonightR_instruction159
-PUBLIC CryptonightR_instruction160
-PUBLIC CryptonightR_instruction161
-PUBLIC CryptonightR_instruction162
-PUBLIC CryptonightR_instruction163
-PUBLIC CryptonightR_instruction164
-PUBLIC CryptonightR_instruction165
-PUBLIC CryptonightR_instruction166
-PUBLIC CryptonightR_instruction167
-PUBLIC CryptonightR_instruction168
-PUBLIC CryptonightR_instruction169
-PUBLIC CryptonightR_instruction170
-PUBLIC CryptonightR_instruction171
-PUBLIC CryptonightR_instruction172
-PUBLIC CryptonightR_instruction173
-PUBLIC CryptonightR_instruction174
-PUBLIC CryptonightR_instruction175
-PUBLIC CryptonightR_instruction176
-PUBLIC CryptonightR_instruction177
-PUBLIC CryptonightR_instruction178
-PUBLIC CryptonightR_instruction179
-PUBLIC CryptonightR_instruction180
-PUBLIC CryptonightR_instruction181
-PUBLIC CryptonightR_instruction182
-PUBLIC CryptonightR_instruction183
-PUBLIC CryptonightR_instruction184
-PUBLIC CryptonightR_instruction185
-PUBLIC CryptonightR_instruction186
-PUBLIC CryptonightR_instruction187
-PUBLIC CryptonightR_instruction188
-PUBLIC CryptonightR_instruction189
-PUBLIC CryptonightR_instruction190
-PUBLIC CryptonightR_instruction191
-PUBLIC CryptonightR_instruction192
-PUBLIC CryptonightR_instruction193
-PUBLIC CryptonightR_instruction194
-PUBLIC CryptonightR_instruction195
-PUBLIC CryptonightR_instruction196
-PUBLIC CryptonightR_instruction197
-PUBLIC CryptonightR_instruction198
-PUBLIC CryptonightR_instruction199
-PUBLIC CryptonightR_instruction200
-PUBLIC CryptonightR_instruction201
-PUBLIC CryptonightR_instruction202
-PUBLIC CryptonightR_instruction203
-PUBLIC CryptonightR_instruction204
-PUBLIC CryptonightR_instruction205
-PUBLIC CryptonightR_instruction206
-PUBLIC CryptonightR_instruction207
-PUBLIC CryptonightR_instruction208
-PUBLIC CryptonightR_instruction209
-PUBLIC CryptonightR_instruction210
-PUBLIC CryptonightR_instruction211
-PUBLIC CryptonightR_instruction212
-PUBLIC CryptonightR_instruction213
-PUBLIC CryptonightR_instruction214
-PUBLIC CryptonightR_instruction215
-PUBLIC CryptonightR_instruction216
-PUBLIC CryptonightR_instruction217
-PUBLIC CryptonightR_instruction218
-PUBLIC CryptonightR_instruction219
-PUBLIC CryptonightR_instruction220
-PUBLIC CryptonightR_instruction221
-PUBLIC CryptonightR_instruction222
-PUBLIC CryptonightR_instruction223
-PUBLIC CryptonightR_instruction224
-PUBLIC CryptonightR_instruction225
-PUBLIC CryptonightR_instruction226
-PUBLIC CryptonightR_instruction227
-PUBLIC CryptonightR_instruction228
-PUBLIC CryptonightR_instruction229
-PUBLIC CryptonightR_instruction230
-PUBLIC CryptonightR_instruction231
-PUBLIC CryptonightR_instruction232
-PUBLIC CryptonightR_instruction233
-PUBLIC CryptonightR_instruction234
-PUBLIC CryptonightR_instruction235
-PUBLIC CryptonightR_instruction236
-PUBLIC CryptonightR_instruction237
-PUBLIC CryptonightR_instruction238
-PUBLIC CryptonightR_instruction239
-PUBLIC CryptonightR_instruction240
-PUBLIC CryptonightR_instruction241
-PUBLIC CryptonightR_instruction242
-PUBLIC CryptonightR_instruction243
-PUBLIC CryptonightR_instruction244
-PUBLIC CryptonightR_instruction245
-PUBLIC CryptonightR_instruction246
-PUBLIC CryptonightR_instruction247
-PUBLIC CryptonightR_instruction248
-PUBLIC CryptonightR_instruction249
-PUBLIC CryptonightR_instruction250
-PUBLIC CryptonightR_instruction251
-PUBLIC CryptonightR_instruction252
-PUBLIC CryptonightR_instruction253
-PUBLIC CryptonightR_instruction254
-PUBLIC CryptonightR_instruction255
-PUBLIC CryptonightR_instruction256
-PUBLIC CryptonightR_instruction_mov0
-PUBLIC CryptonightR_instruction_mov1
-PUBLIC CryptonightR_instruction_mov2
-PUBLIC CryptonightR_instruction_mov3
-PUBLIC CryptonightR_instruction_mov4
-PUBLIC CryptonightR_instruction_mov5
-PUBLIC CryptonightR_instruction_mov6
-PUBLIC CryptonightR_instruction_mov7
-PUBLIC CryptonightR_instruction_mov8
-PUBLIC CryptonightR_instruction_mov9
-PUBLIC CryptonightR_instruction_mov10
-PUBLIC CryptonightR_instruction_mov11
-PUBLIC CryptonightR_instruction_mov12
-PUBLIC CryptonightR_instruction_mov13
-PUBLIC CryptonightR_instruction_mov14
-PUBLIC CryptonightR_instruction_mov15
-PUBLIC CryptonightR_instruction_mov16
-PUBLIC CryptonightR_instruction_mov17
-PUBLIC CryptonightR_instruction_mov18
-PUBLIC CryptonightR_instruction_mov19
-PUBLIC CryptonightR_instruction_mov20
-PUBLIC CryptonightR_instruction_mov21
-PUBLIC CryptonightR_instruction_mov22
-PUBLIC CryptonightR_instruction_mov23
-PUBLIC CryptonightR_instruction_mov24
-PUBLIC CryptonightR_instruction_mov25
-PUBLIC CryptonightR_instruction_mov26
-PUBLIC CryptonightR_instruction_mov27
-PUBLIC CryptonightR_instruction_mov28
-PUBLIC CryptonightR_instruction_mov29
-PUBLIC CryptonightR_instruction_mov30
-PUBLIC CryptonightR_instruction_mov31
-PUBLIC CryptonightR_instruction_mov32
-PUBLIC CryptonightR_instruction_mov33
-PUBLIC CryptonightR_instruction_mov34
-PUBLIC CryptonightR_instruction_mov35
-PUBLIC CryptonightR_instruction_mov36
-PUBLIC CryptonightR_instruction_mov37
-PUBLIC CryptonightR_instruction_mov38
-PUBLIC CryptonightR_instruction_mov39
-PUBLIC CryptonightR_instruction_mov40
-PUBLIC CryptonightR_instruction_mov41
-PUBLIC CryptonightR_instruction_mov42
-PUBLIC CryptonightR_instruction_mov43
-PUBLIC CryptonightR_instruction_mov44
-PUBLIC CryptonightR_instruction_mov45
-PUBLIC CryptonightR_instruction_mov46
-PUBLIC CryptonightR_instruction_mov47
-PUBLIC CryptonightR_instruction_mov48
-PUBLIC CryptonightR_instruction_mov49
-PUBLIC CryptonightR_instruction_mov50
-PUBLIC CryptonightR_instruction_mov51
-PUBLIC CryptonightR_instruction_mov52
-PUBLIC CryptonightR_instruction_mov53
-PUBLIC CryptonightR_instruction_mov54
-PUBLIC CryptonightR_instruction_mov55
-PUBLIC CryptonightR_instruction_mov56
-PUBLIC CryptonightR_instruction_mov57
-PUBLIC CryptonightR_instruction_mov58
-PUBLIC CryptonightR_instruction_mov59
-PUBLIC CryptonightR_instruction_mov60
-PUBLIC CryptonightR_instruction_mov61
-PUBLIC CryptonightR_instruction_mov62
-PUBLIC CryptonightR_instruction_mov63
-PUBLIC CryptonightR_instruction_mov64
-PUBLIC CryptonightR_instruction_mov65
-PUBLIC CryptonightR_instruction_mov66
-PUBLIC CryptonightR_instruction_mov67
-PUBLIC CryptonightR_instruction_mov68
-PUBLIC CryptonightR_instruction_mov69
-PUBLIC CryptonightR_instruction_mov70
-PUBLIC CryptonightR_instruction_mov71
-PUBLIC CryptonightR_instruction_mov72
-PUBLIC CryptonightR_instruction_mov73
-PUBLIC CryptonightR_instruction_mov74
-PUBLIC CryptonightR_instruction_mov75
-PUBLIC CryptonightR_instruction_mov76
-PUBLIC CryptonightR_instruction_mov77
-PUBLIC CryptonightR_instruction_mov78
-PUBLIC CryptonightR_instruction_mov79
-PUBLIC CryptonightR_instruction_mov80
-PUBLIC CryptonightR_instruction_mov81
-PUBLIC CryptonightR_instruction_mov82
-PUBLIC CryptonightR_instruction_mov83
-PUBLIC CryptonightR_instruction_mov84
-PUBLIC CryptonightR_instruction_mov85
-PUBLIC CryptonightR_instruction_mov86
-PUBLIC CryptonightR_instruction_mov87
-PUBLIC CryptonightR_instruction_mov88
-PUBLIC CryptonightR_instruction_mov89
-PUBLIC CryptonightR_instruction_mov90
-PUBLIC CryptonightR_instruction_mov91
-PUBLIC CryptonightR_instruction_mov92
-PUBLIC CryptonightR_instruction_mov93
-PUBLIC CryptonightR_instruction_mov94
-PUBLIC CryptonightR_instruction_mov95
-PUBLIC CryptonightR_instruction_mov96
-PUBLIC CryptonightR_instruction_mov97
-PUBLIC CryptonightR_instruction_mov98
-PUBLIC CryptonightR_instruction_mov99
-PUBLIC CryptonightR_instruction_mov100
-PUBLIC CryptonightR_instruction_mov101
-PUBLIC CryptonightR_instruction_mov102
-PUBLIC CryptonightR_instruction_mov103
-PUBLIC CryptonightR_instruction_mov104
-PUBLIC CryptonightR_instruction_mov105
-PUBLIC CryptonightR_instruction_mov106
-PUBLIC CryptonightR_instruction_mov107
-PUBLIC CryptonightR_instruction_mov108
-PUBLIC CryptonightR_instruction_mov109
-PUBLIC CryptonightR_instruction_mov110
-PUBLIC CryptonightR_instruction_mov111
-PUBLIC CryptonightR_instruction_mov112
-PUBLIC CryptonightR_instruction_mov113
-PUBLIC CryptonightR_instruction_mov114
-PUBLIC CryptonightR_instruction_mov115
-PUBLIC CryptonightR_instruction_mov116
-PUBLIC CryptonightR_instruction_mov117
-PUBLIC CryptonightR_instruction_mov118
-PUBLIC CryptonightR_instruction_mov119
-PUBLIC CryptonightR_instruction_mov120
-PUBLIC CryptonightR_instruction_mov121
-PUBLIC CryptonightR_instruction_mov122
-PUBLIC CryptonightR_instruction_mov123
-PUBLIC CryptonightR_instruction_mov124
-PUBLIC CryptonightR_instruction_mov125
-PUBLIC CryptonightR_instruction_mov126
-PUBLIC CryptonightR_instruction_mov127
-PUBLIC CryptonightR_instruction_mov128
-PUBLIC CryptonightR_instruction_mov129
-PUBLIC CryptonightR_instruction_mov130
-PUBLIC CryptonightR_instruction_mov131
-PUBLIC CryptonightR_instruction_mov132
-PUBLIC CryptonightR_instruction_mov133
-PUBLIC CryptonightR_instruction_mov134
-PUBLIC CryptonightR_instruction_mov135
-PUBLIC CryptonightR_instruction_mov136
-PUBLIC CryptonightR_instruction_mov137
-PUBLIC CryptonightR_instruction_mov138
-PUBLIC CryptonightR_instruction_mov139
-PUBLIC CryptonightR_instruction_mov140
-PUBLIC CryptonightR_instruction_mov141
-PUBLIC CryptonightR_instruction_mov142
-PUBLIC CryptonightR_instruction_mov143
-PUBLIC CryptonightR_instruction_mov144
-PUBLIC CryptonightR_instruction_mov145
-PUBLIC CryptonightR_instruction_mov146
-PUBLIC CryptonightR_instruction_mov147
-PUBLIC CryptonightR_instruction_mov148
-PUBLIC CryptonightR_instruction_mov149
-PUBLIC CryptonightR_instruction_mov150
-PUBLIC CryptonightR_instruction_mov151
-PUBLIC CryptonightR_instruction_mov152
-PUBLIC CryptonightR_instruction_mov153
-PUBLIC CryptonightR_instruction_mov154
-PUBLIC CryptonightR_instruction_mov155
-PUBLIC CryptonightR_instruction_mov156
-PUBLIC CryptonightR_instruction_mov157
-PUBLIC CryptonightR_instruction_mov158
-PUBLIC CryptonightR_instruction_mov159
-PUBLIC CryptonightR_instruction_mov160
-PUBLIC CryptonightR_instruction_mov161
-PUBLIC CryptonightR_instruction_mov162
-PUBLIC CryptonightR_instruction_mov163
-PUBLIC CryptonightR_instruction_mov164
-PUBLIC CryptonightR_instruction_mov165
-PUBLIC CryptonightR_instruction_mov166
-PUBLIC CryptonightR_instruction_mov167
-PUBLIC CryptonightR_instruction_mov168
-PUBLIC CryptonightR_instruction_mov169
-PUBLIC CryptonightR_instruction_mov170
-PUBLIC CryptonightR_instruction_mov171
-PUBLIC CryptonightR_instruction_mov172
-PUBLIC CryptonightR_instruction_mov173
-PUBLIC CryptonightR_instruction_mov174
-PUBLIC CryptonightR_instruction_mov175
-PUBLIC CryptonightR_instruction_mov176
-PUBLIC CryptonightR_instruction_mov177
-PUBLIC CryptonightR_instruction_mov178
-PUBLIC CryptonightR_instruction_mov179
-PUBLIC CryptonightR_instruction_mov180
-PUBLIC CryptonightR_instruction_mov181
-PUBLIC CryptonightR_instruction_mov182
-PUBLIC CryptonightR_instruction_mov183
-PUBLIC CryptonightR_instruction_mov184
-PUBLIC CryptonightR_instruction_mov185
-PUBLIC CryptonightR_instruction_mov186
-PUBLIC CryptonightR_instruction_mov187
-PUBLIC CryptonightR_instruction_mov188
-PUBLIC CryptonightR_instruction_mov189
-PUBLIC CryptonightR_instruction_mov190
-PUBLIC CryptonightR_instruction_mov191
-PUBLIC CryptonightR_instruction_mov192
-PUBLIC CryptonightR_instruction_mov193
-PUBLIC CryptonightR_instruction_mov194
-PUBLIC CryptonightR_instruction_mov195
-PUBLIC CryptonightR_instruction_mov196
-PUBLIC CryptonightR_instruction_mov197
-PUBLIC CryptonightR_instruction_mov198
-PUBLIC CryptonightR_instruction_mov199
-PUBLIC CryptonightR_instruction_mov200
-PUBLIC CryptonightR_instruction_mov201
-PUBLIC CryptonightR_instruction_mov202
-PUBLIC CryptonightR_instruction_mov203
-PUBLIC CryptonightR_instruction_mov204
-PUBLIC CryptonightR_instruction_mov205
-PUBLIC CryptonightR_instruction_mov206
-PUBLIC CryptonightR_instruction_mov207
-PUBLIC CryptonightR_instruction_mov208
-PUBLIC CryptonightR_instruction_mov209
-PUBLIC CryptonightR_instruction_mov210
-PUBLIC CryptonightR_instruction_mov211
-PUBLIC CryptonightR_instruction_mov212
-PUBLIC CryptonightR_instruction_mov213
-PUBLIC CryptonightR_instruction_mov214
-PUBLIC CryptonightR_instruction_mov215
-PUBLIC CryptonightR_instruction_mov216
-PUBLIC CryptonightR_instruction_mov217
-PUBLIC CryptonightR_instruction_mov218
-PUBLIC CryptonightR_instruction_mov219
-PUBLIC CryptonightR_instruction_mov220
-PUBLIC CryptonightR_instruction_mov221
-PUBLIC CryptonightR_instruction_mov222
-PUBLIC CryptonightR_instruction_mov223
-PUBLIC CryptonightR_instruction_mov224
-PUBLIC CryptonightR_instruction_mov225
-PUBLIC CryptonightR_instruction_mov226
-PUBLIC CryptonightR_instruction_mov227
-PUBLIC CryptonightR_instruction_mov228
-PUBLIC CryptonightR_instruction_mov229
-PUBLIC CryptonightR_instruction_mov230
-PUBLIC CryptonightR_instruction_mov231
-PUBLIC CryptonightR_instruction_mov232
-PUBLIC CryptonightR_instruction_mov233
-PUBLIC CryptonightR_instruction_mov234
-PUBLIC CryptonightR_instruction_mov235
-PUBLIC CryptonightR_instruction_mov236
-PUBLIC CryptonightR_instruction_mov237
-PUBLIC CryptonightR_instruction_mov238
-PUBLIC CryptonightR_instruction_mov239
-PUBLIC CryptonightR_instruction_mov240
-PUBLIC CryptonightR_instruction_mov241
-PUBLIC CryptonightR_instruction_mov242
-PUBLIC CryptonightR_instruction_mov243
-PUBLIC CryptonightR_instruction_mov244
-PUBLIC CryptonightR_instruction_mov245
-PUBLIC CryptonightR_instruction_mov246
-PUBLIC CryptonightR_instruction_mov247
-PUBLIC CryptonightR_instruction_mov248
-PUBLIC CryptonightR_instruction_mov249
-PUBLIC CryptonightR_instruction_mov250
-PUBLIC CryptonightR_instruction_mov251
-PUBLIC CryptonightR_instruction_mov252
-PUBLIC CryptonightR_instruction_mov253
-PUBLIC CryptonightR_instruction_mov254
-PUBLIC CryptonightR_instruction_mov255
-PUBLIC CryptonightR_instruction_mov256
-
-INCLUDE CryptonightWOW_template_win.inc
-INCLUDE CryptonightR_template_win.inc
-INCLUDE CryptonightWOW_soft_aes_template_win.inc
-INCLUDE CryptonightR_soft_aes_template_win.inc
-
-CryptonightR_instruction0:
-	imul	rbx, rbx
-CryptonightR_instruction1:
-	imul	rbx, rbx
-CryptonightR_instruction2:
-	imul	rbx, rbx
-CryptonightR_instruction3:
-	add	rbx, r9
-	add	rbx, 2147483647
-CryptonightR_instruction4:
-	sub	rbx, r9
-CryptonightR_instruction5:
-	ror	ebx, cl
-CryptonightR_instruction6:
-	rol	ebx, cl
-CryptonightR_instruction7:
-	xor	rbx, r9
-CryptonightR_instruction8:
-	imul	rsi, rbx
-CryptonightR_instruction9:
-	imul	rsi, rbx
-CryptonightR_instruction10:
-	imul	rsi, rbx
-CryptonightR_instruction11:
-	add	rsi, rbx
-	add	rsi, 2147483647
-CryptonightR_instruction12:
-	sub	rsi, rbx
-CryptonightR_instruction13:
-	ror	esi, cl
-CryptonightR_instruction14:
-	rol	esi, cl
-CryptonightR_instruction15:
-	xor	rsi, rbx
-CryptonightR_instruction16:
-	imul	rdi, rbx
-CryptonightR_instruction17:
-	imul	rdi, rbx
-CryptonightR_instruction18:
-	imul	rdi, rbx
-CryptonightR_instruction19:
-	add	rdi, rbx
-	add	rdi, 2147483647
-CryptonightR_instruction20:
-	sub	rdi, rbx
-CryptonightR_instruction21:
-	ror	edi, cl
-CryptonightR_instruction22:
-	rol	edi, cl
-CryptonightR_instruction23:
-	xor	rdi, rbx
-CryptonightR_instruction24:
-	imul	rbp, rbx
-CryptonightR_instruction25:
-	imul	rbp, rbx
-CryptonightR_instruction26:
-	imul	rbp, rbx
-CryptonightR_instruction27:
-	add	rbp, rbx
-	add	rbp, 2147483647
-CryptonightR_instruction28:
-	sub	rbp, rbx
-CryptonightR_instruction29:
-	ror	ebp, cl
-CryptonightR_instruction30:
-	rol	ebp, cl
-CryptonightR_instruction31:
-	xor	rbp, rbx
-CryptonightR_instruction32:
-	imul	rbx, rsi
-CryptonightR_instruction33:
-	imul	rbx, rsi
-CryptonightR_instruction34:
-	imul	rbx, rsi
-CryptonightR_instruction35:
-	add	rbx, rsi
-	add	rbx, 2147483647
-CryptonightR_instruction36:
-	sub	rbx, rsi
-CryptonightR_instruction37:
-	ror	ebx, cl
-CryptonightR_instruction38:
-	rol	ebx, cl
-CryptonightR_instruction39:
-	xor	rbx, rsi
-CryptonightR_instruction40:
-	imul	rsi, rsi
-CryptonightR_instruction41:
-	imul	rsi, rsi
-CryptonightR_instruction42:
-	imul	rsi, rsi
-CryptonightR_instruction43:
-	add	rsi, r9
-	add	rsi, 2147483647
-CryptonightR_instruction44:
-	sub	rsi, r9
-CryptonightR_instruction45:
-	ror	esi, cl
-CryptonightR_instruction46:
-	rol	esi, cl
-CryptonightR_instruction47:
-	xor	rsi, r9
-CryptonightR_instruction48:
-	imul	rdi, rsi
-CryptonightR_instruction49:
-	imul	rdi, rsi
-CryptonightR_instruction50:
-	imul	rdi, rsi
-CryptonightR_instruction51:
-	add	rdi, rsi
-	add	rdi, 2147483647
-CryptonightR_instruction52:
-	sub	rdi, rsi
-CryptonightR_instruction53:
-	ror	edi, cl
-CryptonightR_instruction54:
-	rol	edi, cl
-CryptonightR_instruction55:
-	xor	rdi, rsi
-CryptonightR_instruction56:
-	imul	rbp, rsi
-CryptonightR_instruction57:
-	imul	rbp, rsi
-CryptonightR_instruction58:
-	imul	rbp, rsi
-CryptonightR_instruction59:
-	add	rbp, rsi
-	add	rbp, 2147483647
-CryptonightR_instruction60:
-	sub	rbp, rsi
-CryptonightR_instruction61:
-	ror	ebp, cl
-CryptonightR_instruction62:
-	rol	ebp, cl
-CryptonightR_instruction63:
-	xor	rbp, rsi
-CryptonightR_instruction64:
-	imul	rbx, rdi
-CryptonightR_instruction65:
-	imul	rbx, rdi
-CryptonightR_instruction66:
-	imul	rbx, rdi
-CryptonightR_instruction67:
-	add	rbx, rdi
-	add	rbx, 2147483647
-CryptonightR_instruction68:
-	sub	rbx, rdi
-CryptonightR_instruction69:
-	ror	ebx, cl
-CryptonightR_instruction70:
-	rol	ebx, cl
-CryptonightR_instruction71:
-	xor	rbx, rdi
-CryptonightR_instruction72:
-	imul	rsi, rdi
-CryptonightR_instruction73:
-	imul	rsi, rdi
-CryptonightR_instruction74:
-	imul	rsi, rdi
-CryptonightR_instruction75:
-	add	rsi, rdi
-	add	rsi, 2147483647
-CryptonightR_instruction76:
-	sub	rsi, rdi
-CryptonightR_instruction77:
-	ror	esi, cl
-CryptonightR_instruction78:
-	rol	esi, cl
-CryptonightR_instruction79:
-	xor	rsi, rdi
-CryptonightR_instruction80:
-	imul	rdi, rdi
-CryptonightR_instruction81:
-	imul	rdi, rdi
-CryptonightR_instruction82:
-	imul	rdi, rdi
-CryptonightR_instruction83:
-	add	rdi, r9
-	add	rdi, 2147483647
-CryptonightR_instruction84:
-	sub	rdi, r9
-CryptonightR_instruction85:
-	ror	edi, cl
-CryptonightR_instruction86:
-	rol	edi, cl
-CryptonightR_instruction87:
-	xor	rdi, r9
-CryptonightR_instruction88:
-	imul	rbp, rdi
-CryptonightR_instruction89:
-	imul	rbp, rdi
-CryptonightR_instruction90:
-	imul	rbp, rdi
-CryptonightR_instruction91:
-	add	rbp, rdi
-	add	rbp, 2147483647
-CryptonightR_instruction92:
-	sub	rbp, rdi
-CryptonightR_instruction93:
-	ror	ebp, cl
-CryptonightR_instruction94:
-	rol	ebp, cl
-CryptonightR_instruction95:
-	xor	rbp, rdi
-CryptonightR_instruction96:
-	imul	rbx, rbp
-CryptonightR_instruction97:
-	imul	rbx, rbp
-CryptonightR_instruction98:
-	imul	rbx, rbp
-CryptonightR_instruction99:
-	add	rbx, rbp
-	add	rbx, 2147483647
-CryptonightR_instruction100:
-	sub	rbx, rbp
-CryptonightR_instruction101:
-	ror	ebx, cl
-CryptonightR_instruction102:
-	rol	ebx, cl
-CryptonightR_instruction103:
-	xor	rbx, rbp
-CryptonightR_instruction104:
-	imul	rsi, rbp
-CryptonightR_instruction105:
-	imul	rsi, rbp
-CryptonightR_instruction106:
-	imul	rsi, rbp
-CryptonightR_instruction107:
-	add	rsi, rbp
-	add	rsi, 2147483647
-CryptonightR_instruction108:
-	sub	rsi, rbp
-CryptonightR_instruction109:
-	ror	esi, cl
-CryptonightR_instruction110:
-	rol	esi, cl
-CryptonightR_instruction111:
-	xor	rsi, rbp
-CryptonightR_instruction112:
-	imul	rdi, rbp
-CryptonightR_instruction113:
-	imul	rdi, rbp
-CryptonightR_instruction114:
-	imul	rdi, rbp
-CryptonightR_instruction115:
-	add	rdi, rbp
-	add	rdi, 2147483647
-CryptonightR_instruction116:
-	sub	rdi, rbp
-CryptonightR_instruction117:
-	ror	edi, cl
-CryptonightR_instruction118:
-	rol	edi, cl
-CryptonightR_instruction119:
-	xor	rdi, rbp
-CryptonightR_instruction120:
-	imul	rbp, rbp
-CryptonightR_instruction121:
-	imul	rbp, rbp
-CryptonightR_instruction122:
-	imul	rbp, rbp
-CryptonightR_instruction123:
-	add	rbp, r9
-	add	rbp, 2147483647
-CryptonightR_instruction124:
-	sub	rbp, r9
-CryptonightR_instruction125:
-	ror	ebp, cl
-CryptonightR_instruction126:
-	rol	ebp, cl
-CryptonightR_instruction127:
-	xor	rbp, r9
-CryptonightR_instruction128:
-	imul	rbx, rsp
-CryptonightR_instruction129:
-	imul	rbx, rsp
-CryptonightR_instruction130:
-	imul	rbx, rsp
-CryptonightR_instruction131:
-	add	rbx, rsp
-	add	rbx, 2147483647
-CryptonightR_instruction132:
-	sub	rbx, rsp
-CryptonightR_instruction133:
-	ror	ebx, cl
-CryptonightR_instruction134:
-	rol	ebx, cl
-CryptonightR_instruction135:
-	xor	rbx, rsp
-CryptonightR_instruction136:
-	imul	rsi, rsp
-CryptonightR_instruction137:
-	imul	rsi, rsp
-CryptonightR_instruction138:
-	imul	rsi, rsp
-CryptonightR_instruction139:
-	add	rsi, rsp
-	add	rsi, 2147483647
-CryptonightR_instruction140:
-	sub	rsi, rsp
-CryptonightR_instruction141:
-	ror	esi, cl
-CryptonightR_instruction142:
-	rol	esi, cl
-CryptonightR_instruction143:
-	xor	rsi, rsp
-CryptonightR_instruction144:
-	imul	rdi, rsp
-CryptonightR_instruction145:
-	imul	rdi, rsp
-CryptonightR_instruction146:
-	imul	rdi, rsp
-CryptonightR_instruction147:
-	add	rdi, rsp
-	add	rdi, 2147483647
-CryptonightR_instruction148:
-	sub	rdi, rsp
-CryptonightR_instruction149:
-	ror	edi, cl
-CryptonightR_instruction150:
-	rol	edi, cl
-CryptonightR_instruction151:
-	xor	rdi, rsp
-CryptonightR_instruction152:
-	imul	rbp, rsp
-CryptonightR_instruction153:
-	imul	rbp, rsp
-CryptonightR_instruction154:
-	imul	rbp, rsp
-CryptonightR_instruction155:
-	add	rbp, rsp
-	add	rbp, 2147483647
-CryptonightR_instruction156:
-	sub	rbp, rsp
-CryptonightR_instruction157:
-	ror	ebp, cl
-CryptonightR_instruction158:
-	rol	ebp, cl
-CryptonightR_instruction159:
-	xor	rbp, rsp
-CryptonightR_instruction160:
-	imul	rbx, r15
-CryptonightR_instruction161:
-	imul	rbx, r15
-CryptonightR_instruction162:
-	imul	rbx, r15
-CryptonightR_instruction163:
-	add	rbx, r15
-	add	rbx, 2147483647
-CryptonightR_instruction164:
-	sub	rbx, r15
-CryptonightR_instruction165:
-	ror	ebx, cl
-CryptonightR_instruction166:
-	rol	ebx, cl
-CryptonightR_instruction167:
-	xor	rbx, r15
-CryptonightR_instruction168:
-	imul	rsi, r15
-CryptonightR_instruction169:
-	imul	rsi, r15
-CryptonightR_instruction170:
-	imul	rsi, r15
-CryptonightR_instruction171:
-	add	rsi, r15
-	add	rsi, 2147483647
-CryptonightR_instruction172:
-	sub	rsi, r15
-CryptonightR_instruction173:
-	ror	esi, cl
-CryptonightR_instruction174:
-	rol	esi, cl
-CryptonightR_instruction175:
-	xor	rsi, r15
-CryptonightR_instruction176:
-	imul	rdi, r15
-CryptonightR_instruction177:
-	imul	rdi, r15
-CryptonightR_instruction178:
-	imul	rdi, r15
-CryptonightR_instruction179:
-	add	rdi, r15
-	add	rdi, 2147483647
-CryptonightR_instruction180:
-	sub	rdi, r15
-CryptonightR_instruction181:
-	ror	edi, cl
-CryptonightR_instruction182:
-	rol	edi, cl
-CryptonightR_instruction183:
-	xor	rdi, r15
-CryptonightR_instruction184:
-	imul	rbp, r15
-CryptonightR_instruction185:
-	imul	rbp, r15
-CryptonightR_instruction186:
-	imul	rbp, r15
-CryptonightR_instruction187:
-	add	rbp, r15
-	add	rbp, 2147483647
-CryptonightR_instruction188:
-	sub	rbp, r15
-CryptonightR_instruction189:
-	ror	ebp, cl
-CryptonightR_instruction190:
-	rol	ebp, cl
-CryptonightR_instruction191:
-	xor	rbp, r15
-CryptonightR_instruction192:
-	imul	rbx, rax
-CryptonightR_instruction193:
-	imul	rbx, rax
-CryptonightR_instruction194:
-	imul	rbx, rax
-CryptonightR_instruction195:
-	add	rbx, rax
-	add	rbx, 2147483647
-CryptonightR_instruction196:
-	sub	rbx, rax
-CryptonightR_instruction197:
-	ror	ebx, cl
-CryptonightR_instruction198:
-	rol	ebx, cl
-CryptonightR_instruction199:
-	xor	rbx, rax
-CryptonightR_instruction200:
-	imul	rsi, rax
-CryptonightR_instruction201:
-	imul	rsi, rax
-CryptonightR_instruction202:
-	imul	rsi, rax
-CryptonightR_instruction203:
-	add	rsi, rax
-	add	rsi, 2147483647
-CryptonightR_instruction204:
-	sub	rsi, rax
-CryptonightR_instruction205:
-	ror	esi, cl
-CryptonightR_instruction206:
-	rol	esi, cl
-CryptonightR_instruction207:
-	xor	rsi, rax
-CryptonightR_instruction208:
-	imul	rdi, rax
-CryptonightR_instruction209:
-	imul	rdi, rax
-CryptonightR_instruction210:
-	imul	rdi, rax
-CryptonightR_instruction211:
-	add	rdi, rax
-	add	rdi, 2147483647
-CryptonightR_instruction212:
-	sub	rdi, rax
-CryptonightR_instruction213:
-	ror	edi, cl
-CryptonightR_instruction214:
-	rol	edi, cl
-CryptonightR_instruction215:
-	xor	rdi, rax
-CryptonightR_instruction216:
-	imul	rbp, rax
-CryptonightR_instruction217:
-	imul	rbp, rax
-CryptonightR_instruction218:
-	imul	rbp, rax
-CryptonightR_instruction219:
-	add	rbp, rax
-	add	rbp, 2147483647
-CryptonightR_instruction220:
-	sub	rbp, rax
-CryptonightR_instruction221:
-	ror	ebp, cl
-CryptonightR_instruction222:
-	rol	ebp, cl
-CryptonightR_instruction223:
-	xor	rbp, rax
-CryptonightR_instruction224:
-	imul	rbx, rdx
-CryptonightR_instruction225:
-	imul	rbx, rdx
-CryptonightR_instruction226:
-	imul	rbx, rdx
-CryptonightR_instruction227:
-	add	rbx, rdx
-	add	rbx, 2147483647
-CryptonightR_instruction228:
-	sub	rbx, rdx
-CryptonightR_instruction229:
-	ror	ebx, cl
-CryptonightR_instruction230:
-	rol	ebx, cl
-CryptonightR_instruction231:
-	xor	rbx, rdx
-CryptonightR_instruction232:
-	imul	rsi, rdx
-CryptonightR_instruction233:
-	imul	rsi, rdx
-CryptonightR_instruction234:
-	imul	rsi, rdx
-CryptonightR_instruction235:
-	add	rsi, rdx
-	add	rsi, 2147483647
-CryptonightR_instruction236:
-	sub	rsi, rdx
-CryptonightR_instruction237:
-	ror	esi, cl
-CryptonightR_instruction238:
-	rol	esi, cl
-CryptonightR_instruction239:
-	xor	rsi, rdx
-CryptonightR_instruction240:
-	imul	rdi, rdx
-CryptonightR_instruction241:
-	imul	rdi, rdx
-CryptonightR_instruction242:
-	imul	rdi, rdx
-CryptonightR_instruction243:
-	add	rdi, rdx
-	add	rdi, 2147483647
-CryptonightR_instruction244:
-	sub	rdi, rdx
-CryptonightR_instruction245:
-	ror	edi, cl
-CryptonightR_instruction246:
-	rol	edi, cl
-CryptonightR_instruction247:
-	xor	rdi, rdx
-CryptonightR_instruction248:
-	imul	rbp, rdx
-CryptonightR_instruction249:
-	imul	rbp, rdx
-CryptonightR_instruction250:
-	imul	rbp, rdx
-CryptonightR_instruction251:
-	add	rbp, rdx
-	add	rbp, 2147483647
-CryptonightR_instruction252:
-	sub	rbp, rdx
-CryptonightR_instruction253:
-	ror	ebp, cl
-CryptonightR_instruction254:
-	rol	ebp, cl
-CryptonightR_instruction255:
-	xor	rbp, rdx
-CryptonightR_instruction256:
-	imul	rbx, rbx
-CryptonightR_instruction_mov0:
-
-CryptonightR_instruction_mov1:
-
-CryptonightR_instruction_mov2:
-
-CryptonightR_instruction_mov3:
-
-CryptonightR_instruction_mov4:
-
-CryptonightR_instruction_mov5:
-	mov	rcx, rbx
-CryptonightR_instruction_mov6:
-	mov	rcx, rbx
-CryptonightR_instruction_mov7:
-
-CryptonightR_instruction_mov8:
-
-CryptonightR_instruction_mov9:
-
-CryptonightR_instruction_mov10:
-
-CryptonightR_instruction_mov11:
-
-CryptonightR_instruction_mov12:
-
-CryptonightR_instruction_mov13:
-	mov	rcx, rbx
-CryptonightR_instruction_mov14:
-	mov	rcx, rbx
-CryptonightR_instruction_mov15:
-
-CryptonightR_instruction_mov16:
-
-CryptonightR_instruction_mov17:
-
-CryptonightR_instruction_mov18:
-
-CryptonightR_instruction_mov19:
-
-CryptonightR_instruction_mov20:
-
-CryptonightR_instruction_mov21:
-	mov	rcx, rbx
-CryptonightR_instruction_mov22:
-	mov	rcx, rbx
-CryptonightR_instruction_mov23:
-
-CryptonightR_instruction_mov24:
-
-CryptonightR_instruction_mov25:
-
-CryptonightR_instruction_mov26:
-
-CryptonightR_instruction_mov27:
-
-CryptonightR_instruction_mov28:
-
-CryptonightR_instruction_mov29:
-	mov	rcx, rbx
-CryptonightR_instruction_mov30:
-	mov	rcx, rbx
-CryptonightR_instruction_mov31:
-
-CryptonightR_instruction_mov32:
-
-CryptonightR_instruction_mov33:
-
-CryptonightR_instruction_mov34:
-
-CryptonightR_instruction_mov35:
-
-CryptonightR_instruction_mov36:
-
-CryptonightR_instruction_mov37:
-	mov	rcx, rsi
-CryptonightR_instruction_mov38:
-	mov	rcx, rsi
-CryptonightR_instruction_mov39:
-
-CryptonightR_instruction_mov40:
-
-CryptonightR_instruction_mov41:
-
-CryptonightR_instruction_mov42:
-
-CryptonightR_instruction_mov43:
-
-CryptonightR_instruction_mov44:
-
-CryptonightR_instruction_mov45:
-	mov	rcx, rsi
-CryptonightR_instruction_mov46:
-	mov	rcx, rsi
-CryptonightR_instruction_mov47:
-
-CryptonightR_instruction_mov48:
-
-CryptonightR_instruction_mov49:
-
-CryptonightR_instruction_mov50:
-
-CryptonightR_instruction_mov51:
-
-CryptonightR_instruction_mov52:
-
-CryptonightR_instruction_mov53:
-	mov	rcx, rsi
-CryptonightR_instruction_mov54:
-	mov	rcx, rsi
-CryptonightR_instruction_mov55:
-
-CryptonightR_instruction_mov56:
-
-CryptonightR_instruction_mov57:
-
-CryptonightR_instruction_mov58:
-
-CryptonightR_instruction_mov59:
-
-CryptonightR_instruction_mov60:
-
-CryptonightR_instruction_mov61:
-	mov	rcx, rsi
-CryptonightR_instruction_mov62:
-	mov	rcx, rsi
-CryptonightR_instruction_mov63:
-
-CryptonightR_instruction_mov64:
-
-CryptonightR_instruction_mov65:
-
-CryptonightR_instruction_mov66:
-
-CryptonightR_instruction_mov67:
-
-CryptonightR_instruction_mov68:
-
-CryptonightR_instruction_mov69:
-	mov	rcx, rdi
-CryptonightR_instruction_mov70:
-	mov	rcx, rdi
-CryptonightR_instruction_mov71:
-
-CryptonightR_instruction_mov72:
-
-CryptonightR_instruction_mov73:
-
-CryptonightR_instruction_mov74:
-
-CryptonightR_instruction_mov75:
-
-CryptonightR_instruction_mov76:
-
-CryptonightR_instruction_mov77:
-	mov	rcx, rdi
-CryptonightR_instruction_mov78:
-	mov	rcx, rdi
-CryptonightR_instruction_mov79:
-
-CryptonightR_instruction_mov80:
-
-CryptonightR_instruction_mov81:
-
-CryptonightR_instruction_mov82:
-
-CryptonightR_instruction_mov83:
-
-CryptonightR_instruction_mov84:
-
-CryptonightR_instruction_mov85:
-	mov	rcx, rdi
-CryptonightR_instruction_mov86:
-	mov	rcx, rdi
-CryptonightR_instruction_mov87:
-
-CryptonightR_instruction_mov88:
-
-CryptonightR_instruction_mov89:
-
-CryptonightR_instruction_mov90:
-
-CryptonightR_instruction_mov91:
-
-CryptonightR_instruction_mov92:
-
-CryptonightR_instruction_mov93:
-	mov	rcx, rdi
-CryptonightR_instruction_mov94:
-	mov	rcx, rdi
-CryptonightR_instruction_mov95:
-
-CryptonightR_instruction_mov96:
-
-CryptonightR_instruction_mov97:
-
-CryptonightR_instruction_mov98:
-
-CryptonightR_instruction_mov99:
-
-CryptonightR_instruction_mov100:
-
-CryptonightR_instruction_mov101:
-	mov	rcx, rbp
-CryptonightR_instruction_mov102:
-	mov	rcx, rbp
-CryptonightR_instruction_mov103:
-
-CryptonightR_instruction_mov104:
-
-CryptonightR_instruction_mov105:
-
-CryptonightR_instruction_mov106:
-
-CryptonightR_instruction_mov107:
-
-CryptonightR_instruction_mov108:
-
-CryptonightR_instruction_mov109:
-	mov	rcx, rbp
-CryptonightR_instruction_mov110:
-	mov	rcx, rbp
-CryptonightR_instruction_mov111:
-
-CryptonightR_instruction_mov112:
-
-CryptonightR_instruction_mov113:
-
-CryptonightR_instruction_mov114:
-
-CryptonightR_instruction_mov115:
-
-CryptonightR_instruction_mov116:
-
-CryptonightR_instruction_mov117:
-	mov	rcx, rbp
-CryptonightR_instruction_mov118:
-	mov	rcx, rbp
-CryptonightR_instruction_mov119:
-
-CryptonightR_instruction_mov120:
-
-CryptonightR_instruction_mov121:
-
-CryptonightR_instruction_mov122:
-
-CryptonightR_instruction_mov123:
-
-CryptonightR_instruction_mov124:
-
-CryptonightR_instruction_mov125:
-	mov	rcx, rbp
-CryptonightR_instruction_mov126:
-	mov	rcx, rbp
-CryptonightR_instruction_mov127:
-
-CryptonightR_instruction_mov128:
-
-CryptonightR_instruction_mov129:
-
-CryptonightR_instruction_mov130:
-
-CryptonightR_instruction_mov131:
-
-CryptonightR_instruction_mov132:
-
-CryptonightR_instruction_mov133:
-	mov	rcx, rsp
-CryptonightR_instruction_mov134:
-	mov	rcx, rsp
-CryptonightR_instruction_mov135:
-
-CryptonightR_instruction_mov136:
-
-CryptonightR_instruction_mov137:
-
-CryptonightR_instruction_mov138:
-
-CryptonightR_instruction_mov139:
-
-CryptonightR_instruction_mov140:
-
-CryptonightR_instruction_mov141:
-	mov	rcx, rsp
-CryptonightR_instruction_mov142:
-	mov	rcx, rsp
-CryptonightR_instruction_mov143:
-
-CryptonightR_instruction_mov144:
-
-CryptonightR_instruction_mov145:
-
-CryptonightR_instruction_mov146:
-
-CryptonightR_instruction_mov147:
-
-CryptonightR_instruction_mov148:
-
-CryptonightR_instruction_mov149:
-	mov	rcx, rsp
-CryptonightR_instruction_mov150:
-	mov	rcx, rsp
-CryptonightR_instruction_mov151:
-
-CryptonightR_instruction_mov152:
-
-CryptonightR_instruction_mov153:
-
-CryptonightR_instruction_mov154:
-
-CryptonightR_instruction_mov155:
-
-CryptonightR_instruction_mov156:
-
-CryptonightR_instruction_mov157:
-	mov	rcx, rsp
-CryptonightR_instruction_mov158:
-	mov	rcx, rsp
-CryptonightR_instruction_mov159:
-
-CryptonightR_instruction_mov160:
-
-CryptonightR_instruction_mov161:
-
-CryptonightR_instruction_mov162:
-
-CryptonightR_instruction_mov163:
-
-CryptonightR_instruction_mov164:
-
-CryptonightR_instruction_mov165:
-	mov	rcx, r15
-CryptonightR_instruction_mov166:
-	mov	rcx, r15
-CryptonightR_instruction_mov167:
-
-CryptonightR_instruction_mov168:
-
-CryptonightR_instruction_mov169:
-
-CryptonightR_instruction_mov170:
-
-CryptonightR_instruction_mov171:
-
-CryptonightR_instruction_mov172:
-
-CryptonightR_instruction_mov173:
-	mov	rcx, r15
-CryptonightR_instruction_mov174:
-	mov	rcx, r15
-CryptonightR_instruction_mov175:
-
-CryptonightR_instruction_mov176:
-
-CryptonightR_instruction_mov177:
-
-CryptonightR_instruction_mov178:
-
-CryptonightR_instruction_mov179:
-
-CryptonightR_instruction_mov180:
-
-CryptonightR_instruction_mov181:
-	mov	rcx, r15
-CryptonightR_instruction_mov182:
-	mov	rcx, r15
-CryptonightR_instruction_mov183:
-
-CryptonightR_instruction_mov184:
-
-CryptonightR_instruction_mov185:
-
-CryptonightR_instruction_mov186:
-
-CryptonightR_instruction_mov187:
-
-CryptonightR_instruction_mov188:
-
-CryptonightR_instruction_mov189:
-	mov	rcx, r15
-CryptonightR_instruction_mov190:
-	mov	rcx, r15
-CryptonightR_instruction_mov191:
-
-CryptonightR_instruction_mov192:
-
-CryptonightR_instruction_mov193:
-
-CryptonightR_instruction_mov194:
-
-CryptonightR_instruction_mov195:
-
-CryptonightR_instruction_mov196:
-
-CryptonightR_instruction_mov197:
-	mov	rcx, rax
-CryptonightR_instruction_mov198:
-	mov	rcx, rax
-CryptonightR_instruction_mov199:
-
-CryptonightR_instruction_mov200:
-
-CryptonightR_instruction_mov201:
-
-CryptonightR_instruction_mov202:
-
-CryptonightR_instruction_mov203:
-
-CryptonightR_instruction_mov204:
-
-CryptonightR_instruction_mov205:
-	mov	rcx, rax
-CryptonightR_instruction_mov206:
-	mov	rcx, rax
-CryptonightR_instruction_mov207:
-
-CryptonightR_instruction_mov208:
-
-CryptonightR_instruction_mov209:
-
-CryptonightR_instruction_mov210:
-
-CryptonightR_instruction_mov211:
-
-CryptonightR_instruction_mov212:
-
-CryptonightR_instruction_mov213:
-	mov	rcx, rax
-CryptonightR_instruction_mov214:
-	mov	rcx, rax
-CryptonightR_instruction_mov215:
-
-CryptonightR_instruction_mov216:
-
-CryptonightR_instruction_mov217:
-
-CryptonightR_instruction_mov218:
-
-CryptonightR_instruction_mov219:
-
-CryptonightR_instruction_mov220:
-
-CryptonightR_instruction_mov221:
-	mov	rcx, rax
-CryptonightR_instruction_mov222:
-	mov	rcx, rax
-CryptonightR_instruction_mov223:
-
-CryptonightR_instruction_mov224:
-
-CryptonightR_instruction_mov225:
-
-CryptonightR_instruction_mov226:
-
-CryptonightR_instruction_mov227:
-
-CryptonightR_instruction_mov228:
-
-CryptonightR_instruction_mov229:
-	mov	rcx, rdx
-CryptonightR_instruction_mov230:
-	mov	rcx, rdx
-CryptonightR_instruction_mov231:
-
-CryptonightR_instruction_mov232:
-
-CryptonightR_instruction_mov233:
-
-CryptonightR_instruction_mov234:
-
-CryptonightR_instruction_mov235:
-
-CryptonightR_instruction_mov236:
-
-CryptonightR_instruction_mov237:
-	mov	rcx, rdx
-CryptonightR_instruction_mov238:
-	mov	rcx, rdx
-CryptonightR_instruction_mov239:
-
-CryptonightR_instruction_mov240:
-
-CryptonightR_instruction_mov241:
-
-CryptonightR_instruction_mov242:
-
-CryptonightR_instruction_mov243:
-
-CryptonightR_instruction_mov244:
-
-CryptonightR_instruction_mov245:
-	mov	rcx, rdx
-CryptonightR_instruction_mov246:
-	mov	rcx, rdx
-CryptonightR_instruction_mov247:
-
-CryptonightR_instruction_mov248:
-
-CryptonightR_instruction_mov249:
-
-CryptonightR_instruction_mov250:
-
-CryptonightR_instruction_mov251:
-
-CryptonightR_instruction_mov252:
-
-CryptonightR_instruction_mov253:
-	mov	rcx, rdx
-CryptonightR_instruction_mov254:
-	mov	rcx, rdx
-CryptonightR_instruction_mov255:
-
-CryptonightR_instruction_mov256:
-
-_TEXT_CN_TEMPLATE ENDS
-END
diff --git a/src/crypto/asm/CryptonightR_template.h b/src/crypto/asm/CryptonightR_template.h
deleted file mode 100644
index d9159a8f..00000000
--- a/src/crypto/asm/CryptonightR_template.h
+++ /dev/null
@@ -1,1087 +0,0 @@
-// Auto-generated file, do not edit
-
-extern "C"
-{
-	void CryptonightWOW_template_part1();
-	void CryptonightWOW_template_mainloop();
-	void CryptonightWOW_template_part2();
-	void CryptonightWOW_template_part3();
-	void CryptonightWOW_template_end();
-	void CryptonightWOW_template_double_part1();
-	void CryptonightWOW_template_double_mainloop();
-	void CryptonightWOW_template_double_part2();
-	void CryptonightWOW_template_double_part3();
-	void CryptonightWOW_template_double_part4();
-	void CryptonightWOW_template_double_end();
-
-	void CryptonightR_template_part1();
-	void CryptonightR_template_mainloop();
-	void CryptonightR_template_part2();
-	void CryptonightR_template_part3();
-	void CryptonightR_template_end();
-	void CryptonightR_template_double_part1();
-	void CryptonightR_template_double_mainloop();
-	void CryptonightR_template_double_part2();
-	void CryptonightR_template_double_part3();
-	void CryptonightR_template_double_part4();
-	void CryptonightR_template_double_end();
-
-	void CryptonightWOW_soft_aes_template_part1();
-	void CryptonightWOW_soft_aes_template_mainloop();
-	void CryptonightWOW_soft_aes_template_part2();
-	void CryptonightWOW_soft_aes_template_part3();
-	void CryptonightWOW_soft_aes_template_end();
-	void CryptonightWOW_soft_aes_template_double_part1();
-	void CryptonightWOW_soft_aes_template_double_mainloop();
-	void CryptonightWOW_soft_aes_template_double_part2();
-	void CryptonightWOW_soft_aes_template_double_part3();
-	void CryptonightWOW_soft_aes_template_double_part4();
-	void CryptonightWOW_soft_aes_template_double_end();
-
-	void CryptonightR_soft_aes_template_part1();
-	void CryptonightR_soft_aes_template_mainloop();
-	void CryptonightR_soft_aes_template_part2();
-	void CryptonightR_soft_aes_template_part3();
-	void CryptonightR_soft_aes_template_end();
-	void CryptonightR_soft_aes_template_double_part1();
-	void CryptonightR_soft_aes_template_double_mainloop();
-	void CryptonightR_soft_aes_template_double_part2();
-	void CryptonightR_soft_aes_template_double_part3();
-	void CryptonightR_soft_aes_template_double_part4();
-	void CryptonightR_soft_aes_template_double_end();
-
-	void CryptonightR_instruction0();
-	void CryptonightR_instruction1();
-	void CryptonightR_instruction2();
-	void CryptonightR_instruction3();
-	void CryptonightR_instruction4();
-	void CryptonightR_instruction5();
-	void CryptonightR_instruction6();
-	void CryptonightR_instruction7();
-	void CryptonightR_instruction8();
-	void CryptonightR_instruction9();
-	void CryptonightR_instruction10();
-	void CryptonightR_instruction11();
-	void CryptonightR_instruction12();
-	void CryptonightR_instruction13();
-	void CryptonightR_instruction14();
-	void CryptonightR_instruction15();
-	void CryptonightR_instruction16();
-	void CryptonightR_instruction17();
-	void CryptonightR_instruction18();
-	void CryptonightR_instruction19();
-	void CryptonightR_instruction20();
-	void CryptonightR_instruction21();
-	void CryptonightR_instruction22();
-	void CryptonightR_instruction23();
-	void CryptonightR_instruction24();
-	void CryptonightR_instruction25();
-	void CryptonightR_instruction26();
-	void CryptonightR_instruction27();
-	void CryptonightR_instruction28();
-	void CryptonightR_instruction29();
-	void CryptonightR_instruction30();
-	void CryptonightR_instruction31();
-	void CryptonightR_instruction32();
-	void CryptonightR_instruction33();
-	void CryptonightR_instruction34();
-	void CryptonightR_instruction35();
-	void CryptonightR_instruction36();
-	void CryptonightR_instruction37();
-	void CryptonightR_instruction38();
-	void CryptonightR_instruction39();
-	void CryptonightR_instruction40();
-	void CryptonightR_instruction41();
-	void CryptonightR_instruction42();
-	void CryptonightR_instruction43();
-	void CryptonightR_instruction44();
-	void CryptonightR_instruction45();
-	void CryptonightR_instruction46();
-	void CryptonightR_instruction47();
-	void CryptonightR_instruction48();
-	void CryptonightR_instruction49();
-	void CryptonightR_instruction50();
-	void CryptonightR_instruction51();
-	void CryptonightR_instruction52();
-	void CryptonightR_instruction53();
-	void CryptonightR_instruction54();
-	void CryptonightR_instruction55();
-	void CryptonightR_instruction56();
-	void CryptonightR_instruction57();
-	void CryptonightR_instruction58();
-	void CryptonightR_instruction59();
-	void CryptonightR_instruction60();
-	void CryptonightR_instruction61();
-	void CryptonightR_instruction62();
-	void CryptonightR_instruction63();
-	void CryptonightR_instruction64();
-	void CryptonightR_instruction65();
-	void CryptonightR_instruction66();
-	void CryptonightR_instruction67();
-	void CryptonightR_instruction68();
-	void CryptonightR_instruction69();
-	void CryptonightR_instruction70();
-	void CryptonightR_instruction71();
-	void CryptonightR_instruction72();
-	void CryptonightR_instruction73();
-	void CryptonightR_instruction74();
-	void CryptonightR_instruction75();
-	void CryptonightR_instruction76();
-	void CryptonightR_instruction77();
-	void CryptonightR_instruction78();
-	void CryptonightR_instruction79();
-	void CryptonightR_instruction80();
-	void CryptonightR_instruction81();
-	void CryptonightR_instruction82();
-	void CryptonightR_instruction83();
-	void CryptonightR_instruction84();
-	void CryptonightR_instruction85();
-	void CryptonightR_instruction86();
-	void CryptonightR_instruction87();
-	void CryptonightR_instruction88();
-	void CryptonightR_instruction89();
-	void CryptonightR_instruction90();
-	void CryptonightR_instruction91();
-	void CryptonightR_instruction92();
-	void CryptonightR_instruction93();
-	void CryptonightR_instruction94();
-	void CryptonightR_instruction95();
-	void CryptonightR_instruction96();
-	void CryptonightR_instruction97();
-	void CryptonightR_instruction98();
-	void CryptonightR_instruction99();
-	void CryptonightR_instruction100();
-	void CryptonightR_instruction101();
-	void CryptonightR_instruction102();
-	void CryptonightR_instruction103();
-	void CryptonightR_instruction104();
-	void CryptonightR_instruction105();
-	void CryptonightR_instruction106();
-	void CryptonightR_instruction107();
-	void CryptonightR_instruction108();
-	void CryptonightR_instruction109();
-	void CryptonightR_instruction110();
-	void CryptonightR_instruction111();
-	void CryptonightR_instruction112();
-	void CryptonightR_instruction113();
-	void CryptonightR_instruction114();
-	void CryptonightR_instruction115();
-	void CryptonightR_instruction116();
-	void CryptonightR_instruction117();
-	void CryptonightR_instruction118();
-	void CryptonightR_instruction119();
-	void CryptonightR_instruction120();
-	void CryptonightR_instruction121();
-	void CryptonightR_instruction122();
-	void CryptonightR_instruction123();
-	void CryptonightR_instruction124();
-	void CryptonightR_instruction125();
-	void CryptonightR_instruction126();
-	void CryptonightR_instruction127();
-	void CryptonightR_instruction128();
-	void CryptonightR_instruction129();
-	void CryptonightR_instruction130();
-	void CryptonightR_instruction131();
-	void CryptonightR_instruction132();
-	void CryptonightR_instruction133();
-	void CryptonightR_instruction134();
-	void CryptonightR_instruction135();
-	void CryptonightR_instruction136();
-	void CryptonightR_instruction137();
-	void CryptonightR_instruction138();
-	void CryptonightR_instruction139();
-	void CryptonightR_instruction140();
-	void CryptonightR_instruction141();
-	void CryptonightR_instruction142();
-	void CryptonightR_instruction143();
-	void CryptonightR_instruction144();
-	void CryptonightR_instruction145();
-	void CryptonightR_instruction146();
-	void CryptonightR_instruction147();
-	void CryptonightR_instruction148();
-	void CryptonightR_instruction149();
-	void CryptonightR_instruction150();
-	void CryptonightR_instruction151();
-	void CryptonightR_instruction152();
-	void CryptonightR_instruction153();
-	void CryptonightR_instruction154();
-	void CryptonightR_instruction155();
-	void CryptonightR_instruction156();
-	void CryptonightR_instruction157();
-	void CryptonightR_instruction158();
-	void CryptonightR_instruction159();
-	void CryptonightR_instruction160();
-	void CryptonightR_instruction161();
-	void CryptonightR_instruction162();
-	void CryptonightR_instruction163();
-	void CryptonightR_instruction164();
-	void CryptonightR_instruction165();
-	void CryptonightR_instruction166();
-	void CryptonightR_instruction167();
-	void CryptonightR_instruction168();
-	void CryptonightR_instruction169();
-	void CryptonightR_instruction170();
-	void CryptonightR_instruction171();
-	void CryptonightR_instruction172();
-	void CryptonightR_instruction173();
-	void CryptonightR_instruction174();
-	void CryptonightR_instruction175();
-	void CryptonightR_instruction176();
-	void CryptonightR_instruction177();
-	void CryptonightR_instruction178();
-	void CryptonightR_instruction179();
-	void CryptonightR_instruction180();
-	void CryptonightR_instruction181();
-	void CryptonightR_instruction182();
-	void CryptonightR_instruction183();
-	void CryptonightR_instruction184();
-	void CryptonightR_instruction185();
-	void CryptonightR_instruction186();
-	void CryptonightR_instruction187();
-	void CryptonightR_instruction188();
-	void CryptonightR_instruction189();
-	void CryptonightR_instruction190();
-	void CryptonightR_instruction191();
-	void CryptonightR_instruction192();
-	void CryptonightR_instruction193();
-	void CryptonightR_instruction194();
-	void CryptonightR_instruction195();
-	void CryptonightR_instruction196();
-	void CryptonightR_instruction197();
-	void CryptonightR_instruction198();
-	void CryptonightR_instruction199();
-	void CryptonightR_instruction200();
-	void CryptonightR_instruction201();
-	void CryptonightR_instruction202();
-	void CryptonightR_instruction203();
-	void CryptonightR_instruction204();
-	void CryptonightR_instruction205();
-	void CryptonightR_instruction206();
-	void CryptonightR_instruction207();
-	void CryptonightR_instruction208();
-	void CryptonightR_instruction209();
-	void CryptonightR_instruction210();
-	void CryptonightR_instruction211();
-	void CryptonightR_instruction212();
-	void CryptonightR_instruction213();
-	void CryptonightR_instruction214();
-	void CryptonightR_instruction215();
-	void CryptonightR_instruction216();
-	void CryptonightR_instruction217();
-	void CryptonightR_instruction218();
-	void CryptonightR_instruction219();
-	void CryptonightR_instruction220();
-	void CryptonightR_instruction221();
-	void CryptonightR_instruction222();
-	void CryptonightR_instruction223();
-	void CryptonightR_instruction224();
-	void CryptonightR_instruction225();
-	void CryptonightR_instruction226();
-	void CryptonightR_instruction227();
-	void CryptonightR_instruction228();
-	void CryptonightR_instruction229();
-	void CryptonightR_instruction230();
-	void CryptonightR_instruction231();
-	void CryptonightR_instruction232();
-	void CryptonightR_instruction233();
-	void CryptonightR_instruction234();
-	void CryptonightR_instruction235();
-	void CryptonightR_instruction236();
-	void CryptonightR_instruction237();
-	void CryptonightR_instruction238();
-	void CryptonightR_instruction239();
-	void CryptonightR_instruction240();
-	void CryptonightR_instruction241();
-	void CryptonightR_instruction242();
-	void CryptonightR_instruction243();
-	void CryptonightR_instruction244();
-	void CryptonightR_instruction245();
-	void CryptonightR_instruction246();
-	void CryptonightR_instruction247();
-	void CryptonightR_instruction248();
-	void CryptonightR_instruction249();
-	void CryptonightR_instruction250();
-	void CryptonightR_instruction251();
-	void CryptonightR_instruction252();
-	void CryptonightR_instruction253();
-	void CryptonightR_instruction254();
-	void CryptonightR_instruction255();
-	void CryptonightR_instruction256();
-	void CryptonightR_instruction_mov0();
-	void CryptonightR_instruction_mov1();
-	void CryptonightR_instruction_mov2();
-	void CryptonightR_instruction_mov3();
-	void CryptonightR_instruction_mov4();
-	void CryptonightR_instruction_mov5();
-	void CryptonightR_instruction_mov6();
-	void CryptonightR_instruction_mov7();
-	void CryptonightR_instruction_mov8();
-	void CryptonightR_instruction_mov9();
-	void CryptonightR_instruction_mov10();
-	void CryptonightR_instruction_mov11();
-	void CryptonightR_instruction_mov12();
-	void CryptonightR_instruction_mov13();
-	void CryptonightR_instruction_mov14();
-	void CryptonightR_instruction_mov15();
-	void CryptonightR_instruction_mov16();
-	void CryptonightR_instruction_mov17();
-	void CryptonightR_instruction_mov18();
-	void CryptonightR_instruction_mov19();
-	void CryptonightR_instruction_mov20();
-	void CryptonightR_instruction_mov21();
-	void CryptonightR_instruction_mov22();
-	void CryptonightR_instruction_mov23();
-	void CryptonightR_instruction_mov24();
-	void CryptonightR_instruction_mov25();
-	void CryptonightR_instruction_mov26();
-	void CryptonightR_instruction_mov27();
-	void CryptonightR_instruction_mov28();
-	void CryptonightR_instruction_mov29();
-	void CryptonightR_instruction_mov30();
-	void CryptonightR_instruction_mov31();
-	void CryptonightR_instruction_mov32();
-	void CryptonightR_instruction_mov33();
-	void CryptonightR_instruction_mov34();
-	void CryptonightR_instruction_mov35();
-	void CryptonightR_instruction_mov36();
-	void CryptonightR_instruction_mov37();
-	void CryptonightR_instruction_mov38();
-	void CryptonightR_instruction_mov39();
-	void CryptonightR_instruction_mov40();
-	void CryptonightR_instruction_mov41();
-	void CryptonightR_instruction_mov42();
-	void CryptonightR_instruction_mov43();
-	void CryptonightR_instruction_mov44();
-	void CryptonightR_instruction_mov45();
-	void CryptonightR_instruction_mov46();
-	void CryptonightR_instruction_mov47();
-	void CryptonightR_instruction_mov48();
-	void CryptonightR_instruction_mov49();
-	void CryptonightR_instruction_mov50();
-	void CryptonightR_instruction_mov51();
-	void CryptonightR_instruction_mov52();
-	void CryptonightR_instruction_mov53();
-	void CryptonightR_instruction_mov54();
-	void CryptonightR_instruction_mov55();
-	void CryptonightR_instruction_mov56();
-	void CryptonightR_instruction_mov57();
-	void CryptonightR_instruction_mov58();
-	void CryptonightR_instruction_mov59();
-	void CryptonightR_instruction_mov60();
-	void CryptonightR_instruction_mov61();
-	void CryptonightR_instruction_mov62();
-	void CryptonightR_instruction_mov63();
-	void CryptonightR_instruction_mov64();
-	void CryptonightR_instruction_mov65();
-	void CryptonightR_instruction_mov66();
-	void CryptonightR_instruction_mov67();
-	void CryptonightR_instruction_mov68();
-	void CryptonightR_instruction_mov69();
-	void CryptonightR_instruction_mov70();
-	void CryptonightR_instruction_mov71();
-	void CryptonightR_instruction_mov72();
-	void CryptonightR_instruction_mov73();
-	void CryptonightR_instruction_mov74();
-	void CryptonightR_instruction_mov75();
-	void CryptonightR_instruction_mov76();
-	void CryptonightR_instruction_mov77();
-	void CryptonightR_instruction_mov78();
-	void CryptonightR_instruction_mov79();
-	void CryptonightR_instruction_mov80();
-	void CryptonightR_instruction_mov81();
-	void CryptonightR_instruction_mov82();
-	void CryptonightR_instruction_mov83();
-	void CryptonightR_instruction_mov84();
-	void CryptonightR_instruction_mov85();
-	void CryptonightR_instruction_mov86();
-	void CryptonightR_instruction_mov87();
-	void CryptonightR_instruction_mov88();
-	void CryptonightR_instruction_mov89();
-	void CryptonightR_instruction_mov90();
-	void CryptonightR_instruction_mov91();
-	void CryptonightR_instruction_mov92();
-	void CryptonightR_instruction_mov93();
-	void CryptonightR_instruction_mov94();
-	void CryptonightR_instruction_mov95();
-	void CryptonightR_instruction_mov96();
-	void CryptonightR_instruction_mov97();
-	void CryptonightR_instruction_mov98();
-	void CryptonightR_instruction_mov99();
-	void CryptonightR_instruction_mov100();
-	void CryptonightR_instruction_mov101();
-	void CryptonightR_instruction_mov102();
-	void CryptonightR_instruction_mov103();
-	void CryptonightR_instruction_mov104();
-	void CryptonightR_instruction_mov105();
-	void CryptonightR_instruction_mov106();
-	void CryptonightR_instruction_mov107();
-	void CryptonightR_instruction_mov108();
-	void CryptonightR_instruction_mov109();
-	void CryptonightR_instruction_mov110();
-	void CryptonightR_instruction_mov111();
-	void CryptonightR_instruction_mov112();
-	void CryptonightR_instruction_mov113();
-	void CryptonightR_instruction_mov114();
-	void CryptonightR_instruction_mov115();
-	void CryptonightR_instruction_mov116();
-	void CryptonightR_instruction_mov117();
-	void CryptonightR_instruction_mov118();
-	void CryptonightR_instruction_mov119();
-	void CryptonightR_instruction_mov120();
-	void CryptonightR_instruction_mov121();
-	void CryptonightR_instruction_mov122();
-	void CryptonightR_instruction_mov123();
-	void CryptonightR_instruction_mov124();
-	void CryptonightR_instruction_mov125();
-	void CryptonightR_instruction_mov126();
-	void CryptonightR_instruction_mov127();
-	void CryptonightR_instruction_mov128();
-	void CryptonightR_instruction_mov129();
-	void CryptonightR_instruction_mov130();
-	void CryptonightR_instruction_mov131();
-	void CryptonightR_instruction_mov132();
-	void CryptonightR_instruction_mov133();
-	void CryptonightR_instruction_mov134();
-	void CryptonightR_instruction_mov135();
-	void CryptonightR_instruction_mov136();
-	void CryptonightR_instruction_mov137();
-	void CryptonightR_instruction_mov138();
-	void CryptonightR_instruction_mov139();
-	void CryptonightR_instruction_mov140();
-	void CryptonightR_instruction_mov141();
-	void CryptonightR_instruction_mov142();
-	void CryptonightR_instruction_mov143();
-	void CryptonightR_instruction_mov144();
-	void CryptonightR_instruction_mov145();
-	void CryptonightR_instruction_mov146();
-	void CryptonightR_instruction_mov147();
-	void CryptonightR_instruction_mov148();
-	void CryptonightR_instruction_mov149();
-	void CryptonightR_instruction_mov150();
-	void CryptonightR_instruction_mov151();
-	void CryptonightR_instruction_mov152();
-	void CryptonightR_instruction_mov153();
-	void CryptonightR_instruction_mov154();
-	void CryptonightR_instruction_mov155();
-	void CryptonightR_instruction_mov156();
-	void CryptonightR_instruction_mov157();
-	void CryptonightR_instruction_mov158();
-	void CryptonightR_instruction_mov159();
-	void CryptonightR_instruction_mov160();
-	void CryptonightR_instruction_mov161();
-	void CryptonightR_instruction_mov162();
-	void CryptonightR_instruction_mov163();
-	void CryptonightR_instruction_mov164();
-	void CryptonightR_instruction_mov165();
-	void CryptonightR_instruction_mov166();
-	void CryptonightR_instruction_mov167();
-	void CryptonightR_instruction_mov168();
-	void CryptonightR_instruction_mov169();
-	void CryptonightR_instruction_mov170();
-	void CryptonightR_instruction_mov171();
-	void CryptonightR_instruction_mov172();
-	void CryptonightR_instruction_mov173();
-	void CryptonightR_instruction_mov174();
-	void CryptonightR_instruction_mov175();
-	void CryptonightR_instruction_mov176();
-	void CryptonightR_instruction_mov177();
-	void CryptonightR_instruction_mov178();
-	void CryptonightR_instruction_mov179();
-	void CryptonightR_instruction_mov180();
-	void CryptonightR_instruction_mov181();
-	void CryptonightR_instruction_mov182();
-	void CryptonightR_instruction_mov183();
-	void CryptonightR_instruction_mov184();
-	void CryptonightR_instruction_mov185();
-	void CryptonightR_instruction_mov186();
-	void CryptonightR_instruction_mov187();
-	void CryptonightR_instruction_mov188();
-	void CryptonightR_instruction_mov189();
-	void CryptonightR_instruction_mov190();
-	void CryptonightR_instruction_mov191();
-	void CryptonightR_instruction_mov192();
-	void CryptonightR_instruction_mov193();
-	void CryptonightR_instruction_mov194();
-	void CryptonightR_instruction_mov195();
-	void CryptonightR_instruction_mov196();
-	void CryptonightR_instruction_mov197();
-	void CryptonightR_instruction_mov198();
-	void CryptonightR_instruction_mov199();
-	void CryptonightR_instruction_mov200();
-	void CryptonightR_instruction_mov201();
-	void CryptonightR_instruction_mov202();
-	void CryptonightR_instruction_mov203();
-	void CryptonightR_instruction_mov204();
-	void CryptonightR_instruction_mov205();
-	void CryptonightR_instruction_mov206();
-	void CryptonightR_instruction_mov207();
-	void CryptonightR_instruction_mov208();
-	void CryptonightR_instruction_mov209();
-	void CryptonightR_instruction_mov210();
-	void CryptonightR_instruction_mov211();
-	void CryptonightR_instruction_mov212();
-	void CryptonightR_instruction_mov213();
-	void CryptonightR_instruction_mov214();
-	void CryptonightR_instruction_mov215();
-	void CryptonightR_instruction_mov216();
-	void CryptonightR_instruction_mov217();
-	void CryptonightR_instruction_mov218();
-	void CryptonightR_instruction_mov219();
-	void CryptonightR_instruction_mov220();
-	void CryptonightR_instruction_mov221();
-	void CryptonightR_instruction_mov222();
-	void CryptonightR_instruction_mov223();
-	void CryptonightR_instruction_mov224();
-	void CryptonightR_instruction_mov225();
-	void CryptonightR_instruction_mov226();
-	void CryptonightR_instruction_mov227();
-	void CryptonightR_instruction_mov228();
-	void CryptonightR_instruction_mov229();
-	void CryptonightR_instruction_mov230();
-	void CryptonightR_instruction_mov231();
-	void CryptonightR_instruction_mov232();
-	void CryptonightR_instruction_mov233();
-	void CryptonightR_instruction_mov234();
-	void CryptonightR_instruction_mov235();
-	void CryptonightR_instruction_mov236();
-	void CryptonightR_instruction_mov237();
-	void CryptonightR_instruction_mov238();
-	void CryptonightR_instruction_mov239();
-	void CryptonightR_instruction_mov240();
-	void CryptonightR_instruction_mov241();
-	void CryptonightR_instruction_mov242();
-	void CryptonightR_instruction_mov243();
-	void CryptonightR_instruction_mov244();
-	void CryptonightR_instruction_mov245();
-	void CryptonightR_instruction_mov246();
-	void CryptonightR_instruction_mov247();
-	void CryptonightR_instruction_mov248();
-	void CryptonightR_instruction_mov249();
-	void CryptonightR_instruction_mov250();
-	void CryptonightR_instruction_mov251();
-	void CryptonightR_instruction_mov252();
-	void CryptonightR_instruction_mov253();
-	void CryptonightR_instruction_mov254();
-	void CryptonightR_instruction_mov255();
-	void CryptonightR_instruction_mov256();
-}
-
-const void_func instructions[257] = {
-	CryptonightR_instruction0,
-	CryptonightR_instruction1,
-	CryptonightR_instruction2,
-	CryptonightR_instruction3,
-	CryptonightR_instruction4,
-	CryptonightR_instruction5,
-	CryptonightR_instruction6,
-	CryptonightR_instruction7,
-	CryptonightR_instruction8,
-	CryptonightR_instruction9,
-	CryptonightR_instruction10,
-	CryptonightR_instruction11,
-	CryptonightR_instruction12,
-	CryptonightR_instruction13,
-	CryptonightR_instruction14,
-	CryptonightR_instruction15,
-	CryptonightR_instruction16,
-	CryptonightR_instruction17,
-	CryptonightR_instruction18,
-	CryptonightR_instruction19,
-	CryptonightR_instruction20,
-	CryptonightR_instruction21,
-	CryptonightR_instruction22,
-	CryptonightR_instruction23,
-	CryptonightR_instruction24,
-	CryptonightR_instruction25,
-	CryptonightR_instruction26,
-	CryptonightR_instruction27,
-	CryptonightR_instruction28,
-	CryptonightR_instruction29,
-	CryptonightR_instruction30,
-	CryptonightR_instruction31,
-	CryptonightR_instruction32,
-	CryptonightR_instruction33,
-	CryptonightR_instruction34,
-	CryptonightR_instruction35,
-	CryptonightR_instruction36,
-	CryptonightR_instruction37,
-	CryptonightR_instruction38,
-	CryptonightR_instruction39,
-	CryptonightR_instruction40,
-	CryptonightR_instruction41,
-	CryptonightR_instruction42,
-	CryptonightR_instruction43,
-	CryptonightR_instruction44,
-	CryptonightR_instruction45,
-	CryptonightR_instruction46,
-	CryptonightR_instruction47,
-	CryptonightR_instruction48,
-	CryptonightR_instruction49,
-	CryptonightR_instruction50,
-	CryptonightR_instruction51,
-	CryptonightR_instruction52,
-	CryptonightR_instruction53,
-	CryptonightR_instruction54,
-	CryptonightR_instruction55,
-	CryptonightR_instruction56,
-	CryptonightR_instruction57,
-	CryptonightR_instruction58,
-	CryptonightR_instruction59,
-	CryptonightR_instruction60,
-	CryptonightR_instruction61,
-	CryptonightR_instruction62,
-	CryptonightR_instruction63,
-	CryptonightR_instruction64,
-	CryptonightR_instruction65,
-	CryptonightR_instruction66,
-	CryptonightR_instruction67,
-	CryptonightR_instruction68,
-	CryptonightR_instruction69,
-	CryptonightR_instruction70,
-	CryptonightR_instruction71,
-	CryptonightR_instruction72,
-	CryptonightR_instruction73,
-	CryptonightR_instruction74,
-	CryptonightR_instruction75,
-	CryptonightR_instruction76,
-	CryptonightR_instruction77,
-	CryptonightR_instruction78,
-	CryptonightR_instruction79,
-	CryptonightR_instruction80,
-	CryptonightR_instruction81,
-	CryptonightR_instruction82,
-	CryptonightR_instruction83,
-	CryptonightR_instruction84,
-	CryptonightR_instruction85,
-	CryptonightR_instruction86,
-	CryptonightR_instruction87,
-	CryptonightR_instruction88,
-	CryptonightR_instruction89,
-	CryptonightR_instruction90,
-	CryptonightR_instruction91,
-	CryptonightR_instruction92,
-	CryptonightR_instruction93,
-	CryptonightR_instruction94,
-	CryptonightR_instruction95,
-	CryptonightR_instruction96,
-	CryptonightR_instruction97,
-	CryptonightR_instruction98,
-	CryptonightR_instruction99,
-	CryptonightR_instruction100,
-	CryptonightR_instruction101,
-	CryptonightR_instruction102,
-	CryptonightR_instruction103,
-	CryptonightR_instruction104,
-	CryptonightR_instruction105,
-	CryptonightR_instruction106,
-	CryptonightR_instruction107,
-	CryptonightR_instruction108,
-	CryptonightR_instruction109,
-	CryptonightR_instruction110,
-	CryptonightR_instruction111,
-	CryptonightR_instruction112,
-	CryptonightR_instruction113,
-	CryptonightR_instruction114,
-	CryptonightR_instruction115,
-	CryptonightR_instruction116,
-	CryptonightR_instruction117,
-	CryptonightR_instruction118,
-	CryptonightR_instruction119,
-	CryptonightR_instruction120,
-	CryptonightR_instruction121,
-	CryptonightR_instruction122,
-	CryptonightR_instruction123,
-	CryptonightR_instruction124,
-	CryptonightR_instruction125,
-	CryptonightR_instruction126,
-	CryptonightR_instruction127,
-	CryptonightR_instruction128,
-	CryptonightR_instruction129,
-	CryptonightR_instruction130,
-	CryptonightR_instruction131,
-	CryptonightR_instruction132,
-	CryptonightR_instruction133,
-	CryptonightR_instruction134,
-	CryptonightR_instruction135,
-	CryptonightR_instruction136,
-	CryptonightR_instruction137,
-	CryptonightR_instruction138,
-	CryptonightR_instruction139,
-	CryptonightR_instruction140,
-	CryptonightR_instruction141,
-	CryptonightR_instruction142,
-	CryptonightR_instruction143,
-	CryptonightR_instruction144,
-	CryptonightR_instruction145,
-	CryptonightR_instruction146,
-	CryptonightR_instruction147,
-	CryptonightR_instruction148,
-	CryptonightR_instruction149,
-	CryptonightR_instruction150,
-	CryptonightR_instruction151,
-	CryptonightR_instruction152,
-	CryptonightR_instruction153,
-	CryptonightR_instruction154,
-	CryptonightR_instruction155,
-	CryptonightR_instruction156,
-	CryptonightR_instruction157,
-	CryptonightR_instruction158,
-	CryptonightR_instruction159,
-	CryptonightR_instruction160,
-	CryptonightR_instruction161,
-	CryptonightR_instruction162,
-	CryptonightR_instruction163,
-	CryptonightR_instruction164,
-	CryptonightR_instruction165,
-	CryptonightR_instruction166,
-	CryptonightR_instruction167,
-	CryptonightR_instruction168,
-	CryptonightR_instruction169,
-	CryptonightR_instruction170,
-	CryptonightR_instruction171,
-	CryptonightR_instruction172,
-	CryptonightR_instruction173,
-	CryptonightR_instruction174,
-	CryptonightR_instruction175,
-	CryptonightR_instruction176,
-	CryptonightR_instruction177,
-	CryptonightR_instruction178,
-	CryptonightR_instruction179,
-	CryptonightR_instruction180,
-	CryptonightR_instruction181,
-	CryptonightR_instruction182,
-	CryptonightR_instruction183,
-	CryptonightR_instruction184,
-	CryptonightR_instruction185,
-	CryptonightR_instruction186,
-	CryptonightR_instruction187,
-	CryptonightR_instruction188,
-	CryptonightR_instruction189,
-	CryptonightR_instruction190,
-	CryptonightR_instruction191,
-	CryptonightR_instruction192,
-	CryptonightR_instruction193,
-	CryptonightR_instruction194,
-	CryptonightR_instruction195,
-	CryptonightR_instruction196,
-	CryptonightR_instruction197,
-	CryptonightR_instruction198,
-	CryptonightR_instruction199,
-	CryptonightR_instruction200,
-	CryptonightR_instruction201,
-	CryptonightR_instruction202,
-	CryptonightR_instruction203,
-	CryptonightR_instruction204,
-	CryptonightR_instruction205,
-	CryptonightR_instruction206,
-	CryptonightR_instruction207,
-	CryptonightR_instruction208,
-	CryptonightR_instruction209,
-	CryptonightR_instruction210,
-	CryptonightR_instruction211,
-	CryptonightR_instruction212,
-	CryptonightR_instruction213,
-	CryptonightR_instruction214,
-	CryptonightR_instruction215,
-	CryptonightR_instruction216,
-	CryptonightR_instruction217,
-	CryptonightR_instruction218,
-	CryptonightR_instruction219,
-	CryptonightR_instruction220,
-	CryptonightR_instruction221,
-	CryptonightR_instruction222,
-	CryptonightR_instruction223,
-	CryptonightR_instruction224,
-	CryptonightR_instruction225,
-	CryptonightR_instruction226,
-	CryptonightR_instruction227,
-	CryptonightR_instruction228,
-	CryptonightR_instruction229,
-	CryptonightR_instruction230,
-	CryptonightR_instruction231,
-	CryptonightR_instruction232,
-	CryptonightR_instruction233,
-	CryptonightR_instruction234,
-	CryptonightR_instruction235,
-	CryptonightR_instruction236,
-	CryptonightR_instruction237,
-	CryptonightR_instruction238,
-	CryptonightR_instruction239,
-	CryptonightR_instruction240,
-	CryptonightR_instruction241,
-	CryptonightR_instruction242,
-	CryptonightR_instruction243,
-	CryptonightR_instruction244,
-	CryptonightR_instruction245,
-	CryptonightR_instruction246,
-	CryptonightR_instruction247,
-	CryptonightR_instruction248,
-	CryptonightR_instruction249,
-	CryptonightR_instruction250,
-	CryptonightR_instruction251,
-	CryptonightR_instruction252,
-	CryptonightR_instruction253,
-	CryptonightR_instruction254,
-	CryptonightR_instruction255,
-	CryptonightR_instruction256,
-};
-
-const void_func instructions_mov[257] = {
-	CryptonightR_instruction_mov0,
-	CryptonightR_instruction_mov1,
-	CryptonightR_instruction_mov2,
-	CryptonightR_instruction_mov3,
-	CryptonightR_instruction_mov4,
-	CryptonightR_instruction_mov5,
-	CryptonightR_instruction_mov6,
-	CryptonightR_instruction_mov7,
-	CryptonightR_instruction_mov8,
-	CryptonightR_instruction_mov9,
-	CryptonightR_instruction_mov10,
-	CryptonightR_instruction_mov11,
-	CryptonightR_instruction_mov12,
-	CryptonightR_instruction_mov13,
-	CryptonightR_instruction_mov14,
-	CryptonightR_instruction_mov15,
-	CryptonightR_instruction_mov16,
-	CryptonightR_instruction_mov17,
-	CryptonightR_instruction_mov18,
-	CryptonightR_instruction_mov19,
-	CryptonightR_instruction_mov20,
-	CryptonightR_instruction_mov21,
-	CryptonightR_instruction_mov22,
-	CryptonightR_instruction_mov23,
-	CryptonightR_instruction_mov24,
-	CryptonightR_instruction_mov25,
-	CryptonightR_instruction_mov26,
-	CryptonightR_instruction_mov27,
-	CryptonightR_instruction_mov28,
-	CryptonightR_instruction_mov29,
-	CryptonightR_instruction_mov30,
-	CryptonightR_instruction_mov31,
-	CryptonightR_instruction_mov32,
-	CryptonightR_instruction_mov33,
-	CryptonightR_instruction_mov34,
-	CryptonightR_instruction_mov35,
-	CryptonightR_instruction_mov36,
-	CryptonightR_instruction_mov37,
-	CryptonightR_instruction_mov38,
-	CryptonightR_instruction_mov39,
-	CryptonightR_instruction_mov40,
-	CryptonightR_instruction_mov41,
-	CryptonightR_instruction_mov42,
-	CryptonightR_instruction_mov43,
-	CryptonightR_instruction_mov44,
-	CryptonightR_instruction_mov45,
-	CryptonightR_instruction_mov46,
-	CryptonightR_instruction_mov47,
-	CryptonightR_instruction_mov48,
-	CryptonightR_instruction_mov49,
-	CryptonightR_instruction_mov50,
-	CryptonightR_instruction_mov51,
-	CryptonightR_instruction_mov52,
-	CryptonightR_instruction_mov53,
-	CryptonightR_instruction_mov54,
-	CryptonightR_instruction_mov55,
-	CryptonightR_instruction_mov56,
-	CryptonightR_instruction_mov57,
-	CryptonightR_instruction_mov58,
-	CryptonightR_instruction_mov59,
-	CryptonightR_instruction_mov60,
-	CryptonightR_instruction_mov61,
-	CryptonightR_instruction_mov62,
-	CryptonightR_instruction_mov63,
-	CryptonightR_instruction_mov64,
-	CryptonightR_instruction_mov65,
-	CryptonightR_instruction_mov66,
-	CryptonightR_instruction_mov67,
-	CryptonightR_instruction_mov68,
-	CryptonightR_instruction_mov69,
-	CryptonightR_instruction_mov70,
-	CryptonightR_instruction_mov71,
-	CryptonightR_instruction_mov72,
-	CryptonightR_instruction_mov73,
-	CryptonightR_instruction_mov74,
-	CryptonightR_instruction_mov75,
-	CryptonightR_instruction_mov76,
-	CryptonightR_instruction_mov77,
-	CryptonightR_instruction_mov78,
-	CryptonightR_instruction_mov79,
-	CryptonightR_instruction_mov80,
-	CryptonightR_instruction_mov81,
-	CryptonightR_instruction_mov82,
-	CryptonightR_instruction_mov83,
-	CryptonightR_instruction_mov84,
-	CryptonightR_instruction_mov85,
-	CryptonightR_instruction_mov86,
-	CryptonightR_instruction_mov87,
-	CryptonightR_instruction_mov88,
-	CryptonightR_instruction_mov89,
-	CryptonightR_instruction_mov90,
-	CryptonightR_instruction_mov91,
-	CryptonightR_instruction_mov92,
-	CryptonightR_instruction_mov93,
-	CryptonightR_instruction_mov94,
-	CryptonightR_instruction_mov95,
-	CryptonightR_instruction_mov96,
-	CryptonightR_instruction_mov97,
-	CryptonightR_instruction_mov98,
-	CryptonightR_instruction_mov99,
-	CryptonightR_instruction_mov100,
-	CryptonightR_instruction_mov101,
-	CryptonightR_instruction_mov102,
-	CryptonightR_instruction_mov103,
-	CryptonightR_instruction_mov104,
-	CryptonightR_instruction_mov105,
-	CryptonightR_instruction_mov106,
-	CryptonightR_instruction_mov107,
-	CryptonightR_instruction_mov108,
-	CryptonightR_instruction_mov109,
-	CryptonightR_instruction_mov110,
-	CryptonightR_instruction_mov111,
-	CryptonightR_instruction_mov112,
-	CryptonightR_instruction_mov113,
-	CryptonightR_instruction_mov114,
-	CryptonightR_instruction_mov115,
-	CryptonightR_instruction_mov116,
-	CryptonightR_instruction_mov117,
-	CryptonightR_instruction_mov118,
-	CryptonightR_instruction_mov119,
-	CryptonightR_instruction_mov120,
-	CryptonightR_instruction_mov121,
-	CryptonightR_instruction_mov122,
-	CryptonightR_instruction_mov123,
-	CryptonightR_instruction_mov124,
-	CryptonightR_instruction_mov125,
-	CryptonightR_instruction_mov126,
-	CryptonightR_instruction_mov127,
-	CryptonightR_instruction_mov128,
-	CryptonightR_instruction_mov129,
-	CryptonightR_instruction_mov130,
-	CryptonightR_instruction_mov131,
-	CryptonightR_instruction_mov132,
-	CryptonightR_instruction_mov133,
-	CryptonightR_instruction_mov134,
-	CryptonightR_instruction_mov135,
-	CryptonightR_instruction_mov136,
-	CryptonightR_instruction_mov137,
-	CryptonightR_instruction_mov138,
-	CryptonightR_instruction_mov139,
-	CryptonightR_instruction_mov140,
-	CryptonightR_instruction_mov141,
-	CryptonightR_instruction_mov142,
-	CryptonightR_instruction_mov143,
-	CryptonightR_instruction_mov144,
-	CryptonightR_instruction_mov145,
-	CryptonightR_instruction_mov146,
-	CryptonightR_instruction_mov147,
-	CryptonightR_instruction_mov148,
-	CryptonightR_instruction_mov149,
-	CryptonightR_instruction_mov150,
-	CryptonightR_instruction_mov151,
-	CryptonightR_instruction_mov152,
-	CryptonightR_instruction_mov153,
-	CryptonightR_instruction_mov154,
-	CryptonightR_instruction_mov155,
-	CryptonightR_instruction_mov156,
-	CryptonightR_instruction_mov157,
-	CryptonightR_instruction_mov158,
-	CryptonightR_instruction_mov159,
-	CryptonightR_instruction_mov160,
-	CryptonightR_instruction_mov161,
-	CryptonightR_instruction_mov162,
-	CryptonightR_instruction_mov163,
-	CryptonightR_instruction_mov164,
-	CryptonightR_instruction_mov165,
-	CryptonightR_instruction_mov166,
-	CryptonightR_instruction_mov167,
-	CryptonightR_instruction_mov168,
-	CryptonightR_instruction_mov169,
-	CryptonightR_instruction_mov170,
-	CryptonightR_instruction_mov171,
-	CryptonightR_instruction_mov172,
-	CryptonightR_instruction_mov173,
-	CryptonightR_instruction_mov174,
-	CryptonightR_instruction_mov175,
-	CryptonightR_instruction_mov176,
-	CryptonightR_instruction_mov177,
-	CryptonightR_instruction_mov178,
-	CryptonightR_instruction_mov179,
-	CryptonightR_instruction_mov180,
-	CryptonightR_instruction_mov181,
-	CryptonightR_instruction_mov182,
-	CryptonightR_instruction_mov183,
-	CryptonightR_instruction_mov184,
-	CryptonightR_instruction_mov185,
-	CryptonightR_instruction_mov186,
-	CryptonightR_instruction_mov187,
-	CryptonightR_instruction_mov188,
-	CryptonightR_instruction_mov189,
-	CryptonightR_instruction_mov190,
-	CryptonightR_instruction_mov191,
-	CryptonightR_instruction_mov192,
-	CryptonightR_instruction_mov193,
-	CryptonightR_instruction_mov194,
-	CryptonightR_instruction_mov195,
-	CryptonightR_instruction_mov196,
-	CryptonightR_instruction_mov197,
-	CryptonightR_instruction_mov198,
-	CryptonightR_instruction_mov199,
-	CryptonightR_instruction_mov200,
-	CryptonightR_instruction_mov201,
-	CryptonightR_instruction_mov202,
-	CryptonightR_instruction_mov203,
-	CryptonightR_instruction_mov204,
-	CryptonightR_instruction_mov205,
-	CryptonightR_instruction_mov206,
-	CryptonightR_instruction_mov207,
-	CryptonightR_instruction_mov208,
-	CryptonightR_instruction_mov209,
-	CryptonightR_instruction_mov210,
-	CryptonightR_instruction_mov211,
-	CryptonightR_instruction_mov212,
-	CryptonightR_instruction_mov213,
-	CryptonightR_instruction_mov214,
-	CryptonightR_instruction_mov215,
-	CryptonightR_instruction_mov216,
-	CryptonightR_instruction_mov217,
-	CryptonightR_instruction_mov218,
-	CryptonightR_instruction_mov219,
-	CryptonightR_instruction_mov220,
-	CryptonightR_instruction_mov221,
-	CryptonightR_instruction_mov222,
-	CryptonightR_instruction_mov223,
-	CryptonightR_instruction_mov224,
-	CryptonightR_instruction_mov225,
-	CryptonightR_instruction_mov226,
-	CryptonightR_instruction_mov227,
-	CryptonightR_instruction_mov228,
-	CryptonightR_instruction_mov229,
-	CryptonightR_instruction_mov230,
-	CryptonightR_instruction_mov231,
-	CryptonightR_instruction_mov232,
-	CryptonightR_instruction_mov233,
-	CryptonightR_instruction_mov234,
-	CryptonightR_instruction_mov235,
-	CryptonightR_instruction_mov236,
-	CryptonightR_instruction_mov237,
-	CryptonightR_instruction_mov238,
-	CryptonightR_instruction_mov239,
-	CryptonightR_instruction_mov240,
-	CryptonightR_instruction_mov241,
-	CryptonightR_instruction_mov242,
-	CryptonightR_instruction_mov243,
-	CryptonightR_instruction_mov244,
-	CryptonightR_instruction_mov245,
-	CryptonightR_instruction_mov246,
-	CryptonightR_instruction_mov247,
-	CryptonightR_instruction_mov248,
-	CryptonightR_instruction_mov249,
-	CryptonightR_instruction_mov250,
-	CryptonightR_instruction_mov251,
-	CryptonightR_instruction_mov252,
-	CryptonightR_instruction_mov253,
-	CryptonightR_instruction_mov254,
-	CryptonightR_instruction_mov255,
-	CryptonightR_instruction_mov256,
-};
diff --git a/src/crypto/asm/CryptonightR_template.inc b/src/crypto/asm/CryptonightR_template.inc
deleted file mode 100644
index 61b6b985..00000000
--- a/src/crypto/asm/CryptonightR_template.inc
+++ /dev/null
@@ -1,536 +0,0 @@
-PUBLIC FN_PREFIX(CryptonightR_template_part1)
-PUBLIC FN_PREFIX(CryptonightR_template_mainloop)
-PUBLIC FN_PREFIX(CryptonightR_template_part2)
-PUBLIC FN_PREFIX(CryptonightR_template_part3)
-PUBLIC FN_PREFIX(CryptonightR_template_end)
-PUBLIC FN_PREFIX(CryptonightR_template_double_part1)
-PUBLIC FN_PREFIX(CryptonightR_template_double_mainloop)
-PUBLIC FN_PREFIX(CryptonightR_template_double_part2)
-PUBLIC FN_PREFIX(CryptonightR_template_double_part3)
-PUBLIC FN_PREFIX(CryptonightR_template_double_part4)
-PUBLIC FN_PREFIX(CryptonightR_template_double_end)
-
-ALIGN(64)
-FN_PREFIX(CryptonightR_template_part1):
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+16], rbx
-	mov	QWORD PTR [rsp+24], rbp
-	mov	QWORD PTR [rsp+32], rsi
-	push	r10
-	push	r11
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	push	rdi
-	sub	rsp, 64
-	mov	r12, rcx
-	mov	r8, QWORD PTR [r12+32]
-	mov	rdx, r12
-	xor	r8, QWORD PTR [r12]
-	mov	r15, QWORD PTR [r12+40]
-	mov	r9, r8
-	xor	r15, QWORD PTR [r12+8]
-	mov	r11, QWORD PTR [r12+224]
-	mov	r12, QWORD PTR [r12+56]
-	xor	r12, QWORD PTR [rdx+24]
-	mov	rax, QWORD PTR [rdx+48]
-	xor	rax, QWORD PTR [rdx+16]
-	movaps	XMMWORD PTR [rsp+48], xmm6
-	movq	xmm0, r12
-	movaps	XMMWORD PTR [rsp+32], xmm7
-	movaps	XMMWORD PTR [rsp+16], xmm8
-	movaps	XMMWORD PTR [rsp], xmm9
-	mov	r12, QWORD PTR [rdx+88]
-	xor	r12, QWORD PTR [rdx+72]
-	movq	xmm6, rax
-	mov	rax, QWORD PTR [rdx+80]
-	xor	rax, QWORD PTR [rdx+64]
-	punpcklqdq xmm6, xmm0
-	and	r9d, 2097136
-	movq	xmm0, r12
-	movq	xmm7, rax
-	punpcklqdq xmm7, xmm0
-	mov r10d, r9d
-	movq	xmm9, rsp
-	mov rsp, r8
-	mov	r8d, 524288
-
-	mov	ebx, [rdx+96]
-	mov	esi, [rdx+100]
-	mov	edi, [rdx+104]
-	mov	ebp, [rdx+108]
-
-	ALIGN(64)
-FN_PREFIX(CryptonightR_template_mainloop):
-	movdqa	xmm5, XMMWORD PTR [r9+r11]
-	movq	xmm0, r15
-	movq	xmm4, rsp
-	punpcklqdq xmm4, xmm0
-	lea	rdx, QWORD PTR [r9+r11]
-
-	aesenc	xmm5, xmm4
-
-	mov	r13d, r9d
-	mov	eax, r9d
-	xor	r9d, 48
-	xor	r13d, 16
-	xor	eax, 32
-	movdqu	xmm0, XMMWORD PTR [r9+r11]
-	movaps xmm3, xmm0
-	movdqu	xmm2, XMMWORD PTR [r13+r11]
-	movdqu	xmm1, XMMWORD PTR [rax+r11]
-	pxor xmm0, xmm2
-	pxor xmm5, xmm1
-	pxor xmm5, xmm0
-
-	movq	r12, xmm5
-	movd	r10d, xmm5
-	and	r10d, 2097136
-
-	paddq	xmm3, xmm7
-	paddq	xmm2, xmm6
-	paddq	xmm1, xmm4
-	movdqu	XMMWORD PTR [r13+r11], xmm3
-	movdqu	XMMWORD PTR [rax+r11], xmm2
-	movdqu	XMMWORD PTR [r9+r11], xmm1
-
-	movdqa	xmm0, xmm5
-	pxor	xmm0, xmm6
-	movdqu	XMMWORD PTR [rdx], xmm0
-
-	lea	r13d, [ebx+esi]
-	lea	edx, [edi+ebp]
-	shl rdx, 32
-	or	r13, rdx
-
-	movd eax, xmm6
-	movd edx, xmm7
-	pextrd r9d, xmm7, 2
-
-	xor	r13, QWORD PTR [r10+r11]
-	mov	r14, QWORD PTR [r10+r11+8]
-
-FN_PREFIX(CryptonightR_template_part2):
-	lea	rcx, [r10+r11]
-
-	mov eax, edi
-	mov edx, ebp
-	shl rdx, 32
-	or rax, rdx
-	xor rsp, rax
-
-	mov eax, ebx
-	mov edx, esi
-	shl rdx, 32
-	or rax, rdx
-	xor r15, rax
-
-	mov	rax, r13
-	mul	r12
-	add	r15, rax
-	add	rsp, rdx
-
-	mov	r9d, r10d
-	mov	r12d, r10d
-	xor	r9d, 16
-	xor	r12d, 32
-	xor	r10d, 48
-	movdqa	xmm1, XMMWORD PTR [r12+r11]
-	movaps xmm3, xmm1
-	movdqa	xmm2, XMMWORD PTR [r9+r11]
-	movdqa	xmm0, XMMWORD PTR [r10+r11]
-	pxor xmm1, xmm2
-	pxor xmm5, xmm0
-	pxor xmm5, xmm1
-	paddq	xmm3, xmm4
-	paddq	xmm2, xmm6
-	paddq	xmm0, xmm7
-	movdqu	XMMWORD PTR [r9+r11], xmm0
-	movdqu	XMMWORD PTR [r12+r11], xmm2
-	movdqu	XMMWORD PTR [r10+r11], xmm3
-
-	movdqa	xmm7, xmm6
-	mov	QWORD PTR [rcx], rsp
-	xor	rsp, r13
-	mov	r9d, esp
-	mov	QWORD PTR [rcx+8], r15
-	and	r9d, 2097136
-	xor	r15, r14
-	movdqa	xmm6, xmm5
-	dec	r8d
-	jnz	FN_PREFIX(CryptonightR_template_mainloop)
-
-FN_PREFIX(CryptonightR_template_part3):
-	movq	rsp, xmm9
-
-	mov	rbx, QWORD PTR [rsp+136]
-	mov	rbp, QWORD PTR [rsp+144]
-	mov	rsi, QWORD PTR [rsp+152]
-	movaps	xmm6, XMMWORD PTR [rsp+48]
-	movaps	xmm7, XMMWORD PTR [rsp+32]
-	movaps	xmm8, XMMWORD PTR [rsp+16]
-	movaps	xmm9, XMMWORD PTR [rsp]
-	add	rsp, 64
-	pop	rdi
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	r11
-	pop	r10
-	ret	0
-FN_PREFIX(CryptonightR_template_end):
-
-ALIGN(64)
-FN_PREFIX(CryptonightR_template_double_part1):
-	mov	rdx, [rcx+8]
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+24], rbx
-	push	rbp
-	push	rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 320
-	mov	r14, QWORD PTR [rcx+32]
-	mov	r8, rcx
-	xor	r14, QWORD PTR [rcx]
-	mov	r12, QWORD PTR [rcx+40]
-	mov	ebx, r14d
-	mov	rsi, QWORD PTR [rcx+224]
-	and	ebx, 2097136
-	xor	r12, QWORD PTR [rcx+8]
-	mov	rcx, QWORD PTR [rcx+56]
-	xor	rcx, QWORD PTR [r8+24]
-	mov	rax, QWORD PTR [r8+48]
-	xor	rax, QWORD PTR [r8+16]
-	mov	r15, QWORD PTR [rdx+32]
-	xor	r15, QWORD PTR [rdx]
-	movq	xmm0, rcx
-	mov	rcx, QWORD PTR [r8+88]
-	xor	rcx, QWORD PTR [r8+72]
-	mov	r13, QWORD PTR [rdx+40]
-	mov	rdi, QWORD PTR [rdx+224]
-	xor	r13, QWORD PTR [rdx+8]
-	movaps	XMMWORD PTR [rsp+160], xmm6
-	movaps	XMMWORD PTR [rsp+176], xmm7
-	movaps	XMMWORD PTR [rsp+192], xmm8
-	movaps	XMMWORD PTR [rsp+208], xmm9
-	movaps	XMMWORD PTR [rsp+224], xmm10
-	movaps	XMMWORD PTR [rsp+240], xmm11
-	movaps	XMMWORD PTR [rsp+256], xmm12
-	movaps	XMMWORD PTR [rsp+272], xmm13
-	movaps	XMMWORD PTR [rsp+288], xmm14
-	movaps	XMMWORD PTR [rsp+304], xmm15
-	movq	xmm7, rax
-	mov	rax, QWORD PTR [r8+80]
-	xor	rax, QWORD PTR [r8+64]
-
-	movaps xmm1, XMMWORD PTR [rdx+96]
-	movaps xmm2, XMMWORD PTR [r8+96]
-	movaps XMMWORD PTR [rsp], xmm1
-	movaps XMMWORD PTR [rsp+16], xmm2
-
-	mov	r8d, r15d
-	punpcklqdq xmm7, xmm0
-	movq	xmm0, rcx
-	mov	rcx, QWORD PTR [rdx+56]
-	xor	rcx, QWORD PTR [rdx+24]
-	movq	xmm9, rax
-	mov	QWORD PTR [rsp+128], rsi
-	mov	rax, QWORD PTR [rdx+48]
-	xor	rax, QWORD PTR [rdx+16]
-	punpcklqdq xmm9, xmm0
-	movq	xmm0, rcx
-	mov	rcx, QWORD PTR [rdx+88]
-	xor	rcx, QWORD PTR [rdx+72]
-	movq	xmm8, rax
-	mov	QWORD PTR [rsp+136], rdi
-	mov	rax, QWORD PTR [rdx+80]
-	xor	rax, QWORD PTR [rdx+64]
-	punpcklqdq xmm8, xmm0
-	and	r8d, 2097136
-	movq	xmm0, rcx
-	mov	r11d, 524288
-	movq	xmm10, rax
-	punpcklqdq xmm10, xmm0
-	
-	movq xmm14, QWORD PTR [rsp+128]
-	movq xmm15, QWORD PTR [rsp+136]
-
-	ALIGN(64)
-FN_PREFIX(CryptonightR_template_double_mainloop):
-	movdqu	xmm6, XMMWORD PTR [rbx+rsi]
-	movq	xmm0, r12
-	mov	ecx, ebx
-	movq	xmm3, r14
-	punpcklqdq xmm3, xmm0
-	xor	ebx, 16
-	aesenc	xmm6, xmm3
-	movq	xmm4, r15
-	movdqu	xmm0, XMMWORD PTR [rbx+rsi]
-	pxor	xmm6, xmm0
-	xor	ebx, 48
-	paddq	xmm0, xmm7
-	movdqu	xmm1, XMMWORD PTR [rbx+rsi]
-	pxor	xmm6, xmm1
-	movdqu	XMMWORD PTR [rbx+rsi], xmm0
-	paddq	xmm1, xmm3
-	xor	ebx, 16
-	mov	eax, ebx
-	xor	rax, 32
-	movdqu	xmm0, XMMWORD PTR [rbx+rsi]
-	pxor	xmm6, xmm0
-	movq	rdx, xmm6
-	movdqu	XMMWORD PTR [rbx+rsi], xmm1
-	paddq	xmm0, xmm9
-	movdqu	XMMWORD PTR [rax+rsi], xmm0
-	movdqa	xmm0, xmm6
-	pxor	xmm0, xmm7
-	movdqu	XMMWORD PTR [rcx+rsi], xmm0
-	mov	esi, edx
-	movdqu	xmm5, XMMWORD PTR [r8+rdi]
-	and	esi, 2097136
-	mov	ecx, r8d
-	movq	xmm0, r13
-	punpcklqdq xmm4, xmm0
-	xor	r8d, 16
-	aesenc	xmm5, xmm4
-	movdqu	xmm0, XMMWORD PTR [r8+rdi]
-	pxor	xmm5, xmm0
-	xor	r8d, 48
-	paddq	xmm0, xmm8
-	movdqu	xmm1, XMMWORD PTR [r8+rdi]
-	pxor	xmm5, xmm1
-	movdqu	XMMWORD PTR [r8+rdi], xmm0
-	paddq	xmm1, xmm4
-	xor	r8d, 16
-	mov	eax, r8d
-	xor	rax, 32
-	movdqu	xmm0, XMMWORD PTR [r8+rdi]
-	pxor	xmm5, xmm0
-	movdqu	XMMWORD PTR [r8+rdi], xmm1
-	paddq	xmm0, xmm10
-	movdqu	XMMWORD PTR [rax+rdi], xmm0
-	movdqa	xmm0, xmm5
-	pxor	xmm0, xmm8
-	movdqu	XMMWORD PTR [rcx+rdi], xmm0
-	movq	rdi, xmm5
-	movq	rcx, xmm14
-	mov	ebp, edi
-	mov	r8, QWORD PTR [rcx+rsi]
-	mov	r10, QWORD PTR [rcx+rsi+8]
-	lea	r9, QWORD PTR [rcx+rsi]
-	xor	esi, 16
-
-	movq xmm0, rsp
-	movq xmm1, rsi
-	movq xmm2, rdi
-	movq xmm11, rbp
-	movq xmm12, r15
-	movq xmm13, rdx
-	mov [rsp+104], rcx
-	mov [rsp+112], r9
-
-	mov ebx, DWORD PTR [rsp+16]
-	mov esi, DWORD PTR [rsp+20]
-	mov edi, DWORD PTR [rsp+24]
-	mov ebp, DWORD PTR [rsp+28]
-
-	lea	eax, [ebx+esi]
-	lea	edx, [edi+ebp]
-	shl rdx, 32
-	or	rax, rdx
-	xor r8, rax
-
-	movd esp, xmm3
-	pextrd r15d, xmm3, 2
-	movd eax, xmm7
-	movd edx, xmm9
-	pextrd r9d, xmm9, 2
-
-FN_PREFIX(CryptonightR_template_double_part2):
-
-	mov eax, edi
-	mov edx, ebp
-	shl rdx, 32
-	or rax, rdx
-	xor r14, rax
-
-	mov eax, ebx
-	mov edx, esi
-	shl rdx, 32
-	or rax, rdx
-	xor r12, rax
-
-	movq rsp, xmm0
-	mov DWORD PTR [rsp+16], ebx
-	mov DWORD PTR [rsp+20], esi
-	mov DWORD PTR [rsp+24], edi
-	mov DWORD PTR [rsp+28], ebp
-
-	movq rsi, xmm1
-	movq rdi, xmm2
-	movq rbp, xmm11
-	movq r15, xmm12
-	movq rdx, xmm13
-	mov rcx, [rsp+104]
-	mov r9, [rsp+112]
-
-	mov rbx, r8
-	mov	rax, r8
-	mul	rdx
-	and	ebp, 2097136
-	mov	r8, rax
-	movdqu	xmm1, XMMWORD PTR [rcx+rsi]
-	pxor	xmm6, xmm1
-	xor	esi, 48
-	paddq	xmm1, xmm7
-	movdqu	xmm2, XMMWORD PTR [rsi+rcx]
-	pxor	xmm6, xmm2
-	paddq	xmm2, xmm3
-	movdqu	XMMWORD PTR [rsi+rcx], xmm1
-	xor	esi, 16
-	mov	eax, esi
-	mov	rsi, rcx
-	movdqu	xmm0, XMMWORD PTR [rax+rcx]
-	pxor	xmm6, xmm0
-	movdqu	XMMWORD PTR [rax+rcx], xmm2
-	paddq	xmm0, xmm9
-	add	r12, r8
-	xor	rax, 32
-	add	r14, rdx
-	movdqa	xmm9, xmm7
-	movdqa	xmm7, xmm6
-	movdqu	XMMWORD PTR [rax+rcx], xmm0
-	mov	QWORD PTR [r9+8], r12
-	xor	r12, r10
-	mov	QWORD PTR [r9], r14
-	movq rcx, xmm15
-	xor	r14, rbx
-	mov	r10d, ebp
-	mov	ebx, r14d
-	xor	ebp, 16
-	and	ebx, 2097136
-	mov	r8, QWORD PTR [r10+rcx]
-	mov	r9, QWORD PTR [r10+rcx+8]
-
-	movq xmm0, rsp
-	movq xmm1, rbx
-	movq xmm2, rsi
-	movq xmm11, rdi
-	movq xmm12, rbp
-	movq xmm13, r15
-	mov [rsp+104], rcx
-	mov [rsp+112], r9
-
-	mov ebx, DWORD PTR [rsp]
-	mov esi, DWORD PTR [rsp+4]
-	mov edi, DWORD PTR [rsp+8]
-	mov ebp, DWORD PTR [rsp+12]
-
-	lea	eax, [ebx+esi]
-	lea	edx, [edi+ebp]
-	shl rdx, 32
-	or	rax, rdx
-
-	xor r8, rax
-	movq xmm3, r8
-
-	movd esp, xmm4
-	pextrd r15d, xmm4, 2
-	movd eax, xmm8
-	movd edx, xmm10
-	pextrd r9d, xmm10, 2
-
-FN_PREFIX(CryptonightR_template_double_part3):
-
-	movq r15, xmm13
-
-	mov eax, edi
-	mov edx, ebp
-	shl rdx, 32
-	or rax, rdx
-	xor r15, rax
-
-	mov eax, ebx
-	mov edx, esi
-	shl rdx, 32
-	or rax, rdx
-	xor r13, rax
-
-	movq rsp, xmm0
-	mov DWORD PTR [rsp], ebx
-	mov DWORD PTR [rsp+4], esi
-	mov DWORD PTR [rsp+8], edi
-	mov DWORD PTR [rsp+12], ebp
-
-	movq rbx, xmm1
-	movq rsi, xmm2
-	movq rdi, xmm11
-	movq rbp, xmm12
-	mov rcx, [rsp+104]
-	mov r9, [rsp+112]
-
-	mov rax, r8
-	mul	rdi
-	mov	rdi, rcx
-	mov	r8, rax
-	movdqu	xmm1, XMMWORD PTR [rbp+rcx]
-	pxor xmm5, xmm1
-	xor	ebp, 48
-	paddq	xmm1, xmm8
-	add	r13, r8
-	movdqu	xmm2, XMMWORD PTR [rbp+rcx]
-	pxor xmm5, xmm2
-	add	r15, rdx
-	movdqu	XMMWORD PTR [rbp+rcx], xmm1
-	paddq	xmm2, xmm4
-	xor	ebp, 16
-	mov	eax, ebp
-	xor	rax, 32
-	movdqu	xmm0, XMMWORD PTR [rbp+rcx]
-	pxor xmm5, xmm0
-	movdqu	XMMWORD PTR [rbp+rcx], xmm2
-	paddq	xmm0, xmm10
-	movdqu	XMMWORD PTR [rax+rcx], xmm0
-	movq rax, xmm3
-	movdqa	xmm10, xmm8
-	mov	QWORD PTR [r10+rcx], r15
-	movdqa	xmm8, xmm5
-	xor	r15, rax
-	mov	QWORD PTR [r10+rcx+8], r13
-	mov	r8d, r15d
-	xor	r13, r9
-	and	r8d, 2097136
-	dec r11d
-	jnz	FN_PREFIX(CryptonightR_template_double_mainloop)
-
-FN_PREFIX(CryptonightR_template_double_part4):
-
-	mov	rbx, QWORD PTR [rsp+400]
-	movaps	xmm6, XMMWORD PTR [rsp+160]
-	movaps	xmm7, XMMWORD PTR [rsp+176]
-	movaps	xmm8, XMMWORD PTR [rsp+192]
-	movaps	xmm9, XMMWORD PTR [rsp+208]
-	movaps	xmm10, XMMWORD PTR [rsp+224]
-	movaps	xmm11, XMMWORD PTR [rsp+240]
-	movaps	xmm12, XMMWORD PTR [rsp+256]
-	movaps	xmm13, XMMWORD PTR [rsp+272]
-	movaps	xmm14, XMMWORD PTR [rsp+288]
-	movaps	xmm15, XMMWORD PTR [rsp+304]
-	add	rsp, 320
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	pop	rsi
-	pop	rbp
-	ret	0
-FN_PREFIX(CryptonightR_template_double_end):
diff --git a/src/crypto/asm/CryptonightR_template_win.inc b/src/crypto/asm/CryptonightR_template_win.inc
deleted file mode 100644
index 1bb89eb1..00000000
--- a/src/crypto/asm/CryptonightR_template_win.inc
+++ /dev/null
@@ -1,536 +0,0 @@
-PUBLIC CryptonightR_template_part1
-PUBLIC CryptonightR_template_mainloop
-PUBLIC CryptonightR_template_part2
-PUBLIC CryptonightR_template_part3
-PUBLIC CryptonightR_template_end
-PUBLIC CryptonightR_template_double_part1
-PUBLIC CryptonightR_template_double_mainloop
-PUBLIC CryptonightR_template_double_part2
-PUBLIC CryptonightR_template_double_part3
-PUBLIC CryptonightR_template_double_part4
-PUBLIC CryptonightR_template_double_end
-
-ALIGN(64)
-CryptonightR_template_part1:
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+16], rbx
-	mov	QWORD PTR [rsp+24], rbp
-	mov	QWORD PTR [rsp+32], rsi
-	push	r10
-	push	r11
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	push	rdi
-	sub	rsp, 64
-	mov	r12, rcx
-	mov	r8, QWORD PTR [r12+32]
-	mov	rdx, r12
-	xor	r8, QWORD PTR [r12]
-	mov	r15, QWORD PTR [r12+40]
-	mov	r9, r8
-	xor	r15, QWORD PTR [r12+8]
-	mov	r11, QWORD PTR [r12+224]
-	mov	r12, QWORD PTR [r12+56]
-	xor	r12, QWORD PTR [rdx+24]
-	mov	rax, QWORD PTR [rdx+48]
-	xor	rax, QWORD PTR [rdx+16]
-	movaps	XMMWORD PTR [rsp+48], xmm6
-	movq	xmm0, r12
-	movaps	XMMWORD PTR [rsp+32], xmm7
-	movaps	XMMWORD PTR [rsp+16], xmm8
-	movaps	XMMWORD PTR [rsp], xmm9
-	mov	r12, QWORD PTR [rdx+88]
-	xor	r12, QWORD PTR [rdx+72]
-	movq	xmm6, rax
-	mov	rax, QWORD PTR [rdx+80]
-	xor	rax, QWORD PTR [rdx+64]
-	punpcklqdq xmm6, xmm0
-	and	r9d, 2097136
-	movq	xmm0, r12
-	movq	xmm7, rax
-	punpcklqdq xmm7, xmm0
-	mov r10d, r9d
-	movq	xmm9, rsp
-	mov rsp, r8
-	mov	r8d, 524288
-
-	mov	ebx, [rdx+96]
-	mov	esi, [rdx+100]
-	mov	edi, [rdx+104]
-	mov	ebp, [rdx+108]
-
-	ALIGN(64)
-CryptonightR_template_mainloop:
-	movdqa	xmm5, XMMWORD PTR [r9+r11]
-	movq	xmm0, r15
-	movq	xmm4, rsp
-	punpcklqdq xmm4, xmm0
-	lea	rdx, QWORD PTR [r9+r11]
-
-	aesenc	xmm5, xmm4
-
-	mov	r13d, r9d
-	mov	eax, r9d
-	xor	r9d, 48
-	xor	r13d, 16
-	xor	eax, 32
-	movdqu	xmm0, XMMWORD PTR [r9+r11]
-	movaps xmm3, xmm0
-	movdqu	xmm2, XMMWORD PTR [r13+r11]
-	movdqu	xmm1, XMMWORD PTR [rax+r11]
-	pxor xmm0, xmm2
-	pxor xmm5, xmm1
-	pxor xmm5, xmm0
-
-	movq	r12, xmm5
-	movd	r10d, xmm5
-	and	r10d, 2097136
-
-	paddq	xmm3, xmm7
-	paddq	xmm2, xmm6
-	paddq	xmm1, xmm4
-	movdqu	XMMWORD PTR [r13+r11], xmm3
-	movdqu	XMMWORD PTR [rax+r11], xmm2
-	movdqu	XMMWORD PTR [r9+r11], xmm1
-
-	movdqa	xmm0, xmm5
-	pxor	xmm0, xmm6
-	movdqu	XMMWORD PTR [rdx], xmm0
-
-	lea	r13d, [ebx+esi]
-	lea	edx, [edi+ebp]
-	shl rdx, 32
-	or	r13, rdx
-
-	movd eax, xmm6
-	movd edx, xmm7
-	pextrd r9d, xmm7, 2
-
-	xor	r13, QWORD PTR [r10+r11]
-	mov	r14, QWORD PTR [r10+r11+8]
-
-CryptonightR_template_part2:
-	lea	rcx, [r10+r11]
-
-	mov eax, edi
-	mov edx, ebp
-	shl rdx, 32
-	or rax, rdx
-	xor rsp, rax
-
-	mov eax, ebx
-	mov edx, esi
-	shl rdx, 32
-	or rax, rdx
-	xor r15, rax
-
-	mov	rax, r13
-	mul	r12
-	add	r15, rax
-	add	rsp, rdx
-
-	mov	r9d, r10d
-	mov	r12d, r10d
-	xor	r9d, 16
-	xor	r12d, 32
-	xor	r10d, 48
-	movdqa	xmm1, XMMWORD PTR [r12+r11]
-	movaps xmm3, xmm1
-	movdqa	xmm2, XMMWORD PTR [r9+r11]
-	movdqa	xmm0, XMMWORD PTR [r10+r11]
-	pxor xmm1, xmm2
-	pxor xmm5, xmm0
-	pxor xmm5, xmm1
-	paddq	xmm3, xmm4
-	paddq	xmm2, xmm6
-	paddq	xmm0, xmm7
-	movdqu	XMMWORD PTR [r9+r11], xmm0
-	movdqu	XMMWORD PTR [r12+r11], xmm2
-	movdqu	XMMWORD PTR [r10+r11], xmm3
-
-	movdqa	xmm7, xmm6
-	mov	QWORD PTR [rcx], rsp
-	xor	rsp, r13
-	mov	r9d, esp
-	mov	QWORD PTR [rcx+8], r15
-	and	r9d, 2097136
-	xor	r15, r14
-	movdqa	xmm6, xmm5
-	dec	r8d
-	jnz	CryptonightR_template_mainloop
-
-CryptonightR_template_part3:
-	movq	rsp, xmm9
-
-	mov	rbx, QWORD PTR [rsp+136]
-	mov	rbp, QWORD PTR [rsp+144]
-	mov	rsi, QWORD PTR [rsp+152]
-	movaps	xmm6, XMMWORD PTR [rsp+48]
-	movaps	xmm7, XMMWORD PTR [rsp+32]
-	movaps	xmm8, XMMWORD PTR [rsp+16]
-	movaps	xmm9, XMMWORD PTR [rsp]
-	add	rsp, 64
-	pop	rdi
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	r11
-	pop	r10
-	ret	0
-CryptonightR_template_end:
-
-ALIGN(64)
-CryptonightR_template_double_part1:
-	mov	rdx, [rcx+8]
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+24], rbx
-	push	rbp
-	push	rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 320
-	mov	r14, QWORD PTR [rcx+32]
-	mov	r8, rcx
-	xor	r14, QWORD PTR [rcx]
-	mov	r12, QWORD PTR [rcx+40]
-	mov	ebx, r14d
-	mov	rsi, QWORD PTR [rcx+224]
-	and	ebx, 2097136
-	xor	r12, QWORD PTR [rcx+8]
-	mov	rcx, QWORD PTR [rcx+56]
-	xor	rcx, QWORD PTR [r8+24]
-	mov	rax, QWORD PTR [r8+48]
-	xor	rax, QWORD PTR [r8+16]
-	mov	r15, QWORD PTR [rdx+32]
-	xor	r15, QWORD PTR [rdx]
-	movq	xmm0, rcx
-	mov	rcx, QWORD PTR [r8+88]
-	xor	rcx, QWORD PTR [r8+72]
-	mov	r13, QWORD PTR [rdx+40]
-	mov	rdi, QWORD PTR [rdx+224]
-	xor	r13, QWORD PTR [rdx+8]
-	movaps	XMMWORD PTR [rsp+160], xmm6
-	movaps	XMMWORD PTR [rsp+176], xmm7
-	movaps	XMMWORD PTR [rsp+192], xmm8
-	movaps	XMMWORD PTR [rsp+208], xmm9
-	movaps	XMMWORD PTR [rsp+224], xmm10
-	movaps	XMMWORD PTR [rsp+240], xmm11
-	movaps	XMMWORD PTR [rsp+256], xmm12
-	movaps	XMMWORD PTR [rsp+272], xmm13
-	movaps	XMMWORD PTR [rsp+288], xmm14
-	movaps	XMMWORD PTR [rsp+304], xmm15
-	movq	xmm7, rax
-	mov	rax, QWORD PTR [r8+80]
-	xor	rax, QWORD PTR [r8+64]
-
-	movaps xmm1, XMMWORD PTR [rdx+96]
-	movaps xmm2, XMMWORD PTR [r8+96]
-	movaps XMMWORD PTR [rsp], xmm1
-	movaps XMMWORD PTR [rsp+16], xmm2
-
-	mov	r8d, r15d
-	punpcklqdq xmm7, xmm0
-	movq	xmm0, rcx
-	mov	rcx, QWORD PTR [rdx+56]
-	xor	rcx, QWORD PTR [rdx+24]
-	movq	xmm9, rax
-	mov	QWORD PTR [rsp+128], rsi
-	mov	rax, QWORD PTR [rdx+48]
-	xor	rax, QWORD PTR [rdx+16]
-	punpcklqdq xmm9, xmm0
-	movq	xmm0, rcx
-	mov	rcx, QWORD PTR [rdx+88]
-	xor	rcx, QWORD PTR [rdx+72]
-	movq	xmm8, rax
-	mov	QWORD PTR [rsp+136], rdi
-	mov	rax, QWORD PTR [rdx+80]
-	xor	rax, QWORD PTR [rdx+64]
-	punpcklqdq xmm8, xmm0
-	and	r8d, 2097136
-	movq	xmm0, rcx
-	mov	r11d, 524288
-	movq	xmm10, rax
-	punpcklqdq xmm10, xmm0
-	
-	movq xmm14, QWORD PTR [rsp+128]
-	movq xmm15, QWORD PTR [rsp+136]
-
-	ALIGN(64)
-CryptonightR_template_double_mainloop:
-	movdqu	xmm6, XMMWORD PTR [rbx+rsi]
-	movq	xmm0, r12
-	mov	ecx, ebx
-	movq	xmm3, r14
-	punpcklqdq xmm3, xmm0
-	xor	ebx, 16
-	aesenc	xmm6, xmm3
-	movq	xmm4, r15
-	movdqu	xmm0, XMMWORD PTR [rbx+rsi]
-	pxor	xmm6, xmm0
-	xor	ebx, 48
-	paddq	xmm0, xmm7
-	movdqu	xmm1, XMMWORD PTR [rbx+rsi]
-	pxor	xmm6, xmm1
-	movdqu	XMMWORD PTR [rbx+rsi], xmm0
-	paddq	xmm1, xmm3
-	xor	ebx, 16
-	mov	eax, ebx
-	xor	rax, 32
-	movdqu	xmm0, XMMWORD PTR [rbx+rsi]
-	pxor	xmm6, xmm0
-	movq	rdx, xmm6
-	movdqu	XMMWORD PTR [rbx+rsi], xmm1
-	paddq	xmm0, xmm9
-	movdqu	XMMWORD PTR [rax+rsi], xmm0
-	movdqa	xmm0, xmm6
-	pxor	xmm0, xmm7
-	movdqu	XMMWORD PTR [rcx+rsi], xmm0
-	mov	esi, edx
-	movdqu	xmm5, XMMWORD PTR [r8+rdi]
-	and	esi, 2097136
-	mov	ecx, r8d
-	movq	xmm0, r13
-	punpcklqdq xmm4, xmm0
-	xor	r8d, 16
-	aesenc	xmm5, xmm4
-	movdqu	xmm0, XMMWORD PTR [r8+rdi]
-	pxor	xmm5, xmm0
-	xor	r8d, 48
-	paddq	xmm0, xmm8
-	movdqu	xmm1, XMMWORD PTR [r8+rdi]
-	pxor	xmm5, xmm1
-	movdqu	XMMWORD PTR [r8+rdi], xmm0
-	paddq	xmm1, xmm4
-	xor	r8d, 16
-	mov	eax, r8d
-	xor	rax, 32
-	movdqu	xmm0, XMMWORD PTR [r8+rdi]
-	pxor	xmm5, xmm0
-	movdqu	XMMWORD PTR [r8+rdi], xmm1
-	paddq	xmm0, xmm10
-	movdqu	XMMWORD PTR [rax+rdi], xmm0
-	movdqa	xmm0, xmm5
-	pxor	xmm0, xmm8
-	movdqu	XMMWORD PTR [rcx+rdi], xmm0
-	movq	rdi, xmm5
-	movq	rcx, xmm14
-	mov	ebp, edi
-	mov	r8, QWORD PTR [rcx+rsi]
-	mov	r10, QWORD PTR [rcx+rsi+8]
-	lea	r9, QWORD PTR [rcx+rsi]
-	xor	esi, 16
-
-	movq xmm0, rsp
-	movq xmm1, rsi
-	movq xmm2, rdi
-	movq xmm11, rbp
-	movq xmm12, r15
-	movq xmm13, rdx
-	mov [rsp+104], rcx
-	mov [rsp+112], r9
-
-	mov ebx, DWORD PTR [rsp+16]
-	mov esi, DWORD PTR [rsp+20]
-	mov edi, DWORD PTR [rsp+24]
-	mov ebp, DWORD PTR [rsp+28]
-
-	lea	eax, [ebx+esi]
-	lea	edx, [edi+ebp]
-	shl rdx, 32
-	or	rax, rdx
-	xor r8, rax
-
-	movd esp, xmm3
-	pextrd r15d, xmm3, 2
-	movd eax, xmm7
-	movd edx, xmm9
-	pextrd r9d, xmm9, 2
-
-CryptonightR_template_double_part2:
-
-	mov eax, edi
-	mov edx, ebp
-	shl rdx, 32
-	or rax, rdx
-	xor r14, rax
-
-	mov eax, ebx
-	mov edx, esi
-	shl rdx, 32
-	or rax, rdx
-	xor r12, rax
-
-	movq rsp, xmm0
-	mov DWORD PTR [rsp+16], ebx
-	mov DWORD PTR [rsp+20], esi
-	mov DWORD PTR [rsp+24], edi
-	mov DWORD PTR [rsp+28], ebp
-
-	movq rsi, xmm1
-	movq rdi, xmm2
-	movq rbp, xmm11
-	movq r15, xmm12
-	movq rdx, xmm13
-	mov rcx, [rsp+104]
-	mov r9, [rsp+112]
-
-	mov rbx, r8
-	mov	rax, r8
-	mul	rdx
-	and	ebp, 2097136
-	mov	r8, rax
-	movdqu	xmm1, XMMWORD PTR [rcx+rsi]
-	pxor	xmm6, xmm1
-	xor	esi, 48
-	paddq	xmm1, xmm7
-	movdqu	xmm2, XMMWORD PTR [rsi+rcx]
-	pxor	xmm6, xmm2
-	paddq	xmm2, xmm3
-	movdqu	XMMWORD PTR [rsi+rcx], xmm1
-	xor	esi, 16
-	mov	eax, esi
-	mov	rsi, rcx
-	movdqu	xmm0, XMMWORD PTR [rax+rcx]
-	pxor	xmm6, xmm0
-	movdqu	XMMWORD PTR [rax+rcx], xmm2
-	paddq	xmm0, xmm9
-	add	r12, r8
-	xor	rax, 32
-	add	r14, rdx
-	movdqa	xmm9, xmm7
-	movdqa	xmm7, xmm6
-	movdqu	XMMWORD PTR [rax+rcx], xmm0
-	mov	QWORD PTR [r9+8], r12
-	xor	r12, r10
-	mov	QWORD PTR [r9], r14
-	movq rcx, xmm15
-	xor	r14, rbx
-	mov	r10d, ebp
-	mov	ebx, r14d
-	xor	ebp, 16
-	and	ebx, 2097136
-	mov	r8, QWORD PTR [r10+rcx]
-	mov	r9, QWORD PTR [r10+rcx+8]
-
-	movq xmm0, rsp
-	movq xmm1, rbx
-	movq xmm2, rsi
-	movq xmm11, rdi
-	movq xmm12, rbp
-	movq xmm13, r15
-	mov [rsp+104], rcx
-	mov [rsp+112], r9
-
-	mov ebx, DWORD PTR [rsp]
-	mov esi, DWORD PTR [rsp+4]
-	mov edi, DWORD PTR [rsp+8]
-	mov ebp, DWORD PTR [rsp+12]
-
-	lea	eax, [ebx+esi]
-	lea	edx, [edi+ebp]
-	shl rdx, 32
-	or	rax, rdx
-
-	xor r8, rax
-	movq xmm3, r8
-
-	movd esp, xmm4
-	pextrd r15d, xmm4, 2
-	movd eax, xmm8
-	movd edx, xmm10
-	pextrd r9d, xmm10, 2
-
-CryptonightR_template_double_part3:
-
-	movq r15, xmm13
-
-	mov eax, edi
-	mov edx, ebp
-	shl rdx, 32
-	or rax, rdx
-	xor r15, rax
-
-	mov eax, ebx
-	mov edx, esi
-	shl rdx, 32
-	or rax, rdx
-	xor r13, rax
-
-	movq rsp, xmm0
-	mov DWORD PTR [rsp], ebx
-	mov DWORD PTR [rsp+4], esi
-	mov DWORD PTR [rsp+8], edi
-	mov DWORD PTR [rsp+12], ebp
-
-	movq rbx, xmm1
-	movq rsi, xmm2
-	movq rdi, xmm11
-	movq rbp, xmm12
-	mov rcx, [rsp+104]
-	mov r9, [rsp+112]
-
-	mov rax, r8
-	mul	rdi
-	mov	rdi, rcx
-	mov	r8, rax
-	movdqu	xmm1, XMMWORD PTR [rbp+rcx]
-	pxor xmm5, xmm1
-	xor	ebp, 48
-	paddq	xmm1, xmm8
-	add	r13, r8
-	movdqu	xmm2, XMMWORD PTR [rbp+rcx]
-	pxor xmm5, xmm2
-	add	r15, rdx
-	movdqu	XMMWORD PTR [rbp+rcx], xmm1
-	paddq	xmm2, xmm4
-	xor	ebp, 16
-	mov	eax, ebp
-	xor	rax, 32
-	movdqu	xmm0, XMMWORD PTR [rbp+rcx]
-	pxor xmm5, xmm0
-	movdqu	XMMWORD PTR [rbp+rcx], xmm2
-	paddq	xmm0, xmm10
-	movdqu	XMMWORD PTR [rax+rcx], xmm0
-	movq rax, xmm3
-	movdqa	xmm10, xmm8
-	mov	QWORD PTR [r10+rcx], r15
-	movdqa	xmm8, xmm5
-	xor	r15, rax
-	mov	QWORD PTR [r10+rcx+8], r13
-	mov	r8d, r15d
-	xor	r13, r9
-	and	r8d, 2097136
-	dec r11d
-	jnz	CryptonightR_template_double_mainloop
-
-CryptonightR_template_double_part4:
-
-	mov	rbx, QWORD PTR [rsp+400]
-	movaps	xmm6, XMMWORD PTR [rsp+160]
-	movaps	xmm7, XMMWORD PTR [rsp+176]
-	movaps	xmm8, XMMWORD PTR [rsp+192]
-	movaps	xmm9, XMMWORD PTR [rsp+208]
-	movaps	xmm10, XMMWORD PTR [rsp+224]
-	movaps	xmm11, XMMWORD PTR [rsp+240]
-	movaps	xmm12, XMMWORD PTR [rsp+256]
-	movaps	xmm13, XMMWORD PTR [rsp+272]
-	movaps	xmm14, XMMWORD PTR [rsp+288]
-	movaps	xmm15, XMMWORD PTR [rsp+304]
-	add	rsp, 320
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	pop	rsi
-	pop	rbp
-	ret	0
-CryptonightR_template_double_end:
diff --git a/src/crypto/asm/CryptonightWOW_soft_aes_template.inc b/src/crypto/asm/CryptonightWOW_soft_aes_template.inc
deleted file mode 100644
index 53b7016a..00000000
--- a/src/crypto/asm/CryptonightWOW_soft_aes_template.inc
+++ /dev/null
@@ -1,268 +0,0 @@
-PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part1)
-PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop)
-PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part2)
-PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part3)
-PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_end)
-
-ALIGN(64)
-FN_PREFIX(CryptonightWOW_soft_aes_template_part1):
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+8], rcx
-	push	rbx
-	push	rbp
-	push	rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 232
-
-	mov	eax, [rcx+96]
-	mov	ebx, [rcx+100]
-	mov	esi, [rcx+104]
-	mov	edx, [rcx+108]
-	mov [rsp+144], eax
-	mov [rsp+148], ebx
-	mov [rsp+152], esi
-	mov [rsp+156], edx
-
-	mov	rax, QWORD PTR [rcx+48]
-	mov	r10, rcx
-	xor	rax, QWORD PTR [rcx+16]
-	mov	r8, QWORD PTR [rcx+32]
-	xor	r8, QWORD PTR [rcx]
-	mov	r9, QWORD PTR [rcx+40]
-	xor	r9, QWORD PTR [rcx+8]
-	movq	xmm4, rax
-	mov	rdx, QWORD PTR [rcx+56]
-	xor	rdx, QWORD PTR [rcx+24]
-	mov	r11, QWORD PTR [rcx+224]
-	mov	rcx, QWORD PTR [rcx+88]
-	xor	rcx, QWORD PTR [r10+72]
-	mov	rax, QWORD PTR [r10+80]
-	movq	xmm0, rdx
-	xor	rax, QWORD PTR [r10+64]
-
-	movaps	XMMWORD PTR [rsp+16], xmm6
-	movaps	XMMWORD PTR [rsp+32], xmm7
-	movaps	XMMWORD PTR [rsp+48], xmm8
-	movaps	XMMWORD PTR [rsp+64], xmm9
-	movaps	XMMWORD PTR [rsp+80], xmm10
-	movaps	XMMWORD PTR [rsp+96], xmm11
-	movaps	XMMWORD PTR [rsp+112], xmm12
-	movaps	XMMWORD PTR [rsp+128], xmm13
-
-	movq	xmm5, rax
-
-	mov	rax, r8
-	punpcklqdq xmm4, xmm0
-	and	eax, 2097136
-	movq	xmm10, QWORD PTR [r10+96]
-	movq	xmm0, rcx
-	mov	rcx, QWORD PTR [r10+104]
-	xorps	xmm9, xmm9
-	mov	QWORD PTR [rsp+328], rax
-	movq	xmm12, r11
-	mov	QWORD PTR [rsp+320], r9
-	punpcklqdq xmm5, xmm0
-	movq xmm13, rcx
-	mov r12d, 524288
-
-	ALIGN(64)
-FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop):
-	movd xmm11, r12d
-	mov	r12, QWORD PTR [r10+272]
-	lea	r13, QWORD PTR [rax+r11]
-	mov	esi, DWORD PTR [r13]
-	movq	xmm0, r9
-	mov	r10d, DWORD PTR [r13+4]
-	movq	xmm7, r8
-	mov	ebp, DWORD PTR [r13+12]
-	mov	r14d, DWORD PTR [r13+8]
-	mov	rdx, QWORD PTR [rsp+328]
-	movzx	ecx, sil
-	shr	esi, 8
-	punpcklqdq xmm7, xmm0
-	mov	r15d, DWORD PTR [r12+rcx*4]
-	movzx	ecx, r10b
-	shr	r10d, 8
-	mov	edi, DWORD PTR [r12+rcx*4]
-	movzx	ecx, r14b
-	shr	r14d, 8
-	mov	ebx, DWORD PTR [r12+rcx*4]
-	movzx	ecx, bpl
-	shr	ebp, 8
-	mov	r9d, DWORD PTR [r12+rcx*4]
-	movzx	ecx, r10b
-	shr	r10d, 8
-	xor	r15d, DWORD PTR [r12+rcx*4+1024]
-	movzx	ecx, r14b
-	shr	r14d, 8
-	mov	eax, r14d
-	shr	eax, 8
-	xor	edi, DWORD PTR [r12+rcx*4+1024]
-	add	eax, 256
-	movzx	ecx, bpl
-	shr	ebp, 8
-	xor	ebx, DWORD PTR [r12+rcx*4+1024]
-	movzx	ecx, sil
-	shr	esi, 8
-	xor	r9d, DWORD PTR [r12+rcx*4+1024]
-	add	r12, 2048
-	movzx	ecx, r10b
-	shr	r10d, 8
-	add	r10d, 256
-	mov	r11d, DWORD PTR [r12+rax*4]
-	xor	r11d, DWORD PTR [r12+rcx*4]
-	xor	r11d, r9d
-	movzx	ecx, sil
-	mov	r10d, DWORD PTR [r12+r10*4]
-	shr	esi, 8
-	add	esi, 256
-	xor	r10d, DWORD PTR [r12+rcx*4]
-	movzx	ecx, bpl
-	xor	r10d, ebx
-	shr	ebp, 8
-	movd	xmm1, r11d
-	add	ebp, 256
-	movq	r11, xmm12
-	mov	r9d, DWORD PTR [r12+rcx*4]
-	xor	r9d, DWORD PTR [r12+rsi*4]
-	mov	eax, DWORD PTR [r12+rbp*4]
-	xor	r9d, edi
-	movzx	ecx, r14b
-	movd	xmm0, r10d
-	movd	xmm2, r9d
-	xor	eax, DWORD PTR [r12+rcx*4]
-	mov	rcx, rdx
-	xor	eax, r15d
-	punpckldq xmm2, xmm1
-	xor	rcx, 16
-	movd	xmm6, eax
-	mov	rax, rdx
-	punpckldq xmm6, xmm0
-	xor	rax, 32
-	punpckldq xmm6, xmm2
-	xor	rdx, 48
-	movdqu	xmm2, XMMWORD PTR [rcx+r11]
-	pxor	xmm6, xmm7
-	paddq	xmm2, xmm4
-	movdqu	xmm1, XMMWORD PTR [rax+r11]
-	movdqu	xmm0, XMMWORD PTR [rdx+r11]
-	paddq	xmm0, xmm5
-	movdqu	XMMWORD PTR [rcx+r11], xmm0
-	movdqu	XMMWORD PTR [rax+r11], xmm2
-	movq rcx, xmm13
-	paddq	xmm1, xmm7
-	movdqu	XMMWORD PTR [rdx+r11], xmm1
-	movq	rdi, xmm6
-	mov	r10, rdi
-	and	r10d, 2097136
-	movdqa	xmm0, xmm6
-	pxor	xmm0, xmm4
-	movdqu	XMMWORD PTR [r13], xmm0
-
-	mov ebx, [rsp+144]
-	mov ebp, [rsp+152]
-	add ebx, [rsp+148]
-	add ebp, [rsp+156]
-	shl rbp, 32
-	or rbx, rbp
-
-	xor rbx, QWORD PTR [r10+r11]
-	lea	r14, QWORD PTR [r10+r11]
-	mov	rbp, QWORD PTR [r14+8]
-
-	mov [rsp+160], rbx
-	mov [rsp+168], rdi
-	mov [rsp+176], rbp
-	mov [rsp+184], r10
-	mov r10, rsp
-
-	mov ebx, [rsp+144]
-	mov esi, [rsp+148]
-	mov edi, [rsp+152]
-	mov ebp, [rsp+156]
-
-	movd esp, xmm7
-	movaps xmm0, xmm7
-	psrldq xmm0, 8
-	movd r15d, xmm0
-	movd eax, xmm4
-	movd edx, xmm5
-
-FN_PREFIX(CryptonightWOW_soft_aes_template_part2):
-	mov rsp, r10
-	mov [rsp+144], ebx
-	mov [rsp+148], esi
-	mov [rsp+152], edi
-	mov [rsp+156], ebp
-
-	mov rbx, [rsp+160]
-	mov rdi, [rsp+168]
-	mov rbp, [rsp+176]
-	mov r10, [rsp+184]
-
-	mov	r9, r10
-	xor	r9, 16
-	mov	rcx, r10
-	xor	rcx, 32
-	xor	r10, 48
-	mov	rax, rbx
-	mul	rdi
-	movdqu	xmm2, XMMWORD PTR [r9+r11]
-	movdqu	xmm1, XMMWORD PTR [rcx+r11]
-	paddq	xmm1, xmm7
-	movq	xmm0, rax
-	movq	xmm3, rdx
-	xor	rax, QWORD PTR [r11+rcx+8]
-	xor	rdx, QWORD PTR [rcx+r11]
-	punpcklqdq xmm3, xmm0
-	add	r8, rdx
-	movdqu	xmm0, XMMWORD PTR [r10+r11]
-	pxor	xmm2, xmm3
-	paddq	xmm0, xmm5
-	paddq	xmm2, xmm4
-	movdqu	XMMWORD PTR [r9+r11], xmm0
-	movdqa	xmm5, xmm4
-	mov	r9, QWORD PTR [rsp+320]
-	movdqa	xmm4, xmm6
-	add	r9, rax
-	movdqu	XMMWORD PTR [rcx+r11], xmm2
-	movdqu	XMMWORD PTR [r10+r11], xmm1
-	mov	r10, QWORD PTR [rsp+304]
-	movd r12d, xmm11
-	mov	QWORD PTR [r14], r8
-	xor	r8, rbx
-	mov	rax, r8
-	mov	QWORD PTR [r14+8], r9
-	and	eax, 2097136
-	xor	r9, rbp
-	mov	QWORD PTR [rsp+320], r9
-	mov	QWORD PTR [rsp+328], rax
-	sub	r12d, 1
-	jne	FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop)
-
-FN_PREFIX(CryptonightWOW_soft_aes_template_part3):
-	movaps	xmm6, XMMWORD PTR [rsp+16]
-	movaps	xmm7, XMMWORD PTR [rsp+32]
-	movaps	xmm8, XMMWORD PTR [rsp+48]
-	movaps	xmm9, XMMWORD PTR [rsp+64]
-	movaps	xmm10, XMMWORD PTR [rsp+80]
-	movaps	xmm11, XMMWORD PTR [rsp+96]
-	movaps	xmm12, XMMWORD PTR [rsp+112]
-	movaps	xmm13, XMMWORD PTR [rsp+128]
-
-	add	rsp, 232
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	pop	rsi
-	pop	rbp
-	pop	rbx
-	ret
-FN_PREFIX(CryptonightWOW_soft_aes_template_end):
diff --git a/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc b/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc
deleted file mode 100644
index b3202b78..00000000
--- a/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc
+++ /dev/null
@@ -1,268 +0,0 @@
-PUBLIC CryptonightWOW_soft_aes_template_part1
-PUBLIC CryptonightWOW_soft_aes_template_mainloop
-PUBLIC CryptonightWOW_soft_aes_template_part2
-PUBLIC CryptonightWOW_soft_aes_template_part3
-PUBLIC CryptonightWOW_soft_aes_template_end
-
-ALIGN(64)
-CryptonightWOW_soft_aes_template_part1:
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+8], rcx
-	push	rbx
-	push	rbp
-	push	rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 232
-
-	mov	eax, [rcx+96]
-	mov	ebx, [rcx+100]
-	mov	esi, [rcx+104]
-	mov	edx, [rcx+108]
-	mov [rsp+144], eax
-	mov [rsp+148], ebx
-	mov [rsp+152], esi
-	mov [rsp+156], edx
-
-	mov	rax, QWORD PTR [rcx+48]
-	mov	r10, rcx
-	xor	rax, QWORD PTR [rcx+16]
-	mov	r8, QWORD PTR [rcx+32]
-	xor	r8, QWORD PTR [rcx]
-	mov	r9, QWORD PTR [rcx+40]
-	xor	r9, QWORD PTR [rcx+8]
-	movq	xmm4, rax
-	mov	rdx, QWORD PTR [rcx+56]
-	xor	rdx, QWORD PTR [rcx+24]
-	mov	r11, QWORD PTR [rcx+224]
-	mov	rcx, QWORD PTR [rcx+88]
-	xor	rcx, QWORD PTR [r10+72]
-	mov	rax, QWORD PTR [r10+80]
-	movq	xmm0, rdx
-	xor	rax, QWORD PTR [r10+64]
-
-	movaps	XMMWORD PTR [rsp+16], xmm6
-	movaps	XMMWORD PTR [rsp+32], xmm7
-	movaps	XMMWORD PTR [rsp+48], xmm8
-	movaps	XMMWORD PTR [rsp+64], xmm9
-	movaps	XMMWORD PTR [rsp+80], xmm10
-	movaps	XMMWORD PTR [rsp+96], xmm11
-	movaps	XMMWORD PTR [rsp+112], xmm12
-	movaps	XMMWORD PTR [rsp+128], xmm13
-
-	movq	xmm5, rax
-
-	mov	rax, r8
-	punpcklqdq xmm4, xmm0
-	and	eax, 2097136
-	movq	xmm10, QWORD PTR [r10+96]
-	movq	xmm0, rcx
-	mov	rcx, QWORD PTR [r10+104]
-	xorps	xmm9, xmm9
-	mov	QWORD PTR [rsp+328], rax
-	movq	xmm12, r11
-	mov	QWORD PTR [rsp+320], r9
-	punpcklqdq xmm5, xmm0
-	movq xmm13, rcx
-	mov r12d, 524288
-
-	ALIGN(64)
-CryptonightWOW_soft_aes_template_mainloop:
-	movd xmm11, r12d
-	mov	r12, QWORD PTR [r10+272]
-	lea	r13, QWORD PTR [rax+r11]
-	mov	esi, DWORD PTR [r13]
-	movq	xmm0, r9
-	mov	r10d, DWORD PTR [r13+4]
-	movq	xmm7, r8
-	mov	ebp, DWORD PTR [r13+12]
-	mov	r14d, DWORD PTR [r13+8]
-	mov	rdx, QWORD PTR [rsp+328]
-	movzx	ecx, sil
-	shr	esi, 8
-	punpcklqdq xmm7, xmm0
-	mov	r15d, DWORD PTR [r12+rcx*4]
-	movzx	ecx, r10b
-	shr	r10d, 8
-	mov	edi, DWORD PTR [r12+rcx*4]
-	movzx	ecx, r14b
-	shr	r14d, 8
-	mov	ebx, DWORD PTR [r12+rcx*4]
-	movzx	ecx, bpl
-	shr	ebp, 8
-	mov	r9d, DWORD PTR [r12+rcx*4]
-	movzx	ecx, r10b
-	shr	r10d, 8
-	xor	r15d, DWORD PTR [r12+rcx*4+1024]
-	movzx	ecx, r14b
-	shr	r14d, 8
-	mov	eax, r14d
-	shr	eax, 8
-	xor	edi, DWORD PTR [r12+rcx*4+1024]
-	add	eax, 256
-	movzx	ecx, bpl
-	shr	ebp, 8
-	xor	ebx, DWORD PTR [r12+rcx*4+1024]
-	movzx	ecx, sil
-	shr	esi, 8
-	xor	r9d, DWORD PTR [r12+rcx*4+1024]
-	add	r12, 2048
-	movzx	ecx, r10b
-	shr	r10d, 8
-	add	r10d, 256
-	mov	r11d, DWORD PTR [r12+rax*4]
-	xor	r11d, DWORD PTR [r12+rcx*4]
-	xor	r11d, r9d
-	movzx	ecx, sil
-	mov	r10d, DWORD PTR [r12+r10*4]
-	shr	esi, 8
-	add	esi, 256
-	xor	r10d, DWORD PTR [r12+rcx*4]
-	movzx	ecx, bpl
-	xor	r10d, ebx
-	shr	ebp, 8
-	movd	xmm1, r11d
-	add	ebp, 256
-	movq	r11, xmm12
-	mov	r9d, DWORD PTR [r12+rcx*4]
-	xor	r9d, DWORD PTR [r12+rsi*4]
-	mov	eax, DWORD PTR [r12+rbp*4]
-	xor	r9d, edi
-	movzx	ecx, r14b
-	movd	xmm0, r10d
-	movd	xmm2, r9d
-	xor	eax, DWORD PTR [r12+rcx*4]
-	mov	rcx, rdx
-	xor	eax, r15d
-	punpckldq xmm2, xmm1
-	xor	rcx, 16
-	movd	xmm6, eax
-	mov	rax, rdx
-	punpckldq xmm6, xmm0
-	xor	rax, 32
-	punpckldq xmm6, xmm2
-	xor	rdx, 48
-	movdqu	xmm2, XMMWORD PTR [rcx+r11]
-	pxor	xmm6, xmm7
-	paddq	xmm2, xmm4
-	movdqu	xmm1, XMMWORD PTR [rax+r11]
-	movdqu	xmm0, XMMWORD PTR [rdx+r11]
-	paddq	xmm0, xmm5
-	movdqu	XMMWORD PTR [rcx+r11], xmm0
-	movdqu	XMMWORD PTR [rax+r11], xmm2
-	movq rcx, xmm13
-	paddq	xmm1, xmm7
-	movdqu	XMMWORD PTR [rdx+r11], xmm1
-	movq	rdi, xmm6
-	mov	r10, rdi
-	and	r10d, 2097136
-	movdqa	xmm0, xmm6
-	pxor	xmm0, xmm4
-	movdqu	XMMWORD PTR [r13], xmm0
-
-	mov ebx, [rsp+144]
-	mov ebp, [rsp+152]
-	add ebx, [rsp+148]
-	add ebp, [rsp+156]
-	shl rbp, 32
-	or rbx, rbp
-
-	xor rbx, QWORD PTR [r10+r11]
-	lea	r14, QWORD PTR [r10+r11]
-	mov	rbp, QWORD PTR [r14+8]
-
-	mov [rsp+160], rbx
-	mov [rsp+168], rdi
-	mov [rsp+176], rbp
-	mov [rsp+184], r10
-	mov r10, rsp
-
-	mov ebx, [rsp+144]
-	mov esi, [rsp+148]
-	mov edi, [rsp+152]
-	mov ebp, [rsp+156]
-
-	movd esp, xmm7
-	movaps xmm0, xmm7
-	psrldq xmm0, 8
-	movd r15d, xmm0
-	movd eax, xmm4
-	movd edx, xmm5
-
-CryptonightWOW_soft_aes_template_part2:
-	mov rsp, r10
-	mov [rsp+144], ebx
-	mov [rsp+148], esi
-	mov [rsp+152], edi
-	mov [rsp+156], ebp
-
-	mov rbx, [rsp+160]
-	mov rdi, [rsp+168]
-	mov rbp, [rsp+176]
-	mov r10, [rsp+184]
-
-	mov	r9, r10
-	xor	r9, 16
-	mov	rcx, r10
-	xor	rcx, 32
-	xor	r10, 48
-	mov	rax, rbx
-	mul	rdi
-	movdqu	xmm2, XMMWORD PTR [r9+r11]
-	movdqu	xmm1, XMMWORD PTR [rcx+r11]
-	paddq	xmm1, xmm7
-	movq	xmm0, rax
-	movq	xmm3, rdx
-	xor	rax, QWORD PTR [r11+rcx+8]
-	xor	rdx, QWORD PTR [rcx+r11]
-	punpcklqdq xmm3, xmm0
-	add	r8, rdx
-	movdqu	xmm0, XMMWORD PTR [r10+r11]
-	pxor	xmm2, xmm3
-	paddq	xmm0, xmm5
-	paddq	xmm2, xmm4
-	movdqu	XMMWORD PTR [r9+r11], xmm0
-	movdqa	xmm5, xmm4
-	mov	r9, QWORD PTR [rsp+320]
-	movdqa	xmm4, xmm6
-	add	r9, rax
-	movdqu	XMMWORD PTR [rcx+r11], xmm2
-	movdqu	XMMWORD PTR [r10+r11], xmm1
-	mov	r10, QWORD PTR [rsp+304]
-	movd r12d, xmm11
-	mov	QWORD PTR [r14], r8
-	xor	r8, rbx
-	mov	rax, r8
-	mov	QWORD PTR [r14+8], r9
-	and	eax, 2097136
-	xor	r9, rbp
-	mov	QWORD PTR [rsp+320], r9
-	mov	QWORD PTR [rsp+328], rax
-	sub	r12d, 1
-	jne	CryptonightWOW_soft_aes_template_mainloop
-
-CryptonightWOW_soft_aes_template_part3:
-	movaps	xmm6, XMMWORD PTR [rsp+16]
-	movaps	xmm7, XMMWORD PTR [rsp+32]
-	movaps	xmm8, XMMWORD PTR [rsp+48]
-	movaps	xmm9, XMMWORD PTR [rsp+64]
-	movaps	xmm10, XMMWORD PTR [rsp+80]
-	movaps	xmm11, XMMWORD PTR [rsp+96]
-	movaps	xmm12, XMMWORD PTR [rsp+112]
-	movaps	xmm13, XMMWORD PTR [rsp+128]
-
-	add	rsp, 232
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	pop	rsi
-	pop	rbp
-	pop	rbx
-	ret
-CryptonightWOW_soft_aes_template_end:
diff --git a/src/crypto/asm/CryptonightWOW_template.inc b/src/crypto/asm/CryptonightWOW_template.inc
deleted file mode 100644
index 82d455f6..00000000
--- a/src/crypto/asm/CryptonightWOW_template.inc
+++ /dev/null
@@ -1,491 +0,0 @@
-PUBLIC FN_PREFIX(CryptonightWOW_template_part1)
-PUBLIC FN_PREFIX(CryptonightWOW_template_mainloop)
-PUBLIC FN_PREFIX(CryptonightWOW_template_part2)
-PUBLIC FN_PREFIX(CryptonightWOW_template_part3)
-PUBLIC FN_PREFIX(CryptonightWOW_template_end)
-PUBLIC FN_PREFIX(CryptonightWOW_template_double_part1)
-PUBLIC FN_PREFIX(CryptonightWOW_template_double_mainloop)
-PUBLIC FN_PREFIX(CryptonightWOW_template_double_part2)
-PUBLIC FN_PREFIX(CryptonightWOW_template_double_part3)
-PUBLIC FN_PREFIX(CryptonightWOW_template_double_part4)
-PUBLIC FN_PREFIX(CryptonightWOW_template_double_end)
-
-ALIGN(64)
-FN_PREFIX(CryptonightWOW_template_part1):
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+16], rbx
-	mov	QWORD PTR [rsp+24], rbp
-	mov	QWORD PTR [rsp+32], rsi
-	push	r10
-	push	r11
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	push	rdi
-	sub	rsp, 64
-	mov	r12, rcx
-	mov	r8, QWORD PTR [r12+32]
-	mov	rdx, r12
-	xor	r8, QWORD PTR [r12]
-	mov	r15, QWORD PTR [r12+40]
-	mov	r9, r8
-	xor	r15, QWORD PTR [r12+8]
-	mov	r11, QWORD PTR [r12+224]
-	mov	r12, QWORD PTR [r12+56]
-	xor	r12, QWORD PTR [rdx+24]
-	mov	rax, QWORD PTR [rdx+48]
-	xor	rax, QWORD PTR [rdx+16]
-	movaps	XMMWORD PTR [rsp+48], xmm6
-	movq	xmm0, r12
-	movaps	XMMWORD PTR [rsp+32], xmm7
-	movaps	XMMWORD PTR [rsp+16], xmm8
-	movaps	XMMWORD PTR [rsp], xmm9
-	mov	r12, QWORD PTR [rdx+88]
-	xor	r12, QWORD PTR [rdx+72]
-	movq	xmm6, rax
-	mov	rax, QWORD PTR [rdx+80]
-	xor	rax, QWORD PTR [rdx+64]
-	punpcklqdq xmm6, xmm0
-	and	r9d, 2097136
-	movq	xmm0, r12
-	movq	xmm7, rax
-	punpcklqdq xmm7, xmm0
-	mov r10d, r9d
-	movq	xmm9, rsp
-	mov rsp, r8
-	mov	r8d, 524288
-
-	mov	ebx, [rdx+96]
-	mov	esi, [rdx+100]
-	mov	edi, [rdx+104]
-	mov	ebp, [rdx+108]
-
-	ALIGN(64)
-FN_PREFIX(CryptonightWOW_template_mainloop):
-	movdqa	xmm5, XMMWORD PTR [r9+r11]
-	movq	xmm0, r15
-	movq	xmm4, rsp
-	punpcklqdq xmm4, xmm0
-	lea	rdx, QWORD PTR [r9+r11]
-
-	aesenc	xmm5, xmm4
-	movd	r10d, xmm5
-	and	r10d, 2097136
-
-	mov	r12d, r9d
-	mov	eax, r9d
-	xor	r9d, 48
-	xor	r12d, 16
-	xor	eax, 32
-	movdqu	xmm0, XMMWORD PTR [r9+r11]
-	movdqu	xmm2, XMMWORD PTR [r12+r11]
-	movdqu	xmm1, XMMWORD PTR [rax+r11]
-	paddq	xmm0, xmm7
-	paddq	xmm2, xmm6
-	paddq	xmm1, xmm4
-	movdqu	XMMWORD PTR [r12+r11], xmm0
-	movq	r12, xmm5
-	movdqu	XMMWORD PTR [rax+r11], xmm2
-	movdqu	XMMWORD PTR [r9+r11], xmm1
-
-	movdqa	xmm0, xmm5
-	pxor	xmm0, xmm6
-	movdqu	XMMWORD PTR [rdx], xmm0
-
-	lea	r13d, [ebx+esi]
-	lea	edx, [edi+ebp]
-	shl rdx, 32
-	or	r13, rdx
-
-	xor	r13, QWORD PTR [r10+r11]
-	mov	r14, QWORD PTR [r10+r11+8]
-
-	movd eax, xmm6
-	movd edx, xmm7
-	pextrd r9d, xmm7, 2
-
-FN_PREFIX(CryptonightWOW_template_part2):
-	mov	rax, r13
-	mul	r12
-	movq	xmm0, rax
-	movq	xmm3, rdx
-	punpcklqdq xmm3, xmm0
-
-	mov	r9d, r10d
-	mov	r12d, r10d
-	xor	r9d, 16
-	xor	r12d, 32
-	xor	r10d, 48
-	movdqa	xmm1, XMMWORD PTR [r12+r11]
-	xor	rdx, QWORD PTR [r12+r11]
-	xor	rax, QWORD PTR [r11+r12+8]
-	movdqa	xmm2, XMMWORD PTR [r9+r11]
-	pxor	xmm3, xmm2
-	paddq	xmm7, XMMWORD PTR [r10+r11]
-	paddq	xmm1, xmm4
-	paddq	xmm3, xmm6
-	movdqu	XMMWORD PTR [r9+r11], xmm7
-	movdqu	XMMWORD PTR [r12+r11], xmm3
-	movdqu	XMMWORD PTR [r10+r11], xmm1
-
-	movdqa	xmm7, xmm6
-	add	r15, rax
-	add	rsp, rdx
-	xor	r10, 48
-	mov	QWORD PTR [r10+r11], rsp
-	xor	rsp, r13
-	mov	r9d, esp
-	mov	QWORD PTR [r10+r11+8], r15
-	and	r9d, 2097136
-	xor	r15, r14
-	movdqa	xmm6, xmm5
-	dec	r8d
-	jnz	FN_PREFIX(CryptonightWOW_template_mainloop)
-
-FN_PREFIX(CryptonightWOW_template_part3):
-	movq	rsp, xmm9
-
-	mov	rbx, QWORD PTR [rsp+136]
-	mov	rbp, QWORD PTR [rsp+144]
-	mov	rsi, QWORD PTR [rsp+152]
-	movaps	xmm6, XMMWORD PTR [rsp+48]
-	movaps	xmm7, XMMWORD PTR [rsp+32]
-	movaps	xmm8, XMMWORD PTR [rsp+16]
-	movaps	xmm9, XMMWORD PTR [rsp]
-	add	rsp, 64
-	pop	rdi
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	r11
-	pop	r10
-	ret	0
-FN_PREFIX(CryptonightWOW_template_end):
-
-ALIGN(64)
-FN_PREFIX(CryptonightWOW_template_double_part1):
-	mov	rdx, [rcx+8]
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+24], rbx
-	push	rbp
-	push	rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 320
-	mov	r14, QWORD PTR [rcx+32]
-	mov	r8, rcx
-	xor	r14, QWORD PTR [rcx]
-	mov	r12, QWORD PTR [rcx+40]
-	mov	ebx, r14d
-	mov	rsi, QWORD PTR [rcx+224]
-	and	ebx, 2097136
-	xor	r12, QWORD PTR [rcx+8]
-	mov	rcx, QWORD PTR [rcx+56]
-	xor	rcx, QWORD PTR [r8+24]
-	mov	rax, QWORD PTR [r8+48]
-	xor	rax, QWORD PTR [r8+16]
-	mov	r15, QWORD PTR [rdx+32]
-	xor	r15, QWORD PTR [rdx]
-	movq	xmm0, rcx
-	mov	rcx, QWORD PTR [r8+88]
-	xor	rcx, QWORD PTR [r8+72]
-	mov	r13, QWORD PTR [rdx+40]
-	mov	rdi, QWORD PTR [rdx+224]
-	xor	r13, QWORD PTR [rdx+8]
-	movaps	XMMWORD PTR [rsp+160], xmm6
-	movaps	XMMWORD PTR [rsp+176], xmm7
-	movaps	XMMWORD PTR [rsp+192], xmm8
-	movaps	XMMWORD PTR [rsp+208], xmm9
-	movaps	XMMWORD PTR [rsp+224], xmm10
-	movaps	XMMWORD PTR [rsp+240], xmm11
-	movaps	XMMWORD PTR [rsp+256], xmm12
-	movaps	XMMWORD PTR [rsp+272], xmm13
-	movaps	XMMWORD PTR [rsp+288], xmm14
-	movaps	XMMWORD PTR [rsp+304], xmm15
-	movq	xmm7, rax
-	mov	rax, QWORD PTR [r8+80]
-	xor	rax, QWORD PTR [r8+64]
-
-	movaps xmm1, XMMWORD PTR [rdx+96]
-	movaps xmm2, XMMWORD PTR [r8+96]
-	movaps XMMWORD PTR [rsp], xmm1
-	movaps XMMWORD PTR [rsp+16], xmm2
-
-	mov	r8d, r15d
-	punpcklqdq xmm7, xmm0
-	movq	xmm0, rcx
-	mov	rcx, QWORD PTR [rdx+56]
-	xor	rcx, QWORD PTR [rdx+24]
-	movq	xmm9, rax
-	mov	QWORD PTR [rsp+128], rsi
-	mov	rax, QWORD PTR [rdx+48]
-	xor	rax, QWORD PTR [rdx+16]
-	punpcklqdq xmm9, xmm0
-	movq	xmm0, rcx
-	mov	rcx, QWORD PTR [rdx+88]
-	xor	rcx, QWORD PTR [rdx+72]
-	movq	xmm8, rax
-	mov	QWORD PTR [rsp+136], rdi
-	mov	rax, QWORD PTR [rdx+80]
-	xor	rax, QWORD PTR [rdx+64]
-	punpcklqdq xmm8, xmm0
-	and	r8d, 2097136
-	movq	xmm0, rcx
-	mov	r11d, 524288
-	movq	xmm10, rax
-	punpcklqdq xmm10, xmm0
-	
-	movq xmm14, QWORD PTR [rsp+128]
-	movq xmm15, QWORD PTR [rsp+136]
-
-	ALIGN(64)
-FN_PREFIX(CryptonightWOW_template_double_mainloop):
-	movdqu	xmm6, XMMWORD PTR [rbx+rsi]
-	movq	xmm0, r12
-	mov	ecx, ebx
-	movq	xmm3, r14
-	punpcklqdq xmm3, xmm0
-	xor	ebx, 16
-	aesenc	xmm6, xmm3
-	movq	rdx, xmm6
-	movq	xmm4, r15
-	movdqu	xmm0, XMMWORD PTR [rbx+rsi]
-	xor	ebx, 48
-	paddq	xmm0, xmm7
-	movdqu	xmm1, XMMWORD PTR [rbx+rsi]
-	movdqu	XMMWORD PTR [rbx+rsi], xmm0
-	paddq	xmm1, xmm3
-	xor	ebx, 16
-	mov	eax, ebx
-	xor	rax, 32
-	movdqu	xmm0, XMMWORD PTR [rbx+rsi]
-	movdqu	XMMWORD PTR [rbx+rsi], xmm1
-	paddq	xmm0, xmm9
-	movdqu	XMMWORD PTR [rax+rsi], xmm0
-	movdqa	xmm0, xmm6
-	pxor	xmm0, xmm7
-	movdqu	XMMWORD PTR [rcx+rsi], xmm0
-	mov	esi, edx
-	movdqu	xmm5, XMMWORD PTR [r8+rdi]
-	and	esi, 2097136
-	mov	ecx, r8d
-	movq	xmm0, r13
-	punpcklqdq xmm4, xmm0
-	xor	r8d, 16
-	aesenc	xmm5, xmm4
-	movdqu	xmm0, XMMWORD PTR [r8+rdi]
-	xor	r8d, 48
-	paddq	xmm0, xmm8
-	movdqu	xmm1, XMMWORD PTR [r8+rdi]
-	movdqu	XMMWORD PTR [r8+rdi], xmm0
-	paddq	xmm1, xmm4
-	xor	r8d, 16
-	mov	eax, r8d
-	xor	rax, 32
-	movdqu	xmm0, XMMWORD PTR [r8+rdi]
-	movdqu	XMMWORD PTR [r8+rdi], xmm1
-	paddq	xmm0, xmm10
-	movdqu	XMMWORD PTR [rax+rdi], xmm0
-	movdqa	xmm0, xmm5
-	pxor	xmm0, xmm8
-	movdqu	XMMWORD PTR [rcx+rdi], xmm0
-	movq	rdi, xmm5
-	movq	rcx, xmm14
-	mov	ebp, edi
-	mov	r8, QWORD PTR [rcx+rsi]
-	mov	r10, QWORD PTR [rcx+rsi+8]
-	lea	r9, QWORD PTR [rcx+rsi]
-	xor	esi, 16
-
-	movq xmm0, rsp
-	movq xmm1, rsi
-	movq xmm2, rdi
-	movq xmm11, rbp
-	movq xmm12, r15
-	movq xmm13, rdx
-	mov [rsp+104], rcx
-	mov [rsp+112], r9
-
-	mov ebx, DWORD PTR [rsp+16]
-	mov esi, DWORD PTR [rsp+20]
-	mov edi, DWORD PTR [rsp+24]
-	mov ebp, DWORD PTR [rsp+28]
-
-	lea	eax, [ebx+esi]
-	lea	edx, [edi+ebp]
-	shl rdx, 32
-	or	rax, rdx
-	xor r8, rax
-
-	movd esp, xmm3
-	pextrd r15d, xmm3, 2
-	movd eax, xmm7
-	movd edx, xmm9
-	pextrd r9d, xmm9, 2
-
-FN_PREFIX(CryptonightWOW_template_double_part2):
-
-	movq rsp, xmm0
-	mov DWORD PTR [rsp+16], ebx
-	mov DWORD PTR [rsp+20], esi
-	mov DWORD PTR [rsp+24], edi
-	mov DWORD PTR [rsp+28], ebp
-
-	movq rsi, xmm1
-	movq rdi, xmm2
-	movq rbp, xmm11
-	movq r15, xmm12
-	movq rdx, xmm13
-	mov rcx, [rsp+104]
-	mov r9, [rsp+112]
-
-	mov rbx, r8
-	mov	rax, r8
-	mul	rdx
-	and	ebp, 2097136
-	mov	r8, rax
-	movq	xmm1, rdx
-	movq	xmm0, r8
-	punpcklqdq xmm1, xmm0
-	pxor	xmm1, XMMWORD PTR [rcx+rsi]
-	xor	esi, 48
-	paddq	xmm1, xmm7
-	movdqu	xmm2, XMMWORD PTR [rsi+rcx]
-	xor	rdx, QWORD PTR [rsi+rcx]
-	paddq	xmm2, xmm3
-	xor	r8, QWORD PTR [rsi+rcx+8]
-	movdqu	XMMWORD PTR [rsi+rcx], xmm1
-	xor	esi, 16
-	mov	eax, esi
-	mov	rsi, rcx
-	movdqu	xmm0, XMMWORD PTR [rax+rcx]
-	movdqu	XMMWORD PTR [rax+rcx], xmm2
-	paddq	xmm0, xmm9
-	add	r12, r8
-	xor	rax, 32
-	add	r14, rdx
-	movdqa	xmm9, xmm7
-	movdqa	xmm7, xmm6
-	movdqu	XMMWORD PTR [rax+rcx], xmm0
-	mov	QWORD PTR [r9+8], r12
-	xor	r12, r10
-	mov	QWORD PTR [r9], r14
-	movq rcx, xmm15
-	xor	r14, rbx
-	mov	r10d, ebp
-	mov	ebx, r14d
-	xor	ebp, 16
-	and	ebx, 2097136
-	mov	r8, QWORD PTR [r10+rcx]
-	mov	r9, QWORD PTR [r10+rcx+8]
-
-	movq xmm0, rsp
-	movq xmm1, rbx
-	movq xmm2, rsi
-	movq xmm11, rdi
-	movq xmm12, rbp
-	movq xmm13, r15
-	mov [rsp+104], rcx
-	mov [rsp+112], r9
-
-	mov ebx, DWORD PTR [rsp]
-	mov esi, DWORD PTR [rsp+4]
-	mov edi, DWORD PTR [rsp+8]
-	mov ebp, DWORD PTR [rsp+12]
-
-	lea	eax, [ebx+esi]
-	lea	edx, [edi+ebp]
-	shl rdx, 32
-	or	rax, rdx
-
-	xor r8, rax
-	movq xmm3, r8
-
-	movd esp, xmm4
-	pextrd r15d, xmm4, 2
-	movd eax, xmm8
-	movd edx, xmm10
-	pextrd r9d, xmm10, 2
-
-FN_PREFIX(CryptonightWOW_template_double_part3):
-
-	movq rsp, xmm0
-	mov DWORD PTR [rsp], ebx
-	mov DWORD PTR [rsp+4], esi
-	mov DWORD PTR [rsp+8], edi
-	mov DWORD PTR [rsp+12], ebp
-
-	movq rbx, xmm1
-	movq rsi, xmm2
-	movq rdi, xmm11
-	movq rbp, xmm12
-	movq r15, xmm13
-	mov rcx, [rsp+104]
-	mov r9, [rsp+112]
-
-	mov rax, r8
-	mul	rdi
-	movq	xmm1, rdx
-	movq	xmm0, rax
-	punpcklqdq xmm1, xmm0
-	mov	rdi, rcx
-	mov	r8, rax
-	pxor	xmm1, XMMWORD PTR [rbp+rcx]
-	xor	ebp, 48
-	paddq	xmm1, xmm8
-	xor	r8, QWORD PTR [rbp+rcx+8]
-	xor	rdx, QWORD PTR [rbp+rcx]
-	add	r13, r8
-	movdqu	xmm2, XMMWORD PTR [rbp+rcx]
-	add	r15, rdx
-	movdqu	XMMWORD PTR [rbp+rcx], xmm1
-	paddq	xmm2, xmm4
-	xor	ebp, 16
-	mov	eax, ebp
-	xor	rax, 32
-	movdqu	xmm0, XMMWORD PTR [rbp+rcx]
-	movdqu	XMMWORD PTR [rbp+rcx], xmm2
-	paddq	xmm0, xmm10
-	movdqu	XMMWORD PTR [rax+rcx], xmm0
-	movq rax, xmm3
-	movdqa	xmm10, xmm8
-	mov	QWORD PTR [r10+rcx], r15
-	movdqa	xmm8, xmm5
-	xor	r15, rax
-	mov	QWORD PTR [r10+rcx+8], r13
-	mov	r8d, r15d
-	xor	r13, r9
-	and	r8d, 2097136
-	dec r11d
-	jnz	FN_PREFIX(CryptonightWOW_template_double_mainloop)
-
-FN_PREFIX(CryptonightWOW_template_double_part4):
-
-	mov	rbx, QWORD PTR [rsp+400]
-	movaps	xmm6, XMMWORD PTR [rsp+160]
-	movaps	xmm7, XMMWORD PTR [rsp+176]
-	movaps	xmm8, XMMWORD PTR [rsp+192]
-	movaps	xmm9, XMMWORD PTR [rsp+208]
-	movaps	xmm10, XMMWORD PTR [rsp+224]
-	movaps	xmm11, XMMWORD PTR [rsp+240]
-	movaps	xmm12, XMMWORD PTR [rsp+256]
-	movaps	xmm13, XMMWORD PTR [rsp+272]
-	movaps	xmm14, XMMWORD PTR [rsp+288]
-	movaps	xmm15, XMMWORD PTR [rsp+304]
-	add	rsp, 320
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	pop	rsi
-	pop	rbp
-	ret	0
-FN_PREFIX(CryptonightWOW_template_double_end):
diff --git a/src/crypto/asm/CryptonightWOW_template_win.inc b/src/crypto/asm/CryptonightWOW_template_win.inc
deleted file mode 100644
index 644c01f1..00000000
--- a/src/crypto/asm/CryptonightWOW_template_win.inc
+++ /dev/null
@@ -1,491 +0,0 @@
-PUBLIC CryptonightWOW_template_part1
-PUBLIC CryptonightWOW_template_mainloop
-PUBLIC CryptonightWOW_template_part2
-PUBLIC CryptonightWOW_template_part3
-PUBLIC CryptonightWOW_template_end
-PUBLIC CryptonightWOW_template_double_part1
-PUBLIC CryptonightWOW_template_double_mainloop
-PUBLIC CryptonightWOW_template_double_part2
-PUBLIC CryptonightWOW_template_double_part3
-PUBLIC CryptonightWOW_template_double_part4
-PUBLIC CryptonightWOW_template_double_end
-
-ALIGN(64)
-CryptonightWOW_template_part1:
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+16], rbx
-	mov	QWORD PTR [rsp+24], rbp
-	mov	QWORD PTR [rsp+32], rsi
-	push	r10
-	push	r11
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	push	rdi
-	sub	rsp, 64
-	mov	r12, rcx
-	mov	r8, QWORD PTR [r12+32]
-	mov	rdx, r12
-	xor	r8, QWORD PTR [r12]
-	mov	r15, QWORD PTR [r12+40]
-	mov	r9, r8
-	xor	r15, QWORD PTR [r12+8]
-	mov	r11, QWORD PTR [r12+224]
-	mov	r12, QWORD PTR [r12+56]
-	xor	r12, QWORD PTR [rdx+24]
-	mov	rax, QWORD PTR [rdx+48]
-	xor	rax, QWORD PTR [rdx+16]
-	movaps	XMMWORD PTR [rsp+48], xmm6
-	movq	xmm0, r12
-	movaps	XMMWORD PTR [rsp+32], xmm7
-	movaps	XMMWORD PTR [rsp+16], xmm8
-	movaps	XMMWORD PTR [rsp], xmm9
-	mov	r12, QWORD PTR [rdx+88]
-	xor	r12, QWORD PTR [rdx+72]
-	movq	xmm6, rax
-	mov	rax, QWORD PTR [rdx+80]
-	xor	rax, QWORD PTR [rdx+64]
-	punpcklqdq xmm6, xmm0
-	and	r9d, 2097136
-	movq	xmm0, r12
-	movq	xmm7, rax
-	punpcklqdq xmm7, xmm0
-	mov r10d, r9d
-	movq	xmm9, rsp
-	mov rsp, r8
-	mov	r8d, 524288
-
-	mov	ebx, [rdx+96]
-	mov	esi, [rdx+100]
-	mov	edi, [rdx+104]
-	mov	ebp, [rdx+108]
-
-	ALIGN(64)
-CryptonightWOW_template_mainloop:
-	movdqa	xmm5, XMMWORD PTR [r9+r11]
-	movq	xmm0, r15
-	movq	xmm4, rsp
-	punpcklqdq xmm4, xmm0
-	lea	rdx, QWORD PTR [r9+r11]
-
-	aesenc	xmm5, xmm4
-	movd	r10d, xmm5
-	and	r10d, 2097136
-
-	mov	r12d, r9d
-	mov	eax, r9d
-	xor	r9d, 48
-	xor	r12d, 16
-	xor	eax, 32
-	movdqu	xmm0, XMMWORD PTR [r9+r11]
-	movdqu	xmm2, XMMWORD PTR [r12+r11]
-	movdqu	xmm1, XMMWORD PTR [rax+r11]
-	paddq	xmm0, xmm7
-	paddq	xmm2, xmm6
-	paddq	xmm1, xmm4
-	movdqu	XMMWORD PTR [r12+r11], xmm0
-	movq	r12, xmm5
-	movdqu	XMMWORD PTR [rax+r11], xmm2
-	movdqu	XMMWORD PTR [r9+r11], xmm1
-
-	movdqa	xmm0, xmm5
-	pxor	xmm0, xmm6
-	movdqu	XMMWORD PTR [rdx], xmm0
-
-	lea	r13d, [ebx+esi]
-	lea	edx, [edi+ebp]
-	shl rdx, 32
-	or	r13, rdx
-
-	xor	r13, QWORD PTR [r10+r11]
-	mov	r14, QWORD PTR [r10+r11+8]
-
-	movd eax, xmm6
-	movd edx, xmm7
-	pextrd r9d, xmm7, 2
-
-CryptonightWOW_template_part2:
-	mov	rax, r13
-	mul	r12
-	movq	xmm0, rax
-	movq	xmm3, rdx
-	punpcklqdq xmm3, xmm0
-
-	mov	r9d, r10d
-	mov	r12d, r10d
-	xor	r9d, 16
-	xor	r12d, 32
-	xor	r10d, 48
-	movdqa	xmm1, XMMWORD PTR [r12+r11]
-	xor	rdx, QWORD PTR [r12+r11]
-	xor	rax, QWORD PTR [r11+r12+8]
-	movdqa	xmm2, XMMWORD PTR [r9+r11]
-	pxor	xmm3, xmm2
-	paddq	xmm7, XMMWORD PTR [r10+r11]
-	paddq	xmm1, xmm4
-	paddq	xmm3, xmm6
-	movdqu	XMMWORD PTR [r9+r11], xmm7
-	movdqu	XMMWORD PTR [r12+r11], xmm3
-	movdqu	XMMWORD PTR [r10+r11], xmm1
-
-	movdqa	xmm7, xmm6
-	add	r15, rax
-	add	rsp, rdx
-	xor	r10, 48
-	mov	QWORD PTR [r10+r11], rsp
-	xor	rsp, r13
-	mov	r9d, esp
-	mov	QWORD PTR [r10+r11+8], r15
-	and	r9d, 2097136
-	xor	r15, r14
-	movdqa	xmm6, xmm5
-	dec	r8d
-	jnz	CryptonightWOW_template_mainloop
-
-CryptonightWOW_template_part3:
-	movq	rsp, xmm9
-
-	mov	rbx, QWORD PTR [rsp+136]
-	mov	rbp, QWORD PTR [rsp+144]
-	mov	rsi, QWORD PTR [rsp+152]
-	movaps	xmm6, XMMWORD PTR [rsp+48]
-	movaps	xmm7, XMMWORD PTR [rsp+32]
-	movaps	xmm8, XMMWORD PTR [rsp+16]
-	movaps	xmm9, XMMWORD PTR [rsp]
-	add	rsp, 64
-	pop	rdi
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	r11
-	pop	r10
-	ret	0
-CryptonightWOW_template_end:
-
-ALIGN(64)
-CryptonightWOW_template_double_part1:
-	mov	rdx, [rcx+8]
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+24], rbx
-	push	rbp
-	push	rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 320
-	mov	r14, QWORD PTR [rcx+32]
-	mov	r8, rcx
-	xor	r14, QWORD PTR [rcx]
-	mov	r12, QWORD PTR [rcx+40]
-	mov	ebx, r14d
-	mov	rsi, QWORD PTR [rcx+224]
-	and	ebx, 2097136
-	xor	r12, QWORD PTR [rcx+8]
-	mov	rcx, QWORD PTR [rcx+56]
-	xor	rcx, QWORD PTR [r8+24]
-	mov	rax, QWORD PTR [r8+48]
-	xor	rax, QWORD PTR [r8+16]
-	mov	r15, QWORD PTR [rdx+32]
-	xor	r15, QWORD PTR [rdx]
-	movq	xmm0, rcx
-	mov	rcx, QWORD PTR [r8+88]
-	xor	rcx, QWORD PTR [r8+72]
-	mov	r13, QWORD PTR [rdx+40]
-	mov	rdi, QWORD PTR [rdx+224]
-	xor	r13, QWORD PTR [rdx+8]
-	movaps	XMMWORD PTR [rsp+160], xmm6
-	movaps	XMMWORD PTR [rsp+176], xmm7
-	movaps	XMMWORD PTR [rsp+192], xmm8
-	movaps	XMMWORD PTR [rsp+208], xmm9
-	movaps	XMMWORD PTR [rsp+224], xmm10
-	movaps	XMMWORD PTR [rsp+240], xmm11
-	movaps	XMMWORD PTR [rsp+256], xmm12
-	movaps	XMMWORD PTR [rsp+272], xmm13
-	movaps	XMMWORD PTR [rsp+288], xmm14
-	movaps	XMMWORD PTR [rsp+304], xmm15
-	movq	xmm7, rax
-	mov	rax, QWORD PTR [r8+80]
-	xor	rax, QWORD PTR [r8+64]
-
-	movaps xmm1, XMMWORD PTR [rdx+96]
-	movaps xmm2, XMMWORD PTR [r8+96]
-	movaps XMMWORD PTR [rsp], xmm1
-	movaps XMMWORD PTR [rsp+16], xmm2
-
-	mov	r8d, r15d
-	punpcklqdq xmm7, xmm0
-	movq	xmm0, rcx
-	mov	rcx, QWORD PTR [rdx+56]
-	xor	rcx, QWORD PTR [rdx+24]
-	movq	xmm9, rax
-	mov	QWORD PTR [rsp+128], rsi
-	mov	rax, QWORD PTR [rdx+48]
-	xor	rax, QWORD PTR [rdx+16]
-	punpcklqdq xmm9, xmm0
-	movq	xmm0, rcx
-	mov	rcx, QWORD PTR [rdx+88]
-	xor	rcx, QWORD PTR [rdx+72]
-	movq	xmm8, rax
-	mov	QWORD PTR [rsp+136], rdi
-	mov	rax, QWORD PTR [rdx+80]
-	xor	rax, QWORD PTR [rdx+64]
-	punpcklqdq xmm8, xmm0
-	and	r8d, 2097136
-	movq	xmm0, rcx
-	mov	r11d, 524288
-	movq	xmm10, rax
-	punpcklqdq xmm10, xmm0
-	
-	movq xmm14, QWORD PTR [rsp+128]
-	movq xmm15, QWORD PTR [rsp+136]
-
-	ALIGN(64)
-CryptonightWOW_template_double_mainloop:
-	movdqu	xmm6, XMMWORD PTR [rbx+rsi]
-	movq	xmm0, r12
-	mov	ecx, ebx
-	movq	xmm3, r14
-	punpcklqdq xmm3, xmm0
-	xor	ebx, 16
-	aesenc	xmm6, xmm3
-	movq	rdx, xmm6
-	movq	xmm4, r15
-	movdqu	xmm0, XMMWORD PTR [rbx+rsi]
-	xor	ebx, 48
-	paddq	xmm0, xmm7
-	movdqu	xmm1, XMMWORD PTR [rbx+rsi]
-	movdqu	XMMWORD PTR [rbx+rsi], xmm0
-	paddq	xmm1, xmm3
-	xor	ebx, 16
-	mov	eax, ebx
-	xor	rax, 32
-	movdqu	xmm0, XMMWORD PTR [rbx+rsi]
-	movdqu	XMMWORD PTR [rbx+rsi], xmm1
-	paddq	xmm0, xmm9
-	movdqu	XMMWORD PTR [rax+rsi], xmm0
-	movdqa	xmm0, xmm6
-	pxor	xmm0, xmm7
-	movdqu	XMMWORD PTR [rcx+rsi], xmm0
-	mov	esi, edx
-	movdqu	xmm5, XMMWORD PTR [r8+rdi]
-	and	esi, 2097136
-	mov	ecx, r8d
-	movq	xmm0, r13
-	punpcklqdq xmm4, xmm0
-	xor	r8d, 16
-	aesenc	xmm5, xmm4
-	movdqu	xmm0, XMMWORD PTR [r8+rdi]
-	xor	r8d, 48
-	paddq	xmm0, xmm8
-	movdqu	xmm1, XMMWORD PTR [r8+rdi]
-	movdqu	XMMWORD PTR [r8+rdi], xmm0
-	paddq	xmm1, xmm4
-	xor	r8d, 16
-	mov	eax, r8d
-	xor	rax, 32
-	movdqu	xmm0, XMMWORD PTR [r8+rdi]
-	movdqu	XMMWORD PTR [r8+rdi], xmm1
-	paddq	xmm0, xmm10
-	movdqu	XMMWORD PTR [rax+rdi], xmm0
-	movdqa	xmm0, xmm5
-	pxor	xmm0, xmm8
-	movdqu	XMMWORD PTR [rcx+rdi], xmm0
-	movq	rdi, xmm5
-	movq	rcx, xmm14
-	mov	ebp, edi
-	mov	r8, QWORD PTR [rcx+rsi]
-	mov	r10, QWORD PTR [rcx+rsi+8]
-	lea	r9, QWORD PTR [rcx+rsi]
-	xor	esi, 16
-
-	movq xmm0, rsp
-	movq xmm1, rsi
-	movq xmm2, rdi
-	movq xmm11, rbp
-	movq xmm12, r15
-	movq xmm13, rdx
-	mov [rsp+104], rcx
-	mov [rsp+112], r9
-
-	mov ebx, DWORD PTR [rsp+16]
-	mov esi, DWORD PTR [rsp+20]
-	mov edi, DWORD PTR [rsp+24]
-	mov ebp, DWORD PTR [rsp+28]
-
-	lea	eax, [ebx+esi]
-	lea	edx, [edi+ebp]
-	shl rdx, 32
-	or	rax, rdx
-	xor r8, rax
-
-	movd esp, xmm3
-	pextrd r15d, xmm3, 2
-	movd eax, xmm7
-	movd edx, xmm9
-	pextrd r9d, xmm9, 2
-
-CryptonightWOW_template_double_part2:
-
-	movq rsp, xmm0
-	mov DWORD PTR [rsp+16], ebx
-	mov DWORD PTR [rsp+20], esi
-	mov DWORD PTR [rsp+24], edi
-	mov DWORD PTR [rsp+28], ebp
-
-	movq rsi, xmm1
-	movq rdi, xmm2
-	movq rbp, xmm11
-	movq r15, xmm12
-	movq rdx, xmm13
-	mov rcx, [rsp+104]
-	mov r9, [rsp+112]
-
-	mov rbx, r8
-	mov	rax, r8
-	mul	rdx
-	and	ebp, 2097136
-	mov	r8, rax
-	movq	xmm1, rdx
-	movq	xmm0, r8
-	punpcklqdq xmm1, xmm0
-	pxor	xmm1, XMMWORD PTR [rcx+rsi]
-	xor	esi, 48
-	paddq	xmm1, xmm7
-	movdqu	xmm2, XMMWORD PTR [rsi+rcx]
-	xor	rdx, QWORD PTR [rsi+rcx]
-	paddq	xmm2, xmm3
-	xor	r8, QWORD PTR [rsi+rcx+8]
-	movdqu	XMMWORD PTR [rsi+rcx], xmm1
-	xor	esi, 16
-	mov	eax, esi
-	mov	rsi, rcx
-	movdqu	xmm0, XMMWORD PTR [rax+rcx]
-	movdqu	XMMWORD PTR [rax+rcx], xmm2
-	paddq	xmm0, xmm9
-	add	r12, r8
-	xor	rax, 32
-	add	r14, rdx
-	movdqa	xmm9, xmm7
-	movdqa	xmm7, xmm6
-	movdqu	XMMWORD PTR [rax+rcx], xmm0
-	mov	QWORD PTR [r9+8], r12
-	xor	r12, r10
-	mov	QWORD PTR [r9], r14
-	movq rcx, xmm15
-	xor	r14, rbx
-	mov	r10d, ebp
-	mov	ebx, r14d
-	xor	ebp, 16
-	and	ebx, 2097136
-	mov	r8, QWORD PTR [r10+rcx]
-	mov	r9, QWORD PTR [r10+rcx+8]
-
-	movq xmm0, rsp
-	movq xmm1, rbx
-	movq xmm2, rsi
-	movq xmm11, rdi
-	movq xmm12, rbp
-	movq xmm13, r15
-	mov [rsp+104], rcx
-	mov [rsp+112], r9
-
-	mov ebx, DWORD PTR [rsp]
-	mov esi, DWORD PTR [rsp+4]
-	mov edi, DWORD PTR [rsp+8]
-	mov ebp, DWORD PTR [rsp+12]
-
-	lea	eax, [ebx+esi]
-	lea	edx, [edi+ebp]
-	shl rdx, 32
-	or	rax, rdx
-
-	xor r8, rax
-	movq xmm3, r8
-
-	movd esp, xmm4
-	pextrd r15d, xmm4, 2
-	movd eax, xmm8
-	movd edx, xmm10
-	pextrd r9d, xmm10, 2
-
-CryptonightWOW_template_double_part3:
-
-	movq rsp, xmm0
-	mov DWORD PTR [rsp], ebx
-	mov DWORD PTR [rsp+4], esi
-	mov DWORD PTR [rsp+8], edi
-	mov DWORD PTR [rsp+12], ebp
-
-	movq rbx, xmm1
-	movq rsi, xmm2
-	movq rdi, xmm11
-	movq rbp, xmm12
-	movq r15, xmm13
-	mov rcx, [rsp+104]
-	mov r9, [rsp+112]
-
-	mov rax, r8
-	mul	rdi
-	movq	xmm1, rdx
-	movq	xmm0, rax
-	punpcklqdq xmm1, xmm0
-	mov	rdi, rcx
-	mov	r8, rax
-	pxor	xmm1, XMMWORD PTR [rbp+rcx]
-	xor	ebp, 48
-	paddq	xmm1, xmm8
-	xor	r8, QWORD PTR [rbp+rcx+8]
-	xor	rdx, QWORD PTR [rbp+rcx]
-	add	r13, r8
-	movdqu	xmm2, XMMWORD PTR [rbp+rcx]
-	add	r15, rdx
-	movdqu	XMMWORD PTR [rbp+rcx], xmm1
-	paddq	xmm2, xmm4
-	xor	ebp, 16
-	mov	eax, ebp
-	xor	rax, 32
-	movdqu	xmm0, XMMWORD PTR [rbp+rcx]
-	movdqu	XMMWORD PTR [rbp+rcx], xmm2
-	paddq	xmm0, xmm10
-	movdqu	XMMWORD PTR [rax+rcx], xmm0
-	movq rax, xmm3
-	movdqa	xmm10, xmm8
-	mov	QWORD PTR [r10+rcx], r15
-	movdqa	xmm8, xmm5
-	xor	r15, rax
-	mov	QWORD PTR [r10+rcx+8], r13
-	mov	r8d, r15d
-	xor	r13, r9
-	and	r8d, 2097136
-	dec r11d
-	jnz	CryptonightWOW_template_double_mainloop
-
-CryptonightWOW_template_double_part4:
-
-	mov	rbx, QWORD PTR [rsp+400]
-	movaps	xmm6, XMMWORD PTR [rsp+160]
-	movaps	xmm7, XMMWORD PTR [rsp+176]
-	movaps	xmm8, XMMWORD PTR [rsp+192]
-	movaps	xmm9, XMMWORD PTR [rsp+208]
-	movaps	xmm10, XMMWORD PTR [rsp+224]
-	movaps	xmm11, XMMWORD PTR [rsp+240]
-	movaps	xmm12, XMMWORD PTR [rsp+256]
-	movaps	xmm13, XMMWORD PTR [rsp+272]
-	movaps	xmm14, XMMWORD PTR [rsp+288]
-	movaps	xmm15, XMMWORD PTR [rsp+304]
-	add	rsp, 320
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	pop	rsi
-	pop	rbp
-	ret	0
-CryptonightWOW_template_double_end:
diff --git a/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc b/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc
deleted file mode 100644
index 1710cac7..00000000
--- a/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc
+++ /dev/null
@@ -1,413 +0,0 @@
-	mov	rdx, [rcx+8]
-	mov	rcx, [rcx]
-
-	mov	rax, rsp
-	push	rbx
-	push	rbp
-	push	rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 184
-
-	stmxcsr DWORD PTR [rsp+272]
-	mov DWORD PTR [rsp+276], 24448
-	ldmxcsr DWORD PTR [rsp+276]
-
-	mov	r13, QWORD PTR [rcx+224]
-	mov	r9, rdx
-	mov	r10, QWORD PTR [rcx+32]
-	mov	r8, rcx
-	xor	r10, QWORD PTR [rcx]
-	mov	r14d, 524288
-	mov	r11, QWORD PTR [rcx+40]
-	xor	r11, QWORD PTR [rcx+8]
-	mov	rsi, QWORD PTR [rdx+224]
-	mov	rdx, QWORD PTR [rcx+56]
-	xor	rdx, QWORD PTR [rcx+24]
-	mov	rdi, QWORD PTR [r9+32]
-	xor	rdi, QWORD PTR [r9]
-	mov	rbp, QWORD PTR [r9+40]
-	xor	rbp, QWORD PTR [r9+8]
-	movq	xmm0, rdx
-	movaps	XMMWORD PTR [rax-88], xmm6
-	movaps	XMMWORD PTR [rax-104], xmm7
-	movaps	XMMWORD PTR [rax-120], xmm8
-	movaps	XMMWORD PTR [rsp+112], xmm9
-	movaps	XMMWORD PTR [rsp+96], xmm10
-	movaps	XMMWORD PTR [rsp+80], xmm11
-	movaps	XMMWORD PTR [rsp+64], xmm12
-	movaps	XMMWORD PTR [rsp+48], xmm13
-	movaps	XMMWORD PTR [rsp+32], xmm14
-	movaps	XMMWORD PTR [rsp+16], xmm15
-	mov	rdx, r10
-	movq	xmm4, QWORD PTR [r8+96]
-	and	edx, 2097136
-	mov	rax, QWORD PTR [rcx+48]
-	xorps	xmm13, xmm13
-	xor	rax, QWORD PTR [rcx+16]
-	mov	rcx, QWORD PTR [rcx+88]
-	xor	rcx, QWORD PTR [r8+72]
-	movq	xmm5, QWORD PTR [r8+104]
-	movq	xmm7, rax
-
-	mov eax, 1
-	shl rax, 52
-	movq xmm14, rax
-	punpcklqdq xmm14, xmm14
-
-	mov eax, 1023
-	shl rax, 52
-	movq xmm12, rax
-	punpcklqdq xmm12, xmm12
-
-	mov	rax, QWORD PTR [r8+80]
-	xor	rax, QWORD PTR [r8+64]
-	punpcklqdq xmm7, xmm0
-	movq	xmm0, rcx
-	mov	rcx, QWORD PTR [r9+56]
-	xor	rcx, QWORD PTR [r9+24]
-	movq	xmm3, rax
-	mov	rax, QWORD PTR [r9+48]
-	xor	rax, QWORD PTR [r9+16]
-	punpcklqdq xmm3, xmm0
-	movq	xmm0, rcx
-	mov	QWORD PTR [rsp], r13
-	mov	rcx, QWORD PTR [r9+88]
-	xor	rcx, QWORD PTR [r9+72]
-	movq	xmm6, rax
-	mov	rax, QWORD PTR [r9+80]
-	xor	rax, QWORD PTR [r9+64]
-	punpcklqdq xmm6, xmm0
-	movq	xmm0, rcx
-	mov	QWORD PTR [rsp+256], r10
-	mov	rcx, rdi
-	mov	QWORD PTR [rsp+264], r11
-	movq	xmm8, rax
-	and	ecx, 2097136
-	punpcklqdq xmm8, xmm0
-	movq	xmm0, QWORD PTR [r9+96]
-	punpcklqdq xmm4, xmm0
-	movq	xmm0, QWORD PTR [r9+104]
-	lea	r8, QWORD PTR [rcx+rsi]
-	movdqu	xmm11, XMMWORD PTR [r8]
-	punpcklqdq xmm5, xmm0
-	lea	r9, QWORD PTR [rdx+r13]
-	movdqu	xmm15, XMMWORD PTR [r9]
-
-	ALIGN(64)
-main_loop_double_sandybridge:
-	movdqu	xmm9, xmm15
-	mov eax, edx
-	mov ebx, edx
-	xor eax, 16
-	xor ebx, 32
-	xor edx, 48
-
-	movq	xmm0, r11
-	movq	xmm2, r10
-	punpcklqdq xmm2, xmm0
-	aesenc	xmm9, xmm2
-
-	movdqu	xmm0, XMMWORD PTR [rax+r13]
-	movdqu	xmm1, XMMWORD PTR [rbx+r13]
-	paddq	xmm0, xmm7
-	paddq	xmm1, xmm2
-	movdqu	XMMWORD PTR [rbx+r13], xmm0
-	movdqu	xmm0, XMMWORD PTR [rdx+r13]
-	movdqu	XMMWORD PTR [rdx+r13], xmm1
-	paddq	xmm0, xmm3
-	movdqu	XMMWORD PTR [rax+r13], xmm0
-
-	movq	r11, xmm9
-	mov	edx, r11d
-	and	edx, 2097136
-	movdqa	xmm0, xmm9
-	pxor	xmm0, xmm7
-	movdqu	XMMWORD PTR [r9], xmm0
-
-	lea	rbx, QWORD PTR [rdx+r13]
-	mov	r10, QWORD PTR [rdx+r13]
-
-	movdqu	xmm10, xmm11
-	movq	xmm0, rbp
-	movq	xmm11, rdi
-	punpcklqdq xmm11, xmm0
-	aesenc	xmm10, xmm11
-
-	mov eax, ecx
-	mov r12d, ecx
-	xor eax, 16
-	xor r12d, 32
-	xor ecx, 48
-
-	movdqu	xmm0, XMMWORD PTR [rax+rsi]
-	paddq	xmm0, xmm6
-	movdqu	xmm1, XMMWORD PTR [r12+rsi]
-	movdqu	XMMWORD PTR [r12+rsi], xmm0
-	paddq	xmm1, xmm11
-	movdqu	xmm0, XMMWORD PTR [rcx+rsi]
-	movdqu	XMMWORD PTR [rcx+rsi], xmm1
-	paddq	xmm0, xmm8
-	movdqu	XMMWORD PTR [rax+rsi], xmm0
-
-	movq	rcx, xmm10
-	and	ecx, 2097136
-
-	movdqa	xmm0, xmm10
-	pxor	xmm0, xmm6
-	movdqu	XMMWORD PTR [r8], xmm0
-	mov r12, QWORD PTR [rcx+rsi]
-
-	mov	r9, QWORD PTR [rbx+8]
-
-	xor edx, 16
-	mov r8d, edx
-	mov r15d, edx
-
-	movq	rdx, xmm5
-	shl	rdx, 32
-	movq	rax, xmm4
-	xor	rdx, rax
-	xor	r10, rdx
-	mov	rax, r10
-	mul	r11
-	mov r11d, r8d
-	xor r11d, 48
-	movq xmm0, rdx
-	xor rdx, [r11+r13]
-	movq xmm1, rax
-	xor rax, [r11+r13+8]
-	punpcklqdq xmm0, xmm1
-
-	pxor xmm0, XMMWORD PTR [r8+r13]
-	xor	r8d, 32
-	movdqu	xmm1, XMMWORD PTR [r11+r13]
-	paddq	xmm0, xmm7
-	paddq	xmm1, xmm2
-	movdqu	XMMWORD PTR [r11+r13], xmm0
-	movdqu	xmm0, XMMWORD PTR [r8+r13]
-	movdqu	XMMWORD PTR [r8+r13], xmm1
-	paddq	xmm0, xmm3
-	movdqu	XMMWORD PTR [r15+r13], xmm0
-
-	mov	r11, QWORD PTR [rsp+256]
-	add	r11, rdx
-	mov	rdx, QWORD PTR [rsp+264]
-	add	rdx, rax
-	mov	QWORD PTR [rbx], r11
-	xor	r11, r10
-	mov	QWORD PTR [rbx+8], rdx
-	xor	rdx, r9
-	mov	QWORD PTR [rsp+256], r11
-	and	r11d, 2097136
-	mov	QWORD PTR [rsp+264], rdx
-	mov	QWORD PTR [rsp+8], r11
-	lea	r15, QWORD PTR [r11+r13]
-	movdqu xmm15, XMMWORD PTR [r11+r13]
-	lea	r13, QWORD PTR [rsi+rcx]
-	movdqa	xmm0, xmm5
-	psrldq	xmm0, 8
-	movaps	xmm2, xmm13
-	movq	r10, xmm0
-	psllq	xmm5, 1
-	shl	r10, 32
-	movdqa	xmm0, xmm9
-	psrldq	xmm0, 8
-	movdqa	xmm1, xmm10
-	movq	r11, xmm0
-	psrldq	xmm1, 8
-	movq	r8, xmm1
-	psrldq	xmm4, 8
-	movaps	xmm0, xmm13
-	movq	rax, xmm4
-	xor	r10, rax
-	movaps	xmm1, xmm13
-	xor	r10, r12
-	lea	rax, QWORD PTR [r11+1]
-	shr	rax, 1
-	movdqa	xmm3, xmm9
-	punpcklqdq xmm3, xmm10
-	paddq	xmm5, xmm3
-	movq	rdx, xmm5
-	psrldq	xmm5, 8
-	cvtsi2sd xmm2, rax
-	or	edx, -2147483647
-	lea	rax, QWORD PTR [r8+1]
-	shr	rax, 1
-	movq	r9, xmm5
-	cvtsi2sd xmm0, rax
-	or	r9d, -2147483647
-	cvtsi2sd xmm1, rdx
-	unpcklpd xmm2, xmm0
-	movaps	xmm0, xmm13
-	cvtsi2sd xmm0, r9
-	unpcklpd xmm1, xmm0
-	divpd	xmm2, xmm1
-	paddq	xmm2, xmm14
-	cvttsd2si rax, xmm2
-	psrldq	xmm2, 8
-	mov	rbx, rax
-	imul	rax, rdx
-	sub	r11, rax
-	js	div_fix_1_sandybridge
-div_fix_1_ret_sandybridge:
-
-	cvttsd2si rdx, xmm2
-	mov	rax, rdx
-	imul	rax, r9
-	movd	xmm2, r11d
-	movd	xmm4, ebx
-	sub	r8, rax
-	js	div_fix_2_sandybridge
-div_fix_2_ret_sandybridge:
-
-	movd	xmm1, r8d
-	movd	xmm0, edx
-	punpckldq xmm2, xmm1
-	punpckldq xmm4, xmm0
-	punpckldq xmm4, xmm2
-	paddq	xmm3, xmm4
-	movdqa	xmm0, xmm3
-	psrlq	xmm0, 12
-	paddq	xmm0, xmm12
-	sqrtpd	xmm1, xmm0
-	movq	r9, xmm1
-	movdqa xmm5, xmm1
-	psrlq xmm5, 19
-	test	r9, 524287
-	je	sqrt_fix_1_sandybridge
-sqrt_fix_1_ret_sandybridge:
-
-	movq r9, xmm10
-	psrldq	xmm1, 8
-	movq	r8, xmm1
-	test	r8, 524287
-	je	sqrt_fix_2_sandybridge
-sqrt_fix_2_ret_sandybridge:
-
-	mov r12d, ecx
-	mov r8d, ecx
-	xor r12d, 16
-	xor r8d, 32
-	xor ecx, 48
-	mov	rax, r10
-	mul	r9
-	movq xmm0, rax
-	movq xmm3, rdx
-	punpcklqdq xmm3, xmm0
-
-	movdqu	xmm0, XMMWORD PTR [r12+rsi]
-	pxor xmm0, xmm3
-	movdqu	xmm1, XMMWORD PTR [r8+rsi]
-	xor rdx, [r8+rsi]
-	xor rax, [r8+rsi+8]
-	movdqu	xmm3, XMMWORD PTR [rcx+rsi]
-	paddq	xmm0, xmm6
-	paddq	xmm1, xmm11
-	paddq	xmm3, xmm8
-	movdqu	XMMWORD PTR [r8+rsi], xmm0
-	movdqu	XMMWORD PTR [rcx+rsi], xmm1
-	movdqu	XMMWORD PTR [r12+rsi], xmm3
-
-	add	rdi, rdx
-	mov	QWORD PTR [r13], rdi
-	xor	rdi, r10
-	mov	ecx, edi
-	and	ecx, 2097136
-	lea	r8, QWORD PTR [rcx+rsi]
-
-	mov rdx, QWORD PTR [r13+8]	
-	add	rbp, rax
-	mov	QWORD PTR [r13+8], rbp
-	movdqu xmm11, XMMWORD PTR [rcx+rsi]
-	xor	rbp, rdx
-	mov	r13, QWORD PTR [rsp]
-	movdqa	xmm3, xmm7
-	mov	rdx, QWORD PTR [rsp+8]
-	movdqa	xmm8, xmm6
-	mov	r10, QWORD PTR [rsp+256]
-	movdqa	xmm7, xmm9
-	mov	r11, QWORD PTR [rsp+264]
-	movdqa	xmm6, xmm10
-	mov	r9, r15
-	dec r14d
-	jne	main_loop_double_sandybridge
-
-	ldmxcsr DWORD PTR [rsp+272]
-	movaps	xmm13, XMMWORD PTR [rsp+48]
-	lea	r11, QWORD PTR [rsp+184]
-	movaps	xmm6, XMMWORD PTR [r11-24]
-	movaps	xmm7, XMMWORD PTR [r11-40]
-	movaps	xmm8, XMMWORD PTR [r11-56]
-	movaps	xmm9, XMMWORD PTR [r11-72]
-	movaps	xmm10, XMMWORD PTR [r11-88]
-	movaps	xmm11, XMMWORD PTR [r11-104]
-	movaps	xmm12, XMMWORD PTR [r11-120]
-	movaps	xmm14, XMMWORD PTR [rsp+32]
-	movaps	xmm15, XMMWORD PTR [rsp+16]
-	mov	rsp, r11
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	pop	rsi
-	pop	rbp
-	pop	rbx
-	jmp cnv2_double_mainloop_asm_sandybridge_endp
-
-div_fix_1_sandybridge:
-	dec	rbx
-	add	r11, rdx
-	jmp	div_fix_1_ret_sandybridge
-
-div_fix_2_sandybridge:
-	dec	rdx
-	add	r8, r9
-	jmp	div_fix_2_ret_sandybridge
-
-sqrt_fix_1_sandybridge:
-	movq	r8, xmm3
-	movdqa xmm0, xmm5
-	psrldq xmm0, 8
-	dec	r9
-	mov r11d, -1022
-	shl r11, 32
-	mov	rax, r9
-	shr	r9, 19
-	shr	rax, 20
-	mov	rdx, r9
-	sub	rdx, rax
-	lea	rdx, [rdx+r11+1]
-	add	rax, r11
-	imul	rdx, rax
-	sub	rdx, r8
-	adc	r9, 0
-	movq xmm5, r9
-	punpcklqdq xmm5, xmm0
-	jmp	sqrt_fix_1_ret_sandybridge
-
-sqrt_fix_2_sandybridge:
-	psrldq	xmm3, 8
-	movq	r11, xmm3
-	dec	r8
-	mov ebx, -1022
-	shl rbx, 32
-	mov	rax, r8
-	shr	r8, 19
-	shr	rax, 20
-	mov	rdx, r8
-	sub	rdx, rax
-	lea	rdx, [rdx+rbx+1]
-	add	rax, rbx
-	imul	rdx, rax
-	sub	rdx, r11
-	adc	r8, 0
-	movq xmm0, r8
-	punpcklqdq xmm5, xmm0
-	jmp	sqrt_fix_2_ret_sandybridge
-
-cnv2_double_mainloop_asm_sandybridge_endp:
diff --git a/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc b/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc
deleted file mode 100644
index b881b669..00000000
--- a/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc
+++ /dev/null
@@ -1,182 +0,0 @@
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+16], rbx
-	mov	QWORD PTR [rsp+24], rbp
-	mov	QWORD PTR [rsp+32], rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 64
-
-	stmxcsr DWORD PTR [rsp]
-	mov DWORD PTR [rsp+4], 24448
-	ldmxcsr DWORD PTR [rsp+4]
-
-	mov	rax, QWORD PTR [rcx+48]
-	mov	r9, rcx
-	xor	rax, QWORD PTR [rcx+16]
-	mov	ebp, 524288
-	mov	r8, QWORD PTR [rcx+32]
-	xor	r8, QWORD PTR [rcx]
-	mov	r11, QWORD PTR [rcx+40]
-	mov	r10, r8
-	mov	rdx, QWORD PTR [rcx+56]
-	movq	xmm3, rax
-	xor	rdx, QWORD PTR [rcx+24]
-	xor	r11, QWORD PTR [rcx+8]
-	mov	rbx, QWORD PTR [rcx+224]
-	mov	rax, QWORD PTR [r9+80]
-	xor	rax, QWORD PTR [r9+64]
-	movq	xmm0, rdx
-	mov	rcx, QWORD PTR [rcx+88]
-	xor	rcx, QWORD PTR [r9+72]
-	mov	rdi, QWORD PTR [r9+104]
-	and	r10d, 2097136
-	movaps	XMMWORD PTR [rsp+48], xmm6
-	movq	xmm4, rax
-	movaps	XMMWORD PTR [rsp+32], xmm7
-	movaps	XMMWORD PTR [rsp+16], xmm8
-	xorps	xmm8, xmm8
-	mov ax, 1023
-	shl rax, 52
-	movq xmm7, rax
-	mov	r15, QWORD PTR [r9+96]
-	punpcklqdq xmm3, xmm0
-	movq	xmm0, rcx
-	punpcklqdq xmm4, xmm0
-
-	ALIGN(64)
-cnv2_main_loop_bulldozer:
-	movdqa	xmm5, XMMWORD PTR [r10+rbx]
-	movq xmm6, r8
-	pinsrq xmm6, r11, 1
-	lea	rdx, QWORD PTR [r10+rbx]
-	lea	r9, QWORD PTR [rdi+rdi]
-	shl	rdi, 32
-
-	mov	ecx, r10d
-	mov	eax, r10d
-	xor	ecx, 16
-	xor	eax, 32
-	xor	r10d, 48
-	aesenc	xmm5, xmm6
-	movdqa	xmm2, XMMWORD PTR [rcx+rbx]
-	movdqa	xmm1, XMMWORD PTR [rax+rbx]
-	movdqa	xmm0, XMMWORD PTR [r10+rbx]
-	paddq	xmm2, xmm3
-	paddq	xmm1, xmm6
-	paddq	xmm0, xmm4
-	movdqa	XMMWORD PTR [rcx+rbx], xmm0
-	movdqa	XMMWORD PTR [rax+rbx], xmm2
-	movdqa	XMMWORD PTR [r10+rbx], xmm1
-
-	movaps	xmm1, xmm8
-	mov	rsi, r15
-	xor	rsi, rdi
-
-	mov edi, 1023
-	shl rdi, 52
-
-	movq	r14, xmm5
-	pextrq rax, xmm5, 1
-
-	movdqa	xmm0, xmm5
-	pxor	xmm0, xmm3
-	mov	r10, r14
-	and	r10d, 2097136
-	movdqa	XMMWORD PTR [rdx], xmm0
-	xor	rsi, QWORD PTR [r10+rbx]
-	lea	r12, QWORD PTR [r10+rbx]
-	mov	r13, QWORD PTR [r10+rbx+8]
-
-	add	r9d, r14d
-	or	r9d, -2147483647
-	xor	edx, edx
-	div	r9
-	mov	eax, eax
-	shl	rdx, 32
-	lea	r15, [rax+rdx]
-	lea	rax, [r14+r15]
-	shr	rax, 12
-	add	rax, rdi
-	movq	xmm0, rax
-	sqrtsd	xmm1, xmm0
-	movq	rdi, xmm1
-	test	rdi, 524287
-	je	sqrt_fixup_bulldozer
-	shr	rdi, 19
-
-sqrt_fixup_bulldozer_ret:
-	mov	rax, rsi
-	mul	r14
-	movq xmm1, rax
-	movq xmm0, rdx
-	punpcklqdq xmm0, xmm1
-
-	mov	r9d, r10d
-	mov	ecx, r10d
-	xor	r9d, 16
-	xor	ecx, 32
-	xor	r10d, 48
-	movdqa	xmm1, XMMWORD PTR [rcx+rbx]
-	xor rdx, [rcx+rbx]
-	xor rax, [rcx+rbx+8]
-	movdqa	xmm2, XMMWORD PTR [r9+rbx]
-	pxor xmm2, xmm0
-	paddq xmm4, XMMWORD PTR [r10+rbx]
-	paddq	xmm2, xmm3
-	paddq	xmm1, xmm6
-	movdqa	XMMWORD PTR [r9+rbx], xmm4
-	movdqa	XMMWORD PTR [rcx+rbx], xmm2
-	movdqa	XMMWORD PTR [r10+rbx], xmm1
-
-	movdqa	xmm4, xmm3
-	add	r8, rdx
-	add	r11, rax
-	mov	QWORD PTR [r12], r8
-	xor	r8, rsi
-	mov	QWORD PTR [r12+8], r11
-	mov	r10, r8
-	xor	r11, r13
-	and	r10d, 2097136
-	movdqa	xmm3, xmm5
-	dec	ebp
-	jne	cnv2_main_loop_bulldozer
-
-	ldmxcsr DWORD PTR [rsp]
-	movaps	xmm6, XMMWORD PTR [rsp+48]
-	lea	r11, QWORD PTR [rsp+64]
-	mov	rbx, QWORD PTR [r11+56]
-	mov	rbp, QWORD PTR [r11+64]
-	mov	rsi, QWORD PTR [r11+72]
-	movaps	xmm8, XMMWORD PTR [r11-48]
-	movaps	xmm7, XMMWORD PTR [rsp+32]
-	mov	rsp, r11
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	jmp cnv2_main_loop_bulldozer_endp
-
-sqrt_fixup_bulldozer:
-	movq r9, xmm5
-	add r9, r15
-	dec	rdi
-	mov edx, -1022
-	shl rdx, 32
-	mov	rax, rdi
-	shr	rdi, 19
-	shr	rax, 20
-	mov	rcx, rdi
-	sub	rcx, rax
-	lea	rcx, [rcx+rdx+1]
-	add	rax, rdx
-	imul	rcx, rax
-	sub	rcx, r9
-	adc	rdi, 0
-	jmp	sqrt_fixup_bulldozer_ret
-
-cnv2_main_loop_bulldozer_endp:
diff --git a/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc
deleted file mode 100644
index 863673de..00000000
--- a/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc
+++ /dev/null
@@ -1,188 +0,0 @@
-	mov	rcx, [rcx]
-
-	mov	 QWORD PTR [rsp+24], rbx
-	push	 rbp
-	push	 rsi
-	push	 rdi
-	push	 r12
-	push	 r13
-	push	 r14
-	push	 r15
-	sub	 rsp, 80
-
-	stmxcsr DWORD PTR [rsp]
-	mov DWORD PTR [rsp+4], 24448
-	ldmxcsr DWORD PTR [rsp+4]
-
-	mov	 rax, QWORD PTR [rcx+48]
-	mov	 r9, rcx
-	xor	 rax, QWORD PTR [rcx+16]
-	mov	 esi, 524288
-	mov	 r8, QWORD PTR [rcx+32]
-	mov	 r13d, -2147483647
-	xor	 r8, QWORD PTR [rcx]
-	mov	 r11, QWORD PTR [rcx+40]
-	mov	 r10, r8
-	mov	 rdx, QWORD PTR [rcx+56]
-	movq	 xmm4, rax
-	xor	 rdx, QWORD PTR [rcx+24]
-	xor	 r11, QWORD PTR [rcx+8]
-	mov	 rbx, QWORD PTR [rcx+224]
-	mov	 rax, QWORD PTR [r9+80]
-	xor	 rax, QWORD PTR [r9+64]
-	movq	 xmm0, rdx
-	mov	 rcx, QWORD PTR [rcx+88]
-	xor	 rcx, QWORD PTR [r9+72]
-	movq	 xmm3, QWORD PTR [r9+104]
-	movaps	 XMMWORD PTR [rsp+64], xmm6
-	movaps	 XMMWORD PTR [rsp+48], xmm7
-	movaps	 XMMWORD PTR [rsp+32], xmm8
-	and	 r10d, 2097136
-	movq	 xmm5, rax
-
-	xor eax, eax
-	mov QWORD PTR [rsp+16], rax
-
-	mov ax, 1023
-	shl rax, 52
-	movq xmm8, rax
-	mov r15, QWORD PTR [r9+96]
-	punpcklqdq xmm4, xmm0
-	movq	 xmm0, rcx
-	punpcklqdq xmm5, xmm0
-	movdqu	 xmm6, XMMWORD PTR [r10+rbx]
-
-	ALIGN(64)
-main_loop_ivybridge:
-	lea	 rdx, QWORD PTR [r10+rbx]
-	mov	 ecx, r10d
-	mov	 eax, r10d
-	mov rdi, r15
-	xor	 ecx, 16
-	xor	 eax, 32
-	xor	 r10d, 48
-	movq	 xmm0, r11
-	movq	 xmm7, r8
-	punpcklqdq xmm7, xmm0
-	aesenc	 xmm6, xmm7
-	movq	 rbp, xmm6
-	mov	 r9, rbp
-	and	 r9d, 2097136
-	movdqu	 xmm2, XMMWORD PTR [rcx+rbx]
-	movdqu	 xmm1, XMMWORD PTR [rax+rbx]
-	movdqu	 xmm0, XMMWORD PTR [r10+rbx]
-	paddq	 xmm1, xmm7
-	paddq	 xmm0, xmm5
-	paddq	 xmm2, xmm4
-	movdqu	 XMMWORD PTR [rcx+rbx], xmm0
-	movdqu	 XMMWORD PTR [rax+rbx], xmm2
-	movdqu	 XMMWORD PTR [r10+rbx], xmm1
-	mov r10, r9
-	xor r10d, 32
-	movq	 rcx, xmm3
-	mov	 rax, rcx
-	shl	 rax, 32
-	xor	 rdi, rax
-	movdqa	 xmm0, xmm6
-	pxor	 xmm0, xmm4
-	movdqu	 XMMWORD PTR [rdx], xmm0
-	xor	 rdi, QWORD PTR [r9+rbx]
-	lea	 r14, QWORD PTR [r9+rbx]
-	mov	 r12, QWORD PTR [r14+8]
-	xor	 edx, edx
-	lea	 r9d, DWORD PTR [ecx+ecx]
-	add	 r9d, ebp
-	movdqa	 xmm0, xmm6
-	psrldq	 xmm0, 8
-	or	 r9d, r13d
-	movq	 rax, xmm0
-	div	 r9
-	xorps xmm3, xmm3
-	mov	 eax, eax
-	shl	 rdx, 32
-	add	 rdx, rax
-	lea	 r9, QWORD PTR [rdx+rbp]
-	mov r15, rdx
-	mov	 rax, r9
-	shr	 rax, 12
-	movq	 xmm0, rax
-	paddq	 xmm0, xmm8
-	sqrtsd	 xmm3, xmm0
-	psubq	 xmm3, XMMWORD PTR [rsp+16]
-	movq	 rdx, xmm3
-	test	 edx, 524287
-	je	 sqrt_fixup_ivybridge
-	psrlq	 xmm3, 19
-sqrt_fixup_ivybridge_ret:
-
-	mov	 ecx, r10d
-	mov	 rax, rdi
-	mul	 rbp
-	movq xmm2, rdx
-	xor rdx, [rcx+rbx]
-	add	 r8, rdx
-	mov	 QWORD PTR [r14], r8
-	xor	 r8, rdi
-	mov edi, r8d
-	and edi, 2097136
-	movq xmm0, rax
-	xor rax, [rcx+rbx+8]
-	add	 r11, rax
-	mov	 QWORD PTR [r14+8], r11
-	punpcklqdq xmm2, xmm0
-
-	mov	 r9d, r10d
-	xor	 r9d, 48
-	xor	 r10d, 16
-	pxor	 xmm2, XMMWORD PTR [r9+rbx]
-	movdqu	 xmm0, XMMWORD PTR [r10+rbx]
-	paddq	 xmm0, xmm5
-	movdqu	 xmm1, XMMWORD PTR [rcx+rbx]
-	paddq	 xmm2, xmm4
-	paddq	 xmm1, xmm7
-	movdqa	 xmm5, xmm4
-	movdqu	 XMMWORD PTR [r9+rbx], xmm0
-	movdqa	 xmm4, xmm6
-	movdqu	 XMMWORD PTR [rcx+rbx], xmm2
-	movdqu	 XMMWORD PTR [r10+rbx], xmm1
-	movdqu xmm6, [rdi+rbx]
-	mov	 r10d, edi
-	xor	 r11, r12
-	dec rsi
-	jne	 main_loop_ivybridge
-
-	ldmxcsr DWORD PTR [rsp]
-	mov	 rbx, QWORD PTR [rsp+160]
-	movaps	 xmm6, XMMWORD PTR [rsp+64]
-	movaps	 xmm7, XMMWORD PTR [rsp+48]
-	movaps	 xmm8, XMMWORD PTR [rsp+32]
-	add	 rsp, 80
-	pop	 r15
-	pop	 r14
-	pop	 r13
-	pop	 r12
-	pop	 rdi
-	pop	 rsi
-	pop	 rbp
-	jmp cnv2_main_loop_ivybridge_endp
-
-sqrt_fixup_ivybridge:
-	dec	 rdx
-	mov r13d, -1022
-	shl r13, 32
-	mov	 rax, rdx
-	shr	 rdx, 19
-	shr	 rax, 20
-	mov	 rcx, rdx
-	sub	 rcx, rax
-	add	 rax, r13
-	not r13
-	sub	 rcx, r13
-	mov	 r13d, -2147483647
-	imul	 rcx, rax
-	sub	 rcx, r9
-	adc	 rdx, 0
-	movq	 xmm3, rdx
-	jmp	 sqrt_fixup_ivybridge_ret
-
-cnv2_main_loop_ivybridge_endp:
diff --git a/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc b/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc
deleted file mode 100644
index 8ccc5e17..00000000
--- a/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc
+++ /dev/null
@@ -1,181 +0,0 @@
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+16], rbx
-	mov	QWORD PTR [rsp+24], rbp
-	mov	QWORD PTR [rsp+32], rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 64
-
-	stmxcsr DWORD PTR [rsp]
-	mov DWORD PTR [rsp+4], 24448
-	ldmxcsr DWORD PTR [rsp+4]
-
-	mov	rax, QWORD PTR [rcx+48]
-	mov	r9, rcx
-	xor	rax, QWORD PTR [rcx+16]
-	mov	ebp, 524288
-	mov	r8, QWORD PTR [rcx+32]
-	xor	r8, QWORD PTR [rcx]
-	mov	r11, QWORD PTR [rcx+40]
-	mov	r10, r8
-	mov	rdx, QWORD PTR [rcx+56]
-	movq	xmm3, rax
-	xor	rdx, QWORD PTR [rcx+24]
-	xor	r11, QWORD PTR [rcx+8]
-	mov	rbx, QWORD PTR [rcx+224]
-	mov	rax, QWORD PTR [r9+80]
-	xor	rax, QWORD PTR [r9+64]
-	movq	xmm0, rdx
-	mov	rcx, QWORD PTR [rcx+88]
-	xor	rcx, QWORD PTR [r9+72]
-	mov	rdi, QWORD PTR [r9+104]
-	and	r10d, 2097136
-	movaps	XMMWORD PTR [rsp+48], xmm6
-	movq	xmm4, rax
-	movaps	XMMWORD PTR [rsp+32], xmm7
-	movaps	XMMWORD PTR [rsp+16], xmm8
-	xorps	xmm8, xmm8
-	mov ax, 1023
-	shl rax, 52
-	movq xmm7, rax
-	mov	r15, QWORD PTR [r9+96]
-	punpcklqdq xmm3, xmm0
-	movq	xmm0, rcx
-	punpcklqdq xmm4, xmm0
-
-	ALIGN(64)
-main_loop_ryzen:
-	movdqa	xmm5, XMMWORD PTR [r10+rbx]
-	movq	xmm0, r11
-	movq	xmm6, r8
-	punpcklqdq xmm6, xmm0
-	lea	rdx, QWORD PTR [r10+rbx]
-	lea	r9, QWORD PTR [rdi+rdi]
-	shl	rdi, 32
-
-	mov	ecx, r10d
-	mov	eax, r10d
-	xor	ecx, 16
-	xor	eax, 32
-	xor	r10d, 48
-	aesenc	xmm5, xmm6
-	movdqa	xmm2, XMMWORD PTR [rcx+rbx]
-	movdqa	xmm1, XMMWORD PTR [rax+rbx]
-	movdqa	xmm0, XMMWORD PTR [r10+rbx]
-	paddq	xmm2, xmm3
-	paddq	xmm1, xmm6
-	paddq	xmm0, xmm4
-	movdqa	XMMWORD PTR [rcx+rbx], xmm0
-	movdqa	XMMWORD PTR [rax+rbx], xmm2
-	movdqa	XMMWORD PTR [r10+rbx], xmm1
-
-	movaps	xmm1, xmm8
-	mov	rsi, r15
-	xor	rsi, rdi
-	movq	r14, xmm5
-	movdqa	xmm0, xmm5
-	pxor	xmm0, xmm3
-	mov	r10, r14
-	and	r10d, 2097136
-	movdqa	XMMWORD PTR [rdx], xmm0
-	xor	rsi, QWORD PTR [r10+rbx]
-	lea	r12, QWORD PTR [r10+rbx]
-	mov	r13, QWORD PTR [r10+rbx+8]
-
-	add	r9d, r14d
-	or	r9d, -2147483647
-	xor	edx, edx
-	movdqa	xmm0, xmm5
-	psrldq	xmm0, 8
-	movq	rax, xmm0
-
-	div	r9
-	movq xmm0, rax
-	movq xmm1, rdx
-	punpckldq xmm0, xmm1
-	movq r15, xmm0
-	paddq xmm0, xmm5
-	movdqa xmm2, xmm0
-	psrlq xmm0, 12
-	paddq	xmm0, xmm7
-	sqrtsd	xmm1, xmm0
-	movq	rdi, xmm1
-	test	rdi, 524287
-	je	sqrt_fixup_ryzen
-	shr	rdi, 19
-
-sqrt_fixup_ryzen_ret:
-	mov	rax, rsi
-	mul	r14
-	movq xmm1, rax
-	movq xmm0, rdx
-	punpcklqdq xmm0, xmm1
-
-	mov	r9d, r10d
-	mov	ecx, r10d
-	xor	r9d, 16
-	xor	ecx, 32
-	xor	r10d, 48
-	movdqa	xmm1, XMMWORD PTR [rcx+rbx]
-	xor rdx, [rcx+rbx]
-	xor rax, [rcx+rbx+8]
-	movdqa	xmm2, XMMWORD PTR [r9+rbx]
-	pxor xmm2, xmm0
-	paddq xmm4, XMMWORD PTR [r10+rbx]
-	paddq	xmm2, xmm3
-	paddq	xmm1, xmm6
-	movdqa	XMMWORD PTR [r9+rbx], xmm4
-	movdqa	XMMWORD PTR [rcx+rbx], xmm2
-	movdqa	XMMWORD PTR [r10+rbx], xmm1
-
-	movdqa	xmm4, xmm3
-	add	r8, rdx
-	add	r11, rax
-	mov	QWORD PTR [r12], r8
-	xor	r8, rsi
-	mov	QWORD PTR [r12+8], r11
-	mov	r10, r8
-	xor	r11, r13
-	and	r10d, 2097136
-	movdqa	xmm3, xmm5
-	dec	ebp
-	jne	main_loop_ryzen
-
-	ldmxcsr DWORD PTR [rsp]
-	movaps	xmm6, XMMWORD PTR [rsp+48]
-	lea	r11, QWORD PTR [rsp+64]
-	mov	rbx, QWORD PTR [r11+56]
-	mov	rbp, QWORD PTR [r11+64]
-	mov	rsi, QWORD PTR [r11+72]
-	movaps	xmm8, XMMWORD PTR [r11-48]
-	movaps	xmm7, XMMWORD PTR [rsp+32]
-	mov	rsp, r11
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	jmp cnv2_main_loop_ryzen_endp
-
-sqrt_fixup_ryzen:
-	movq r9, xmm2
-	dec	rdi
-	mov edx, -1022
-	shl rdx, 32
-	mov	rax, rdi
-	shr	rdi, 19
-	shr	rax, 20
-	mov	rcx, rdi
-	sub	rcx, rax
-	lea	rcx, [rcx+rdx+1]
-	add	rax, rdx
-	imul	rcx, rax
-	sub	rcx, r9
-	adc	rdi, 0
-	jmp	sqrt_fixup_ryzen_ret
-
-cnv2_main_loop_ryzen_endp:
diff --git a/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc b/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc
deleted file mode 100644
index d9bfc9c1..00000000
--- a/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc
+++ /dev/null
@@ -1,413 +0,0 @@
-	mov	rdx, [rcx+8]
-	mov	rcx, [rcx]
-
-	mov	rax, rsp
-	push	rbx
-	push	rbp
-	push	rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 184
-
-	stmxcsr DWORD PTR [rsp+272]
-	mov DWORD PTR [rsp+276], 24448
-	ldmxcsr DWORD PTR [rsp+276]
-
-	mov	r13, QWORD PTR [rcx+224]
-	mov	r9, rdx
-	mov	r10, QWORD PTR [rcx+32]
-	mov	r8, rcx
-	xor	r10, QWORD PTR [rcx]
-	mov	r14d, 393216
-	mov	r11, QWORD PTR [rcx+40]
-	xor	r11, QWORD PTR [rcx+8]
-	mov	rsi, QWORD PTR [rdx+224]
-	mov	rdx, QWORD PTR [rcx+56]
-	xor	rdx, QWORD PTR [rcx+24]
-	mov	rdi, QWORD PTR [r9+32]
-	xor	rdi, QWORD PTR [r9]
-	mov	rbp, QWORD PTR [r9+40]
-	xor	rbp, QWORD PTR [r9+8]
-	movq	xmm0, rdx
-	movaps	XMMWORD PTR [rax-88], xmm6
-	movaps	XMMWORD PTR [rax-104], xmm7
-	movaps	XMMWORD PTR [rax-120], xmm8
-	movaps	XMMWORD PTR [rsp+112], xmm9
-	movaps	XMMWORD PTR [rsp+96], xmm10
-	movaps	XMMWORD PTR [rsp+80], xmm11
-	movaps	XMMWORD PTR [rsp+64], xmm12
-	movaps	XMMWORD PTR [rsp+48], xmm13
-	movaps	XMMWORD PTR [rsp+32], xmm14
-	movaps	XMMWORD PTR [rsp+16], xmm15
-	mov	rdx, r10
-	movq	xmm4, QWORD PTR [r8+96]
-	and	edx, 2097136
-	mov	rax, QWORD PTR [rcx+48]
-	xorps	xmm13, xmm13
-	xor	rax, QWORD PTR [rcx+16]
-	mov	rcx, QWORD PTR [rcx+88]
-	xor	rcx, QWORD PTR [r8+72]
-	movq	xmm5, QWORD PTR [r8+104]
-	movq	xmm7, rax
-
-	mov eax, 1
-	shl rax, 52
-	movq xmm14, rax
-	punpcklqdq xmm14, xmm14
-
-	mov eax, 1023
-	shl rax, 52
-	movq xmm12, rax
-	punpcklqdq xmm12, xmm12
-
-	mov	rax, QWORD PTR [r8+80]
-	xor	rax, QWORD PTR [r8+64]
-	punpcklqdq xmm7, xmm0
-	movq	xmm0, rcx
-	mov	rcx, QWORD PTR [r9+56]
-	xor	rcx, QWORD PTR [r9+24]
-	movq	xmm3, rax
-	mov	rax, QWORD PTR [r9+48]
-	xor	rax, QWORD PTR [r9+16]
-	punpcklqdq xmm3, xmm0
-	movq	xmm0, rcx
-	mov	QWORD PTR [rsp], r13
-	mov	rcx, QWORD PTR [r9+88]
-	xor	rcx, QWORD PTR [r9+72]
-	movq	xmm6, rax
-	mov	rax, QWORD PTR [r9+80]
-	xor	rax, QWORD PTR [r9+64]
-	punpcklqdq xmm6, xmm0
-	movq	xmm0, rcx
-	mov	QWORD PTR [rsp+256], r10
-	mov	rcx, rdi
-	mov	QWORD PTR [rsp+264], r11
-	movq	xmm8, rax
-	and	ecx, 2097136
-	punpcklqdq xmm8, xmm0
-	movq	xmm0, QWORD PTR [r9+96]
-	punpcklqdq xmm4, xmm0
-	movq	xmm0, QWORD PTR [r9+104]
-	lea	r8, QWORD PTR [rcx+rsi]
-	movdqu	xmm11, XMMWORD PTR [r8]
-	punpcklqdq xmm5, xmm0
-	lea	r9, QWORD PTR [rdx+r13]
-	movdqu	xmm15, XMMWORD PTR [r9]
-
-	ALIGN(64)
-rwz_main_loop_double:
-	movdqu	xmm9, xmm15
-	mov eax, edx
-	mov ebx, edx
-	xor eax, 16
-	xor ebx, 32
-	xor edx, 48
-
-	movq	xmm0, r11
-	movq	xmm2, r10
-	punpcklqdq xmm2, xmm0
-	aesenc	xmm9, xmm2
-
-	movdqu	xmm0, XMMWORD PTR [rdx+r13]
-	movdqu	xmm1, XMMWORD PTR [rbx+r13]
-	paddq	xmm0, xmm7
-	paddq	xmm1, xmm2
-	movdqu	XMMWORD PTR [rbx+r13], xmm0
-	movdqu	xmm0, XMMWORD PTR [rax+r13]
-	movdqu	XMMWORD PTR [rdx+r13], xmm1
-	paddq	xmm0, xmm3
-	movdqu	XMMWORD PTR [rax+r13], xmm0
-
-	movq	r11, xmm9
-	mov	edx, r11d
-	and	edx, 2097136
-	movdqa	xmm0, xmm9
-	pxor	xmm0, xmm7
-	movdqu	XMMWORD PTR [r9], xmm0
-
-	lea	rbx, QWORD PTR [rdx+r13]
-	mov	r10, QWORD PTR [rdx+r13]
-
-	movdqu	xmm10, xmm11
-	movq	xmm0, rbp
-	movq	xmm11, rdi
-	punpcklqdq xmm11, xmm0
-	aesenc	xmm10, xmm11
-
-	mov eax, ecx
-	mov r12d, ecx
-	xor eax, 16
-	xor r12d, 32
-	xor ecx, 48
-
-	movdqu	xmm0, XMMWORD PTR [rcx+rsi]
-	paddq	xmm0, xmm6
-	movdqu	xmm1, XMMWORD PTR [r12+rsi]
-	movdqu	XMMWORD PTR [r12+rsi], xmm0
-	paddq	xmm1, xmm11
-	movdqu	xmm0, XMMWORD PTR [rax+rsi]
-	movdqu	XMMWORD PTR [rcx+rsi], xmm1
-	paddq	xmm0, xmm8
-	movdqu	XMMWORD PTR [rax+rsi], xmm0
-
-	movq	rcx, xmm10
-	and	ecx, 2097136
-
-	movdqa	xmm0, xmm10
-	pxor	xmm0, xmm6
-	movdqu	XMMWORD PTR [r8], xmm0
-	mov r12, QWORD PTR [rcx+rsi]
-
-	mov	r9, QWORD PTR [rbx+8]
-
-	xor edx, 16
-	mov r8d, edx
-	mov r15d, edx
-
-	movq	rdx, xmm5
-	shl	rdx, 32
-	movq	rax, xmm4
-	xor	rdx, rax
-	xor	r10, rdx
-	mov	rax, r10
-	mul	r11
-	mov r11d, r8d
-	xor r11d, 48
-	movq xmm0, rdx
-	xor rdx, [r11+r13]
-	movq xmm1, rax
-	xor rax, [r11+r13+8]
-	punpcklqdq xmm0, xmm1
-
-	pxor xmm0, XMMWORD PTR [r8+r13]
-	movdqu	xmm1, XMMWORD PTR [r11+r13]
-	paddq	xmm0, xmm3
-	paddq	xmm1, xmm2
-	movdqu	XMMWORD PTR [r8+r13], xmm0
-	xor	r8d, 32
-	movdqu	xmm0, XMMWORD PTR [r8+r13]
-	movdqu	XMMWORD PTR [r8+r13], xmm1
-	paddq	xmm0, xmm7
-	movdqu	XMMWORD PTR [r11+r13], xmm0
-
-	mov	r11, QWORD PTR [rsp+256]
-	add	r11, rdx
-	mov	rdx, QWORD PTR [rsp+264]
-	add	rdx, rax
-	mov	QWORD PTR [rbx], r11
-	xor	r11, r10
-	mov	QWORD PTR [rbx+8], rdx
-	xor	rdx, r9
-	mov	QWORD PTR [rsp+256], r11
-	and	r11d, 2097136
-	mov	QWORD PTR [rsp+264], rdx
-	mov	QWORD PTR [rsp+8], r11
-	lea	r15, QWORD PTR [r11+r13]
-	movdqu xmm15, XMMWORD PTR [r11+r13]
-	lea	r13, QWORD PTR [rsi+rcx]
-	movdqa	xmm0, xmm5
-	psrldq	xmm0, 8
-	movaps	xmm2, xmm13
-	movq	r10, xmm0
-	psllq	xmm5, 1
-	shl	r10, 32
-	movdqa	xmm0, xmm9
-	psrldq	xmm0, 8
-	movdqa	xmm1, xmm10
-	movq	r11, xmm0
-	psrldq	xmm1, 8
-	movq	r8, xmm1
-	psrldq	xmm4, 8
-	movaps	xmm0, xmm13
-	movq	rax, xmm4
-	xor	r10, rax
-	movaps	xmm1, xmm13
-	xor	r10, r12
-	lea	rax, QWORD PTR [r11+1]
-	shr	rax, 1
-	movdqa	xmm3, xmm9
-	punpcklqdq xmm3, xmm10
-	paddq	xmm5, xmm3
-	movq	rdx, xmm5
-	psrldq	xmm5, 8
-	cvtsi2sd xmm2, rax
-	or	edx, -2147483647
-	lea	rax, QWORD PTR [r8+1]
-	shr	rax, 1
-	movq	r9, xmm5
-	cvtsi2sd xmm0, rax
-	or	r9d, -2147483647
-	cvtsi2sd xmm1, rdx
-	unpcklpd xmm2, xmm0
-	movaps	xmm0, xmm13
-	cvtsi2sd xmm0, r9
-	unpcklpd xmm1, xmm0
-	divpd	xmm2, xmm1
-	paddq	xmm2, xmm14
-	cvttsd2si rax, xmm2
-	psrldq	xmm2, 8
-	mov	rbx, rax
-	imul	rax, rdx
-	sub	r11, rax
-	js	rwz_div_fix_1
-rwz_div_fix_1_ret:
-
-	cvttsd2si rdx, xmm2
-	mov	rax, rdx
-	imul	rax, r9
-	movd	xmm2, r11d
-	movd	xmm4, ebx
-	sub	r8, rax
-	js	rwz_div_fix_2
-rwz_div_fix_2_ret:
-
-	movd	xmm1, r8d
-	movd	xmm0, edx
-	punpckldq xmm2, xmm1
-	punpckldq xmm4, xmm0
-	punpckldq xmm4, xmm2
-	paddq	xmm3, xmm4
-	movdqa	xmm0, xmm3
-	psrlq	xmm0, 12
-	paddq	xmm0, xmm12
-	sqrtpd	xmm1, xmm0
-	movq	r9, xmm1
-	movdqa xmm5, xmm1
-	psrlq xmm5, 19
-	test	r9, 524287
-	je	rwz_sqrt_fix_1
-rwz_sqrt_fix_1_ret:
-
-	movq r9, xmm10
-	psrldq	xmm1, 8
-	movq	r8, xmm1
-	test	r8, 524287
-	je	rwz_sqrt_fix_2
-rwz_sqrt_fix_2_ret:
-
-	mov r12d, ecx
-	mov r8d, ecx
-	xor r12d, 16
-	xor r8d, 32
-	xor ecx, 48
-	mov	rax, r10
-	mul	r9
-	movq xmm0, rax
-	movq xmm3, rdx
-	punpcklqdq xmm3, xmm0
-
-	movdqu	xmm0, XMMWORD PTR [r12+rsi]
-	pxor xmm0, xmm3
-	movdqu	xmm1, XMMWORD PTR [r8+rsi]
-	xor rdx, [r8+rsi]
-	xor rax, [r8+rsi+8]
-	movdqu	xmm3, XMMWORD PTR [rcx+rsi]
-	paddq	xmm3, xmm6
-	paddq	xmm1, xmm11
-	paddq	xmm0, xmm8
-	movdqu	XMMWORD PTR [r8+rsi], xmm3
-	movdqu	XMMWORD PTR [rcx+rsi], xmm1
-	movdqu	XMMWORD PTR [r12+rsi], xmm0
-
-	add	rdi, rdx
-	mov	QWORD PTR [r13], rdi
-	xor	rdi, r10
-	mov	ecx, edi
-	and	ecx, 2097136
-	lea	r8, QWORD PTR [rcx+rsi]
-
-	mov rdx, QWORD PTR [r13+8]	
-	add	rbp, rax
-	mov	QWORD PTR [r13+8], rbp
-	movdqu xmm11, XMMWORD PTR [rcx+rsi]
-	xor	rbp, rdx
-	mov	r13, QWORD PTR [rsp]
-	movdqa	xmm3, xmm7
-	mov	rdx, QWORD PTR [rsp+8]
-	movdqa	xmm8, xmm6
-	mov	r10, QWORD PTR [rsp+256]
-	movdqa	xmm7, xmm9
-	mov	r11, QWORD PTR [rsp+264]
-	movdqa	xmm6, xmm10
-	mov	r9, r15
-	dec r14d
-	jne	rwz_main_loop_double
-
-	ldmxcsr DWORD PTR [rsp+272]
-	movaps	xmm13, XMMWORD PTR [rsp+48]
-	lea	r11, QWORD PTR [rsp+184]
-	movaps	xmm6, XMMWORD PTR [r11-24]
-	movaps	xmm7, XMMWORD PTR [r11-40]
-	movaps	xmm8, XMMWORD PTR [r11-56]
-	movaps	xmm9, XMMWORD PTR [r11-72]
-	movaps	xmm10, XMMWORD PTR [r11-88]
-	movaps	xmm11, XMMWORD PTR [r11-104]
-	movaps	xmm12, XMMWORD PTR [r11-120]
-	movaps	xmm14, XMMWORD PTR [rsp+32]
-	movaps	xmm15, XMMWORD PTR [rsp+16]
-	mov	rsp, r11
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	pop	rsi
-	pop	rbp
-	pop	rbx
-	jmp rwz_cnv2_double_mainloop_asm_endp
-
-rwz_div_fix_1:
-	dec	rbx
-	add	r11, rdx
-	jmp	rwz_div_fix_1_ret
-
-rwz_div_fix_2:
-	dec	rdx
-	add	r8, r9
-	jmp	rwz_div_fix_2_ret
-
-rwz_sqrt_fix_1:
-	movq	r8, xmm3
-	movdqa xmm0, xmm5
-	psrldq xmm0, 8
-	dec	r9
-	mov r11d, -1022
-	shl r11, 32
-	mov	rax, r9
-	shr	r9, 19
-	shr	rax, 20
-	mov	rdx, r9
-	sub	rdx, rax
-	lea	rdx, [rdx+r11+1]
-	add	rax, r11
-	imul	rdx, rax
-	sub	rdx, r8
-	adc	r9, 0
-	movq xmm5, r9
-	punpcklqdq xmm5, xmm0
-	jmp	rwz_sqrt_fix_1_ret
-
-rwz_sqrt_fix_2:
-	psrldq	xmm3, 8
-	movq	r11, xmm3
-	dec	r8
-	mov ebx, -1022
-	shl rbx, 32
-	mov	rax, r8
-	shr	r8, 19
-	shr	rax, 20
-	mov	rdx, r8
-	sub	rdx, rax
-	lea	rdx, [rdx+rbx+1]
-	add	rax, rbx
-	imul	rdx, rax
-	sub	rdx, r11
-	adc	r8, 0
-	movq xmm0, r8
-	punpcklqdq xmm5, xmm0
-	jmp	rwz_sqrt_fix_2_ret
-
-rwz_cnv2_double_mainloop_asm_endp:
diff --git a/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc b/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc
deleted file mode 100644
index b59c02d6..00000000
--- a/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc
+++ /dev/null
@@ -1,188 +0,0 @@
-	mov	rcx, [rcx]
-
-	mov	 QWORD PTR [rsp+24], rbx
-	push	 rbp
-	push	 rsi
-	push	 rdi
-	push	 r12
-	push	 r13
-	push	 r14
-	push	 r15
-	sub	 rsp, 80
-
-	stmxcsr DWORD PTR [rsp]
-	mov DWORD PTR [rsp+4], 24448
-	ldmxcsr DWORD PTR [rsp+4]
-
-	mov	 rax, QWORD PTR [rcx+48]
-	mov	 r9, rcx
-	xor	 rax, QWORD PTR [rcx+16]
-	mov	 esi, 393216
-	mov	 r8, QWORD PTR [rcx+32]
-	mov	 r13d, -2147483647
-	xor	 r8, QWORD PTR [rcx]
-	mov	 r11, QWORD PTR [rcx+40]
-	mov	 r10, r8
-	mov	 rdx, QWORD PTR [rcx+56]
-	movq	 xmm4, rax
-	xor	 rdx, QWORD PTR [rcx+24]
-	xor	 r11, QWORD PTR [rcx+8]
-	mov	 rbx, QWORD PTR [rcx+224]
-	mov	 rax, QWORD PTR [r9+80]
-	xor	 rax, QWORD PTR [r9+64]
-	movq	 xmm0, rdx
-	mov	 rcx, QWORD PTR [rcx+88]
-	xor	 rcx, QWORD PTR [r9+72]
-	movq	 xmm3, QWORD PTR [r9+104]
-	movaps	 XMMWORD PTR [rsp+64], xmm6
-	movaps	 XMMWORD PTR [rsp+48], xmm7
-	movaps	 XMMWORD PTR [rsp+32], xmm8
-	and	 r10d, 2097136
-	movq	 xmm5, rax
-
-	xor eax, eax
-	mov QWORD PTR [rsp+16], rax
-
-	mov ax, 1023
-	shl rax, 52
-	movq xmm8, rax
-	mov r15, QWORD PTR [r9+96]
-	punpcklqdq xmm4, xmm0
-	movq	 xmm0, rcx
-	punpcklqdq xmm5, xmm0
-	movdqu	 xmm6, XMMWORD PTR [r10+rbx]
-
-	ALIGN(64)
-rwz_main_loop:
-	lea	 rdx, QWORD PTR [r10+rbx]
-	mov	 ecx, r10d
-	mov	 eax, r10d
-	mov rdi, r15
-	xor	 ecx, 16
-	xor	 eax, 32
-	xor	 r10d, 48
-	movq	 xmm0, r11
-	movq	 xmm7, r8
-	punpcklqdq xmm7, xmm0
-	aesenc	 xmm6, xmm7
-	movq	 rbp, xmm6
-	mov	 r9, rbp
-	and	 r9d, 2097136
-	movdqu	 xmm0, XMMWORD PTR [rcx+rbx]
-	movdqu	 xmm1, XMMWORD PTR [rax+rbx]
-	movdqu	 xmm2, XMMWORD PTR [r10+rbx]
-	paddq	 xmm0, xmm5
-	paddq	 xmm1, xmm7
-	paddq	 xmm2, xmm4
-	movdqu	 XMMWORD PTR [rcx+rbx], xmm0
-	movdqu	 XMMWORD PTR [rax+rbx], xmm2
-	movdqu	 XMMWORD PTR [r10+rbx], xmm1
-	mov r10, r9
-	xor r10d, 32
-	movq	 rcx, xmm3
-	mov	 rax, rcx
-	shl	 rax, 32
-	xor	 rdi, rax
-	movdqa	 xmm0, xmm6
-	pxor	 xmm0, xmm4
-	movdqu	 XMMWORD PTR [rdx], xmm0
-	xor	 rdi, QWORD PTR [r9+rbx]
-	lea	 r14, QWORD PTR [r9+rbx]
-	mov	 r12, QWORD PTR [r14+8]
-	xor	 edx, edx
-	lea	 r9d, DWORD PTR [ecx+ecx]
-	add	 r9d, ebp
-	movdqa	 xmm0, xmm6
-	psrldq	 xmm0, 8
-	or	 r9d, r13d
-	movq	 rax, xmm0
-	div	 r9
-	xorps xmm3, xmm3
-	mov	 eax, eax
-	shl	 rdx, 32
-	add	 rdx, rax
-	lea	 r9, QWORD PTR [rdx+rbp]
-	mov r15, rdx
-	mov	 rax, r9
-	shr	 rax, 12
-	movq	 xmm0, rax
-	paddq	 xmm0, xmm8
-	sqrtsd	 xmm3, xmm0
-	psubq	 xmm3, XMMWORD PTR [rsp+16]
-	movq	 rdx, xmm3
-	test	 edx, 524287
-	je	 rwz_sqrt_fixup
-	psrlq	 xmm3, 19
-rwz_sqrt_fixup_ret:
-
-	mov	 ecx, r10d
-	mov	 rax, rdi
-	mul	 rbp
-	movq xmm2, rdx
-	xor rdx, [rcx+rbx]
-	add	 r8, rdx
-	mov	 QWORD PTR [r14], r8
-	xor	 r8, rdi
-	mov edi, r8d
-	and edi, 2097136
-	movq xmm0, rax
-	xor rax, [rcx+rbx+8]
-	add	 r11, rax
-	mov	 QWORD PTR [r14+8], r11
-	punpcklqdq xmm2, xmm0
-
-	mov	 r9d, r10d
-	xor	 r9d, 48
-	xor	 r10d, 16
-	pxor	 xmm2, XMMWORD PTR [r9+rbx]
-	movdqu	 xmm0, XMMWORD PTR [r10+rbx]
-	paddq	 xmm0, xmm4
-	movdqu	 xmm1, XMMWORD PTR [rcx+rbx]
-	paddq	 xmm2, xmm5
-	paddq	 xmm1, xmm7
-	movdqa	 xmm5, xmm4
-	movdqu	 XMMWORD PTR [r9+rbx], xmm2
-	movdqa	 xmm4, xmm6
-	movdqu	 XMMWORD PTR [rcx+rbx], xmm0
-	movdqu	 XMMWORD PTR [r10+rbx], xmm1
-	movdqu xmm6, [rdi+rbx]
-	mov	 r10d, edi
-	xor	 r11, r12
-	dec rsi
-	jne	 rwz_main_loop
-
-	ldmxcsr DWORD PTR [rsp]
-	mov	 rbx, QWORD PTR [rsp+160]
-	movaps	 xmm6, XMMWORD PTR [rsp+64]
-	movaps	 xmm7, XMMWORD PTR [rsp+48]
-	movaps	 xmm8, XMMWORD PTR [rsp+32]
-	add	 rsp, 80
-	pop	 r15
-	pop	 r14
-	pop	 r13
-	pop	 r12
-	pop	 rdi
-	pop	 rsi
-	pop	 rbp
-	jmp cnv2_rwz_main_loop_endp
-
-rwz_sqrt_fixup:
-	dec	 rdx
-	mov r13d, -1022
-	shl r13, 32
-	mov	 rax, rdx
-	shr	 rdx, 19
-	shr	 rax, 20
-	mov	 rcx, rdx
-	sub	 rcx, rax
-	add	 rax, r13
-	not r13
-	sub	 rcx, r13
-	mov	 r13d, -2147483647
-	imul	 rcx, rax
-	sub	 rcx, r9
-	adc	 rdx, 0
-	movq	 xmm3, rdx
-	jmp	 rwz_sqrt_fixup_ret
-
-cnv2_rwz_main_loop_endp:
diff --git a/src/crypto/asm/cn_main_loop.S b/src/crypto/asm/cn_main_loop.S
deleted file mode 100644
index 7aed6c20..00000000
--- a/src/crypto/asm/cn_main_loop.S
+++ /dev/null
@@ -1,73 +0,0 @@
-#ifdef __APPLE__
-#   define ALIGN(x) .align 6
-#else
-#   define ALIGN(x) .align 64
-#endif
-.intel_syntax noprefix
-#ifdef __APPLE__
-#   define FN_PREFIX(fn) _ ## fn
-.text
-#else
-#   define FN_PREFIX(fn) fn
-.section .text
-#endif
-.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
-.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
-.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
-.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
-.global FN_PREFIX(cnv2_rwz_mainloop_asm)
-.global FN_PREFIX(cnv2_rwz_double_mainloop_asm)
-
-ALIGN(64)
-FN_PREFIX(cnv2_mainloop_ivybridge_asm):
-	sub rsp, 48
-	mov rcx, rdi
-	#include "cn2/cnv2_main_loop_ivybridge.inc"
-	add rsp, 48
-	ret 0
-	mov eax, 3735929054
-
-ALIGN(64)
-FN_PREFIX(cnv2_mainloop_ryzen_asm):
-	sub rsp, 48
-	mov rcx, rdi
-	#include "cn2/cnv2_main_loop_ryzen.inc"
-	add rsp, 48
-	ret 0
-	mov eax, 3735929054
-
-ALIGN(64)
-FN_PREFIX(cnv2_mainloop_bulldozer_asm):
-	sub rsp, 48
-	mov rcx, rdi
-	#include "cn2/cnv2_main_loop_bulldozer.inc"
-	add rsp, 48
-	ret 0
-	mov eax, 3735929054
-
-ALIGN(64)
-FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
-	sub rsp, 48
-	mov rcx, rdi
-	#include "cn2/cnv2_double_main_loop_sandybridge.inc"
-	add rsp, 48
-	ret 0
-	mov eax, 3735929054
-
-ALIGN(64)
-FN_PREFIX(cnv2_rwz_mainloop_asm):
-	sub rsp, 48
-	mov rcx, rdi
-	#include "cn2/cnv2_rwz_main_loop.inc"
-	add rsp, 48
-	ret 0
-	mov eax, 3735929054
-
-ALIGN(64)
-FN_PREFIX(cnv2_rwz_double_mainloop_asm):
-	sub rsp, 48
-	mov rcx, rdi
-	#include "cn2/cnv2_rwz_double_main_loop.inc"
-	add rsp, 48
-	ret 0
-	mov eax, 3735929054
diff --git a/src/crypto/asm/cn_main_loop.asm b/src/crypto/asm/cn_main_loop.asm
deleted file mode 100644
index f0766a7c..00000000
--- a/src/crypto/asm/cn_main_loop.asm
+++ /dev/null
@@ -1,52 +0,0 @@
-_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
-PUBLIC cnv2_mainloop_ivybridge_asm
-PUBLIC cnv2_mainloop_ryzen_asm
-PUBLIC cnv2_mainloop_bulldozer_asm
-PUBLIC cnv2_double_mainloop_sandybridge_asm
-PUBLIC cnv2_rwz_mainloop_asm
-PUBLIC cnv2_rwz_double_mainloop_asm
-
-ALIGN(64)
-cnv2_mainloop_ivybridge_asm PROC
-	INCLUDE cn2/cnv2_main_loop_ivybridge.inc
-	ret 0
-	mov eax, 3735929054
-cnv2_mainloop_ivybridge_asm ENDP
-
-ALIGN(64)
-cnv2_mainloop_ryzen_asm PROC
-	INCLUDE cn2/cnv2_main_loop_ryzen.inc
-	ret 0
-	mov eax, 3735929054
-cnv2_mainloop_ryzen_asm ENDP
-
-ALIGN(64)
-cnv2_mainloop_bulldozer_asm PROC
-	INCLUDE cn2/cnv2_main_loop_bulldozer.inc
-	ret 0
-	mov eax, 3735929054
-cnv2_mainloop_bulldozer_asm ENDP
-
-ALIGN(64)
-cnv2_double_mainloop_sandybridge_asm PROC
-	INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc
-	ret 0
-	mov eax, 3735929054
-cnv2_double_mainloop_sandybridge_asm ENDP
-
-ALIGN(64)
-cnv2_rwz_mainloop_asm PROC
-	INCLUDE cn2/cnv2_rwz_main_loop.inc
-	ret 0
-	mov eax, 3735929054
-cnv2_rwz_mainloop_asm ENDP
-
-ALIGN(64)
-cnv2_rwz_double_mainloop_asm PROC
-	INCLUDE cn2/cnv2_rwz_double_main_loop.inc
-	ret 0
-	mov eax, 3735929054
-cnv2_rwz_double_mainloop_asm ENDP
-
-_TEXT_CNV2_MAINLOOP ENDS
-END
diff --git a/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc b/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc
deleted file mode 100644
index 6898a604..00000000
--- a/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc
+++ /dev/null
@@ -1,281 +0,0 @@
-PUBLIC CryptonightR_soft_aes_template_part1
-PUBLIC CryptonightR_soft_aes_template_mainloop
-PUBLIC CryptonightR_soft_aes_template_part2
-PUBLIC CryptonightR_soft_aes_template_part3
-PUBLIC CryptonightR_soft_aes_template_end
-
-ALIGN(64)
-CryptonightR_soft_aes_template_part1:
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+8], rcx
-	push	rbx
-	push	rbp
-	push	rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 232
-
-	mov	eax, [rcx+96]
-	mov	ebx, [rcx+100]
-	mov	esi, [rcx+104]
-	mov	edx, [rcx+108]
-	mov [rsp+144], eax
-	mov [rsp+148], ebx
-	mov [rsp+152], esi
-	mov [rsp+156], edx
-
-	mov	rax, QWORD PTR [rcx+48]
-	mov	r10, rcx
-	xor	rax, QWORD PTR [rcx+16]
-	mov	r8, QWORD PTR [rcx+32]
-	xor	r8, QWORD PTR [rcx]
-	mov	r9, QWORD PTR [rcx+40]
-	xor	r9, QWORD PTR [rcx+8]
-	movd	xmm4, rax
-	mov	rdx, QWORD PTR [rcx+56]
-	xor	rdx, QWORD PTR [rcx+24]
-	mov	r11, QWORD PTR [rcx+224]
-	mov	rcx, QWORD PTR [rcx+88]
-	xor	rcx, QWORD PTR [r10+72]
-	mov	rax, QWORD PTR [r10+80]
-	movd	xmm0, rdx
-	xor	rax, QWORD PTR [r10+64]
-
-	movaps	XMMWORD PTR [rsp+16], xmm6
-	movaps	XMMWORD PTR [rsp+32], xmm7
-	movaps	XMMWORD PTR [rsp+48], xmm8
-	movaps	XMMWORD PTR [rsp+64], xmm9
-	movaps	XMMWORD PTR [rsp+80], xmm10
-	movaps	XMMWORD PTR [rsp+96], xmm11
-	movaps	XMMWORD PTR [rsp+112], xmm12
-	movaps	XMMWORD PTR [rsp+128], xmm13
-
-	movd	xmm5, rax
-
-	mov	rax, r8
-	punpcklqdq xmm4, xmm0
-	and	eax, 2097136
-	movd	xmm10, QWORD PTR [r10+96]
-	movd	xmm0, rcx
-	mov	rcx, QWORD PTR [r10+104]
-	xorps	xmm9, xmm9
-	mov	QWORD PTR [rsp+328], rax
-	movd	xmm12, r11
-	mov	QWORD PTR [rsp+320], r9
-	punpcklqdq xmm5, xmm0
-	movd xmm13, rcx
-	mov r12d, 524288
-
-	ALIGN(64)
-CryptonightR_soft_aes_template_mainloop:
-	movd xmm11, r12d
-	mov	r12, QWORD PTR [r10+272]
-	lea	r13, QWORD PTR [rax+r11]
-	mov	esi, DWORD PTR [r13]
-	movd	xmm0, r9
-	mov	r10d, DWORD PTR [r13+4]
-	movd	xmm7, r8
-	mov	ebp, DWORD PTR [r13+12]
-	mov	r14d, DWORD PTR [r13+8]
-	mov	rdx, QWORD PTR [rsp+328]
-	movzx	ecx, sil
-	shr	esi, 8
-	punpcklqdq xmm7, xmm0
-	mov	r15d, DWORD PTR [r12+rcx*4]
-	movzx	ecx, r10b
-	shr	r10d, 8
-	mov	edi, DWORD PTR [r12+rcx*4]
-	movzx	ecx, r14b
-	shr	r14d, 8
-	mov	ebx, DWORD PTR [r12+rcx*4]
-	movzx	ecx, bpl
-	shr	ebp, 8
-	mov	r9d, DWORD PTR [r12+rcx*4]
-	movzx	ecx, r10b
-	shr	r10d, 8
-	xor	r15d, DWORD PTR [r12+rcx*4+1024]
-	movzx	ecx, r14b
-	shr	r14d, 8
-	mov	eax, r14d
-	shr	eax, 8
-	xor	edi, DWORD PTR [r12+rcx*4+1024]
-	add	eax, 256
-	movzx	ecx, bpl
-	shr	ebp, 8
-	xor	ebx, DWORD PTR [r12+rcx*4+1024]
-	movzx	ecx, sil
-	shr	esi, 8
-	xor	r9d, DWORD PTR [r12+rcx*4+1024]
-	add	r12, 2048
-	movzx	ecx, r10b
-	shr	r10d, 8
-	add	r10d, 256
-	mov	r11d, DWORD PTR [r12+rax*4]
-	xor	r11d, DWORD PTR [r12+rcx*4]
-	xor	r11d, r9d
-	movzx	ecx, sil
-	mov	r10d, DWORD PTR [r12+r10*4]
-	shr	esi, 8
-	add	esi, 256
-	xor	r10d, DWORD PTR [r12+rcx*4]
-	movzx	ecx, bpl
-	xor	r10d, ebx
-	shr	ebp, 8
-	movd	xmm1, r11d
-	add	ebp, 256
-	movd	r11, xmm12
-	mov	r9d, DWORD PTR [r12+rcx*4]
-	xor	r9d, DWORD PTR [r12+rsi*4]
-	mov	eax, DWORD PTR [r12+rbp*4]
-	xor	r9d, edi
-	movzx	ecx, r14b
-	movd	xmm0, r10d
-	movd	xmm2, r9d
-	xor	eax, DWORD PTR [r12+rcx*4]
-	mov	rcx, rdx
-	xor	eax, r15d
-	punpckldq xmm2, xmm1
-	xor	rcx, 16
-	movd	xmm6, eax
-	mov	rax, rdx
-	punpckldq xmm6, xmm0
-	xor	rax, 32
-	punpckldq xmm6, xmm2
-	xor	rdx, 48
-	movdqu	xmm2, XMMWORD PTR [rcx+r11]
-	pxor xmm6, xmm2
-	pxor	xmm6, xmm7
-	paddq	xmm2, xmm4
-	movdqu	xmm1, XMMWORD PTR [rax+r11]
-	movdqu	xmm0, XMMWORD PTR [rdx+r11]
-	pxor xmm6, xmm1
-	pxor xmm6, xmm0
-	paddq	xmm0, xmm5
-	movdqu	XMMWORD PTR [rcx+r11], xmm0
-	movdqu	XMMWORD PTR [rax+r11], xmm2
-	movd rcx, xmm13
-	paddq	xmm1, xmm7
-	movdqu	XMMWORD PTR [rdx+r11], xmm1
-	movd	rdi, xmm6
-	mov	r10, rdi
-	and	r10d, 2097136
-	movdqa	xmm0, xmm6
-	pxor	xmm0, xmm4
-	movdqu	XMMWORD PTR [r13], xmm0
-
-	mov ebx, [rsp+144]
-	mov ebp, [rsp+152]
-	add ebx, [rsp+148]
-	add ebp, [rsp+156]
-	shl rbp, 32
-	or rbx, rbp
-
-	xor rbx, QWORD PTR [r10+r11]
-	lea	r14, QWORD PTR [r10+r11]
-	mov	rbp, QWORD PTR [r14+8]
-
-	mov [rsp+160], rbx
-	mov [rsp+168], rdi
-	mov [rsp+176], rbp
-	mov [rsp+184], r10
-	mov r10, rsp
-
-	mov ebx, [rsp+144]
-	mov esi, [rsp+148]
-	mov edi, [rsp+152]
-	mov ebp, [rsp+156]
-
-	movd esp, xmm7
-	movaps xmm0, xmm7
-	psrldq xmm0, 8
-	movd r15d, xmm0
-	movd eax, xmm4
-	movd edx, xmm5
-	movaps xmm0, xmm5
-	psrldq xmm0, 8
-	movd r9d, xmm0
-
-CryptonightR_soft_aes_template_part2:
-	mov rsp, r10
-	mov [rsp+144], ebx
-	mov [rsp+148], esi
-	mov [rsp+152], edi
-	mov [rsp+156], ebp
-
-	mov edi, edi
-	shl rbp, 32
-	or rbp, rdi
-	xor r8, rbp
-
-	mov ebx, ebx
-	shl rsi, 32
-	or rsi, rbx
-	xor QWORD PTR [rsp+320], rsi
-
-	mov rbx, [rsp+160]
-	mov rdi, [rsp+168]
-	mov rbp, [rsp+176]
-	mov r10, [rsp+184]
-
-	mov	r9, r10
-	xor	r9, 16
-	mov	rcx, r10
-	xor	rcx, 32
-	xor	r10, 48
-	mov	rax, rbx
-	mul	rdi
-	movdqu	xmm2, XMMWORD PTR [r9+r11]
-	movdqu	xmm1, XMMWORD PTR [rcx+r11]
-	pxor xmm6, xmm2
-	pxor xmm6, xmm1
-	paddq	xmm1, xmm7
-	add	r8, rdx
-	movdqu	xmm0, XMMWORD PTR [r10+r11]
-	pxor xmm6, xmm0
-	paddq	xmm0, xmm5
-	paddq	xmm2, xmm4
-	movdqu	XMMWORD PTR [r9+r11], xmm0
-	movdqa	xmm5, xmm4
-	mov	r9, QWORD PTR [rsp+320]
-	movdqa	xmm4, xmm6
-	add	r9, rax
-	movdqu	XMMWORD PTR [rcx+r11], xmm2
-	movdqu	XMMWORD PTR [r10+r11], xmm1
-	mov	r10, QWORD PTR [rsp+304]
-	movd r12d, xmm11
-	mov	QWORD PTR [r14], r8
-	xor	r8, rbx
-	mov	rax, r8
-	mov	QWORD PTR [r14+8], r9
-	and	eax, 2097136
-	xor	r9, rbp
-	mov	QWORD PTR [rsp+320], r9
-	mov	QWORD PTR [rsp+328], rax
-	sub	r12d, 1
-	jne	CryptonightR_soft_aes_template_mainloop
-
-CryptonightR_soft_aes_template_part3:
-	movaps	xmm6, XMMWORD PTR [rsp+16]
-	movaps	xmm7, XMMWORD PTR [rsp+32]
-	movaps	xmm8, XMMWORD PTR [rsp+48]
-	movaps	xmm9, XMMWORD PTR [rsp+64]
-	movaps	xmm10, XMMWORD PTR [rsp+80]
-	movaps	xmm11, XMMWORD PTR [rsp+96]
-	movaps	xmm12, XMMWORD PTR [rsp+112]
-	movaps	xmm13, XMMWORD PTR [rsp+128]
-
-	add	rsp, 232
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	pop	rsi
-	pop	rbp
-	pop	rbx
-	ret
-CryptonightR_soft_aes_template_end:
diff --git a/src/crypto/asm/win64/CryptonightR_template.asm b/src/crypto/asm/win64/CryptonightR_template.asm
deleted file mode 100644
index 250eca3d..00000000
--- a/src/crypto/asm/win64/CryptonightR_template.asm
+++ /dev/null
@@ -1,1585 +0,0 @@
-; Auto-generated file, do not edit
-
-_TEXT_CN_TEMPLATE SEGMENT PAGE READ EXECUTE
-PUBLIC CryptonightR_instruction0
-PUBLIC CryptonightR_instruction1
-PUBLIC CryptonightR_instruction2
-PUBLIC CryptonightR_instruction3
-PUBLIC CryptonightR_instruction4
-PUBLIC CryptonightR_instruction5
-PUBLIC CryptonightR_instruction6
-PUBLIC CryptonightR_instruction7
-PUBLIC CryptonightR_instruction8
-PUBLIC CryptonightR_instruction9
-PUBLIC CryptonightR_instruction10
-PUBLIC CryptonightR_instruction11
-PUBLIC CryptonightR_instruction12
-PUBLIC CryptonightR_instruction13
-PUBLIC CryptonightR_instruction14
-PUBLIC CryptonightR_instruction15
-PUBLIC CryptonightR_instruction16
-PUBLIC CryptonightR_instruction17
-PUBLIC CryptonightR_instruction18
-PUBLIC CryptonightR_instruction19
-PUBLIC CryptonightR_instruction20
-PUBLIC CryptonightR_instruction21
-PUBLIC CryptonightR_instruction22
-PUBLIC CryptonightR_instruction23
-PUBLIC CryptonightR_instruction24
-PUBLIC CryptonightR_instruction25
-PUBLIC CryptonightR_instruction26
-PUBLIC CryptonightR_instruction27
-PUBLIC CryptonightR_instruction28
-PUBLIC CryptonightR_instruction29
-PUBLIC CryptonightR_instruction30
-PUBLIC CryptonightR_instruction31
-PUBLIC CryptonightR_instruction32
-PUBLIC CryptonightR_instruction33
-PUBLIC CryptonightR_instruction34
-PUBLIC CryptonightR_instruction35
-PUBLIC CryptonightR_instruction36
-PUBLIC CryptonightR_instruction37
-PUBLIC CryptonightR_instruction38
-PUBLIC CryptonightR_instruction39
-PUBLIC CryptonightR_instruction40
-PUBLIC CryptonightR_instruction41
-PUBLIC CryptonightR_instruction42
-PUBLIC CryptonightR_instruction43
-PUBLIC CryptonightR_instruction44
-PUBLIC CryptonightR_instruction45
-PUBLIC CryptonightR_instruction46
-PUBLIC CryptonightR_instruction47
-PUBLIC CryptonightR_instruction48
-PUBLIC CryptonightR_instruction49
-PUBLIC CryptonightR_instruction50
-PUBLIC CryptonightR_instruction51
-PUBLIC CryptonightR_instruction52
-PUBLIC CryptonightR_instruction53
-PUBLIC CryptonightR_instruction54
-PUBLIC CryptonightR_instruction55
-PUBLIC CryptonightR_instruction56
-PUBLIC CryptonightR_instruction57
-PUBLIC CryptonightR_instruction58
-PUBLIC CryptonightR_instruction59
-PUBLIC CryptonightR_instruction60
-PUBLIC CryptonightR_instruction61
-PUBLIC CryptonightR_instruction62
-PUBLIC CryptonightR_instruction63
-PUBLIC CryptonightR_instruction64
-PUBLIC CryptonightR_instruction65
-PUBLIC CryptonightR_instruction66
-PUBLIC CryptonightR_instruction67
-PUBLIC CryptonightR_instruction68
-PUBLIC CryptonightR_instruction69
-PUBLIC CryptonightR_instruction70
-PUBLIC CryptonightR_instruction71
-PUBLIC CryptonightR_instruction72
-PUBLIC CryptonightR_instruction73
-PUBLIC CryptonightR_instruction74
-PUBLIC CryptonightR_instruction75
-PUBLIC CryptonightR_instruction76
-PUBLIC CryptonightR_instruction77
-PUBLIC CryptonightR_instruction78
-PUBLIC CryptonightR_instruction79
-PUBLIC CryptonightR_instruction80
-PUBLIC CryptonightR_instruction81
-PUBLIC CryptonightR_instruction82
-PUBLIC CryptonightR_instruction83
-PUBLIC CryptonightR_instruction84
-PUBLIC CryptonightR_instruction85
-PUBLIC CryptonightR_instruction86
-PUBLIC CryptonightR_instruction87
-PUBLIC CryptonightR_instruction88
-PUBLIC CryptonightR_instruction89
-PUBLIC CryptonightR_instruction90
-PUBLIC CryptonightR_instruction91
-PUBLIC CryptonightR_instruction92
-PUBLIC CryptonightR_instruction93
-PUBLIC CryptonightR_instruction94
-PUBLIC CryptonightR_instruction95
-PUBLIC CryptonightR_instruction96
-PUBLIC CryptonightR_instruction97
-PUBLIC CryptonightR_instruction98
-PUBLIC CryptonightR_instruction99
-PUBLIC CryptonightR_instruction100
-PUBLIC CryptonightR_instruction101
-PUBLIC CryptonightR_instruction102
-PUBLIC CryptonightR_instruction103
-PUBLIC CryptonightR_instruction104
-PUBLIC CryptonightR_instruction105
-PUBLIC CryptonightR_instruction106
-PUBLIC CryptonightR_instruction107
-PUBLIC CryptonightR_instruction108
-PUBLIC CryptonightR_instruction109
-PUBLIC CryptonightR_instruction110
-PUBLIC CryptonightR_instruction111
-PUBLIC CryptonightR_instruction112
-PUBLIC CryptonightR_instruction113
-PUBLIC CryptonightR_instruction114
-PUBLIC CryptonightR_instruction115
-PUBLIC CryptonightR_instruction116
-PUBLIC CryptonightR_instruction117
-PUBLIC CryptonightR_instruction118
-PUBLIC CryptonightR_instruction119
-PUBLIC CryptonightR_instruction120
-PUBLIC CryptonightR_instruction121
-PUBLIC CryptonightR_instruction122
-PUBLIC CryptonightR_instruction123
-PUBLIC CryptonightR_instruction124
-PUBLIC CryptonightR_instruction125
-PUBLIC CryptonightR_instruction126
-PUBLIC CryptonightR_instruction127
-PUBLIC CryptonightR_instruction128
-PUBLIC CryptonightR_instruction129
-PUBLIC CryptonightR_instruction130
-PUBLIC CryptonightR_instruction131
-PUBLIC CryptonightR_instruction132
-PUBLIC CryptonightR_instruction133
-PUBLIC CryptonightR_instruction134
-PUBLIC CryptonightR_instruction135
-PUBLIC CryptonightR_instruction136
-PUBLIC CryptonightR_instruction137
-PUBLIC CryptonightR_instruction138
-PUBLIC CryptonightR_instruction139
-PUBLIC CryptonightR_instruction140
-PUBLIC CryptonightR_instruction141
-PUBLIC CryptonightR_instruction142
-PUBLIC CryptonightR_instruction143
-PUBLIC CryptonightR_instruction144
-PUBLIC CryptonightR_instruction145
-PUBLIC CryptonightR_instruction146
-PUBLIC CryptonightR_instruction147
-PUBLIC CryptonightR_instruction148
-PUBLIC CryptonightR_instruction149
-PUBLIC CryptonightR_instruction150
-PUBLIC CryptonightR_instruction151
-PUBLIC CryptonightR_instruction152
-PUBLIC CryptonightR_instruction153
-PUBLIC CryptonightR_instruction154
-PUBLIC CryptonightR_instruction155
-PUBLIC CryptonightR_instruction156
-PUBLIC CryptonightR_instruction157
-PUBLIC CryptonightR_instruction158
-PUBLIC CryptonightR_instruction159
-PUBLIC CryptonightR_instruction160
-PUBLIC CryptonightR_instruction161
-PUBLIC CryptonightR_instruction162
-PUBLIC CryptonightR_instruction163
-PUBLIC CryptonightR_instruction164
-PUBLIC CryptonightR_instruction165
-PUBLIC CryptonightR_instruction166
-PUBLIC CryptonightR_instruction167
-PUBLIC CryptonightR_instruction168
-PUBLIC CryptonightR_instruction169
-PUBLIC CryptonightR_instruction170
-PUBLIC CryptonightR_instruction171
-PUBLIC CryptonightR_instruction172
-PUBLIC CryptonightR_instruction173
-PUBLIC CryptonightR_instruction174
-PUBLIC CryptonightR_instruction175
-PUBLIC CryptonightR_instruction176
-PUBLIC CryptonightR_instruction177
-PUBLIC CryptonightR_instruction178
-PUBLIC CryptonightR_instruction179
-PUBLIC CryptonightR_instruction180
-PUBLIC CryptonightR_instruction181
-PUBLIC CryptonightR_instruction182
-PUBLIC CryptonightR_instruction183
-PUBLIC CryptonightR_instruction184
-PUBLIC CryptonightR_instruction185
-PUBLIC CryptonightR_instruction186
-PUBLIC CryptonightR_instruction187
-PUBLIC CryptonightR_instruction188
-PUBLIC CryptonightR_instruction189
-PUBLIC CryptonightR_instruction190
-PUBLIC CryptonightR_instruction191
-PUBLIC CryptonightR_instruction192
-PUBLIC CryptonightR_instruction193
-PUBLIC CryptonightR_instruction194
-PUBLIC CryptonightR_instruction195
-PUBLIC CryptonightR_instruction196
-PUBLIC CryptonightR_instruction197
-PUBLIC CryptonightR_instruction198
-PUBLIC CryptonightR_instruction199
-PUBLIC CryptonightR_instruction200
-PUBLIC CryptonightR_instruction201
-PUBLIC CryptonightR_instruction202
-PUBLIC CryptonightR_instruction203
-PUBLIC CryptonightR_instruction204
-PUBLIC CryptonightR_instruction205
-PUBLIC CryptonightR_instruction206
-PUBLIC CryptonightR_instruction207
-PUBLIC CryptonightR_instruction208
-PUBLIC CryptonightR_instruction209
-PUBLIC CryptonightR_instruction210
-PUBLIC CryptonightR_instruction211
-PUBLIC CryptonightR_instruction212
-PUBLIC CryptonightR_instruction213
-PUBLIC CryptonightR_instruction214
-PUBLIC CryptonightR_instruction215
-PUBLIC CryptonightR_instruction216
-PUBLIC CryptonightR_instruction217
-PUBLIC CryptonightR_instruction218
-PUBLIC CryptonightR_instruction219
-PUBLIC CryptonightR_instruction220
-PUBLIC CryptonightR_instruction221
-PUBLIC CryptonightR_instruction222
-PUBLIC CryptonightR_instruction223
-PUBLIC CryptonightR_instruction224
-PUBLIC CryptonightR_instruction225
-PUBLIC CryptonightR_instruction226
-PUBLIC CryptonightR_instruction227
-PUBLIC CryptonightR_instruction228
-PUBLIC CryptonightR_instruction229
-PUBLIC CryptonightR_instruction230
-PUBLIC CryptonightR_instruction231
-PUBLIC CryptonightR_instruction232
-PUBLIC CryptonightR_instruction233
-PUBLIC CryptonightR_instruction234
-PUBLIC CryptonightR_instruction235
-PUBLIC CryptonightR_instruction236
-PUBLIC CryptonightR_instruction237
-PUBLIC CryptonightR_instruction238
-PUBLIC CryptonightR_instruction239
-PUBLIC CryptonightR_instruction240
-PUBLIC CryptonightR_instruction241
-PUBLIC CryptonightR_instruction242
-PUBLIC CryptonightR_instruction243
-PUBLIC CryptonightR_instruction244
-PUBLIC CryptonightR_instruction245
-PUBLIC CryptonightR_instruction246
-PUBLIC CryptonightR_instruction247
-PUBLIC CryptonightR_instruction248
-PUBLIC CryptonightR_instruction249
-PUBLIC CryptonightR_instruction250
-PUBLIC CryptonightR_instruction251
-PUBLIC CryptonightR_instruction252
-PUBLIC CryptonightR_instruction253
-PUBLIC CryptonightR_instruction254
-PUBLIC CryptonightR_instruction255
-PUBLIC CryptonightR_instruction256
-PUBLIC CryptonightR_instruction_mov0
-PUBLIC CryptonightR_instruction_mov1
-PUBLIC CryptonightR_instruction_mov2
-PUBLIC CryptonightR_instruction_mov3
-PUBLIC CryptonightR_instruction_mov4
-PUBLIC CryptonightR_instruction_mov5
-PUBLIC CryptonightR_instruction_mov6
-PUBLIC CryptonightR_instruction_mov7
-PUBLIC CryptonightR_instruction_mov8
-PUBLIC CryptonightR_instruction_mov9
-PUBLIC CryptonightR_instruction_mov10
-PUBLIC CryptonightR_instruction_mov11
-PUBLIC CryptonightR_instruction_mov12
-PUBLIC CryptonightR_instruction_mov13
-PUBLIC CryptonightR_instruction_mov14
-PUBLIC CryptonightR_instruction_mov15
-PUBLIC CryptonightR_instruction_mov16
-PUBLIC CryptonightR_instruction_mov17
-PUBLIC CryptonightR_instruction_mov18
-PUBLIC CryptonightR_instruction_mov19
-PUBLIC CryptonightR_instruction_mov20
-PUBLIC CryptonightR_instruction_mov21
-PUBLIC CryptonightR_instruction_mov22
-PUBLIC CryptonightR_instruction_mov23
-PUBLIC CryptonightR_instruction_mov24
-PUBLIC CryptonightR_instruction_mov25
-PUBLIC CryptonightR_instruction_mov26
-PUBLIC CryptonightR_instruction_mov27
-PUBLIC CryptonightR_instruction_mov28
-PUBLIC CryptonightR_instruction_mov29
-PUBLIC CryptonightR_instruction_mov30
-PUBLIC CryptonightR_instruction_mov31
-PUBLIC CryptonightR_instruction_mov32
-PUBLIC CryptonightR_instruction_mov33
-PUBLIC CryptonightR_instruction_mov34
-PUBLIC CryptonightR_instruction_mov35
-PUBLIC CryptonightR_instruction_mov36
-PUBLIC CryptonightR_instruction_mov37
-PUBLIC CryptonightR_instruction_mov38
-PUBLIC CryptonightR_instruction_mov39
-PUBLIC CryptonightR_instruction_mov40
-PUBLIC CryptonightR_instruction_mov41
-PUBLIC CryptonightR_instruction_mov42
-PUBLIC CryptonightR_instruction_mov43
-PUBLIC CryptonightR_instruction_mov44
-PUBLIC CryptonightR_instruction_mov45
-PUBLIC CryptonightR_instruction_mov46
-PUBLIC CryptonightR_instruction_mov47
-PUBLIC CryptonightR_instruction_mov48
-PUBLIC CryptonightR_instruction_mov49
-PUBLIC CryptonightR_instruction_mov50
-PUBLIC CryptonightR_instruction_mov51
-PUBLIC CryptonightR_instruction_mov52
-PUBLIC CryptonightR_instruction_mov53
-PUBLIC CryptonightR_instruction_mov54
-PUBLIC CryptonightR_instruction_mov55
-PUBLIC CryptonightR_instruction_mov56
-PUBLIC CryptonightR_instruction_mov57
-PUBLIC CryptonightR_instruction_mov58
-PUBLIC CryptonightR_instruction_mov59
-PUBLIC CryptonightR_instruction_mov60
-PUBLIC CryptonightR_instruction_mov61
-PUBLIC CryptonightR_instruction_mov62
-PUBLIC CryptonightR_instruction_mov63
-PUBLIC CryptonightR_instruction_mov64
-PUBLIC CryptonightR_instruction_mov65
-PUBLIC CryptonightR_instruction_mov66
-PUBLIC CryptonightR_instruction_mov67
-PUBLIC CryptonightR_instruction_mov68
-PUBLIC CryptonightR_instruction_mov69
-PUBLIC CryptonightR_instruction_mov70
-PUBLIC CryptonightR_instruction_mov71
-PUBLIC CryptonightR_instruction_mov72
-PUBLIC CryptonightR_instruction_mov73
-PUBLIC CryptonightR_instruction_mov74
-PUBLIC CryptonightR_instruction_mov75
-PUBLIC CryptonightR_instruction_mov76
-PUBLIC CryptonightR_instruction_mov77
-PUBLIC CryptonightR_instruction_mov78
-PUBLIC CryptonightR_instruction_mov79
-PUBLIC CryptonightR_instruction_mov80
-PUBLIC CryptonightR_instruction_mov81
-PUBLIC CryptonightR_instruction_mov82
-PUBLIC CryptonightR_instruction_mov83
-PUBLIC CryptonightR_instruction_mov84
-PUBLIC CryptonightR_instruction_mov85
-PUBLIC CryptonightR_instruction_mov86
-PUBLIC CryptonightR_instruction_mov87
-PUBLIC CryptonightR_instruction_mov88
-PUBLIC CryptonightR_instruction_mov89
-PUBLIC CryptonightR_instruction_mov90
-PUBLIC CryptonightR_instruction_mov91
-PUBLIC CryptonightR_instruction_mov92
-PUBLIC CryptonightR_instruction_mov93
-PUBLIC CryptonightR_instruction_mov94
-PUBLIC CryptonightR_instruction_mov95
-PUBLIC CryptonightR_instruction_mov96
-PUBLIC CryptonightR_instruction_mov97
-PUBLIC CryptonightR_instruction_mov98
-PUBLIC CryptonightR_instruction_mov99
-PUBLIC CryptonightR_instruction_mov100
-PUBLIC CryptonightR_instruction_mov101
-PUBLIC CryptonightR_instruction_mov102
-PUBLIC CryptonightR_instruction_mov103
-PUBLIC CryptonightR_instruction_mov104
-PUBLIC CryptonightR_instruction_mov105
-PUBLIC CryptonightR_instruction_mov106
-PUBLIC CryptonightR_instruction_mov107
-PUBLIC CryptonightR_instruction_mov108
-PUBLIC CryptonightR_instruction_mov109
-PUBLIC CryptonightR_instruction_mov110
-PUBLIC CryptonightR_instruction_mov111
-PUBLIC CryptonightR_instruction_mov112
-PUBLIC CryptonightR_instruction_mov113
-PUBLIC CryptonightR_instruction_mov114
-PUBLIC CryptonightR_instruction_mov115
-PUBLIC CryptonightR_instruction_mov116
-PUBLIC CryptonightR_instruction_mov117
-PUBLIC CryptonightR_instruction_mov118
-PUBLIC CryptonightR_instruction_mov119
-PUBLIC CryptonightR_instruction_mov120
-PUBLIC CryptonightR_instruction_mov121
-PUBLIC CryptonightR_instruction_mov122
-PUBLIC CryptonightR_instruction_mov123
-PUBLIC CryptonightR_instruction_mov124
-PUBLIC CryptonightR_instruction_mov125
-PUBLIC CryptonightR_instruction_mov126
-PUBLIC CryptonightR_instruction_mov127
-PUBLIC CryptonightR_instruction_mov128
-PUBLIC CryptonightR_instruction_mov129
-PUBLIC CryptonightR_instruction_mov130
-PUBLIC CryptonightR_instruction_mov131
-PUBLIC CryptonightR_instruction_mov132
-PUBLIC CryptonightR_instruction_mov133
-PUBLIC CryptonightR_instruction_mov134
-PUBLIC CryptonightR_instruction_mov135
-PUBLIC CryptonightR_instruction_mov136
-PUBLIC CryptonightR_instruction_mov137
-PUBLIC CryptonightR_instruction_mov138
-PUBLIC CryptonightR_instruction_mov139
-PUBLIC CryptonightR_instruction_mov140
-PUBLIC CryptonightR_instruction_mov141
-PUBLIC CryptonightR_instruction_mov142
-PUBLIC CryptonightR_instruction_mov143
-PUBLIC CryptonightR_instruction_mov144
-PUBLIC CryptonightR_instruction_mov145
-PUBLIC CryptonightR_instruction_mov146
-PUBLIC CryptonightR_instruction_mov147
-PUBLIC CryptonightR_instruction_mov148
-PUBLIC CryptonightR_instruction_mov149
-PUBLIC CryptonightR_instruction_mov150
-PUBLIC CryptonightR_instruction_mov151
-PUBLIC CryptonightR_instruction_mov152
-PUBLIC CryptonightR_instruction_mov153
-PUBLIC CryptonightR_instruction_mov154
-PUBLIC CryptonightR_instruction_mov155
-PUBLIC CryptonightR_instruction_mov156
-PUBLIC CryptonightR_instruction_mov157
-PUBLIC CryptonightR_instruction_mov158
-PUBLIC CryptonightR_instruction_mov159
-PUBLIC CryptonightR_instruction_mov160
-PUBLIC CryptonightR_instruction_mov161
-PUBLIC CryptonightR_instruction_mov162
-PUBLIC CryptonightR_instruction_mov163
-PUBLIC CryptonightR_instruction_mov164
-PUBLIC CryptonightR_instruction_mov165
-PUBLIC CryptonightR_instruction_mov166
-PUBLIC CryptonightR_instruction_mov167
-PUBLIC CryptonightR_instruction_mov168
-PUBLIC CryptonightR_instruction_mov169
-PUBLIC CryptonightR_instruction_mov170
-PUBLIC CryptonightR_instruction_mov171
-PUBLIC CryptonightR_instruction_mov172
-PUBLIC CryptonightR_instruction_mov173
-PUBLIC CryptonightR_instruction_mov174
-PUBLIC CryptonightR_instruction_mov175
-PUBLIC CryptonightR_instruction_mov176
-PUBLIC CryptonightR_instruction_mov177
-PUBLIC CryptonightR_instruction_mov178
-PUBLIC CryptonightR_instruction_mov179
-PUBLIC CryptonightR_instruction_mov180
-PUBLIC CryptonightR_instruction_mov181
-PUBLIC CryptonightR_instruction_mov182
-PUBLIC CryptonightR_instruction_mov183
-PUBLIC CryptonightR_instruction_mov184
-PUBLIC CryptonightR_instruction_mov185
-PUBLIC CryptonightR_instruction_mov186
-PUBLIC CryptonightR_instruction_mov187
-PUBLIC CryptonightR_instruction_mov188
-PUBLIC CryptonightR_instruction_mov189
-PUBLIC CryptonightR_instruction_mov190
-PUBLIC CryptonightR_instruction_mov191
-PUBLIC CryptonightR_instruction_mov192
-PUBLIC CryptonightR_instruction_mov193
-PUBLIC CryptonightR_instruction_mov194
-PUBLIC CryptonightR_instruction_mov195
-PUBLIC CryptonightR_instruction_mov196
-PUBLIC CryptonightR_instruction_mov197
-PUBLIC CryptonightR_instruction_mov198
-PUBLIC CryptonightR_instruction_mov199
-PUBLIC CryptonightR_instruction_mov200
-PUBLIC CryptonightR_instruction_mov201
-PUBLIC CryptonightR_instruction_mov202
-PUBLIC CryptonightR_instruction_mov203
-PUBLIC CryptonightR_instruction_mov204
-PUBLIC CryptonightR_instruction_mov205
-PUBLIC CryptonightR_instruction_mov206
-PUBLIC CryptonightR_instruction_mov207
-PUBLIC CryptonightR_instruction_mov208
-PUBLIC CryptonightR_instruction_mov209
-PUBLIC CryptonightR_instruction_mov210
-PUBLIC CryptonightR_instruction_mov211
-PUBLIC CryptonightR_instruction_mov212
-PUBLIC CryptonightR_instruction_mov213
-PUBLIC CryptonightR_instruction_mov214
-PUBLIC CryptonightR_instruction_mov215
-PUBLIC CryptonightR_instruction_mov216
-PUBLIC CryptonightR_instruction_mov217
-PUBLIC CryptonightR_instruction_mov218
-PUBLIC CryptonightR_instruction_mov219
-PUBLIC CryptonightR_instruction_mov220
-PUBLIC CryptonightR_instruction_mov221
-PUBLIC CryptonightR_instruction_mov222
-PUBLIC CryptonightR_instruction_mov223
-PUBLIC CryptonightR_instruction_mov224
-PUBLIC CryptonightR_instruction_mov225
-PUBLIC CryptonightR_instruction_mov226
-PUBLIC CryptonightR_instruction_mov227
-PUBLIC CryptonightR_instruction_mov228
-PUBLIC CryptonightR_instruction_mov229
-PUBLIC CryptonightR_instruction_mov230
-PUBLIC CryptonightR_instruction_mov231
-PUBLIC CryptonightR_instruction_mov232
-PUBLIC CryptonightR_instruction_mov233
-PUBLIC CryptonightR_instruction_mov234
-PUBLIC CryptonightR_instruction_mov235
-PUBLIC CryptonightR_instruction_mov236
-PUBLIC CryptonightR_instruction_mov237
-PUBLIC CryptonightR_instruction_mov238
-PUBLIC CryptonightR_instruction_mov239
-PUBLIC CryptonightR_instruction_mov240
-PUBLIC CryptonightR_instruction_mov241
-PUBLIC CryptonightR_instruction_mov242
-PUBLIC CryptonightR_instruction_mov243
-PUBLIC CryptonightR_instruction_mov244
-PUBLIC CryptonightR_instruction_mov245
-PUBLIC CryptonightR_instruction_mov246
-PUBLIC CryptonightR_instruction_mov247
-PUBLIC CryptonightR_instruction_mov248
-PUBLIC CryptonightR_instruction_mov249
-PUBLIC CryptonightR_instruction_mov250
-PUBLIC CryptonightR_instruction_mov251
-PUBLIC CryptonightR_instruction_mov252
-PUBLIC CryptonightR_instruction_mov253
-PUBLIC CryptonightR_instruction_mov254
-PUBLIC CryptonightR_instruction_mov255
-PUBLIC CryptonightR_instruction_mov256
-
-INCLUDE CryptonightWOW_template_win.inc
-INCLUDE CryptonightR_template_win.inc
-INCLUDE CryptonightWOW_soft_aes_template_win.inc
-INCLUDE CryptonightR_soft_aes_template_win.inc
-
-CryptonightR_instruction0:
-	imul	rbx, rbx
-CryptonightR_instruction1:
-	imul	rbx, rbx
-CryptonightR_instruction2:
-	imul	rbx, rbx
-CryptonightR_instruction3:
-	add	rbx, r9
-	add	rbx, 2147483647
-CryptonightR_instruction4:
-	sub	rbx, r9
-CryptonightR_instruction5:
-	ror	ebx, cl
-CryptonightR_instruction6:
-	rol	ebx, cl
-CryptonightR_instruction7:
-	xor	rbx, r9
-CryptonightR_instruction8:
-	imul	rsi, rbx
-CryptonightR_instruction9:
-	imul	rsi, rbx
-CryptonightR_instruction10:
-	imul	rsi, rbx
-CryptonightR_instruction11:
-	add	rsi, rbx
-	add	rsi, 2147483647
-CryptonightR_instruction12:
-	sub	rsi, rbx
-CryptonightR_instruction13:
-	ror	esi, cl
-CryptonightR_instruction14:
-	rol	esi, cl
-CryptonightR_instruction15:
-	xor	rsi, rbx
-CryptonightR_instruction16:
-	imul	rdi, rbx
-CryptonightR_instruction17:
-	imul	rdi, rbx
-CryptonightR_instruction18:
-	imul	rdi, rbx
-CryptonightR_instruction19:
-	add	rdi, rbx
-	add	rdi, 2147483647
-CryptonightR_instruction20:
-	sub	rdi, rbx
-CryptonightR_instruction21:
-	ror	edi, cl
-CryptonightR_instruction22:
-	rol	edi, cl
-CryptonightR_instruction23:
-	xor	rdi, rbx
-CryptonightR_instruction24:
-	imul	rbp, rbx
-CryptonightR_instruction25:
-	imul	rbp, rbx
-CryptonightR_instruction26:
-	imul	rbp, rbx
-CryptonightR_instruction27:
-	add	rbp, rbx
-	add	rbp, 2147483647
-CryptonightR_instruction28:
-	sub	rbp, rbx
-CryptonightR_instruction29:
-	ror	ebp, cl
-CryptonightR_instruction30:
-	rol	ebp, cl
-CryptonightR_instruction31:
-	xor	rbp, rbx
-CryptonightR_instruction32:
-	imul	rbx, rsi
-CryptonightR_instruction33:
-	imul	rbx, rsi
-CryptonightR_instruction34:
-	imul	rbx, rsi
-CryptonightR_instruction35:
-	add	rbx, rsi
-	add	rbx, 2147483647
-CryptonightR_instruction36:
-	sub	rbx, rsi
-CryptonightR_instruction37:
-	ror	ebx, cl
-CryptonightR_instruction38:
-	rol	ebx, cl
-CryptonightR_instruction39:
-	xor	rbx, rsi
-CryptonightR_instruction40:
-	imul	rsi, rsi
-CryptonightR_instruction41:
-	imul	rsi, rsi
-CryptonightR_instruction42:
-	imul	rsi, rsi
-CryptonightR_instruction43:
-	add	rsi, r9
-	add	rsi, 2147483647
-CryptonightR_instruction44:
-	sub	rsi, r9
-CryptonightR_instruction45:
-	ror	esi, cl
-CryptonightR_instruction46:
-	rol	esi, cl
-CryptonightR_instruction47:
-	xor	rsi, r9
-CryptonightR_instruction48:
-	imul	rdi, rsi
-CryptonightR_instruction49:
-	imul	rdi, rsi
-CryptonightR_instruction50:
-	imul	rdi, rsi
-CryptonightR_instruction51:
-	add	rdi, rsi
-	add	rdi, 2147483647
-CryptonightR_instruction52:
-	sub	rdi, rsi
-CryptonightR_instruction53:
-	ror	edi, cl
-CryptonightR_instruction54:
-	rol	edi, cl
-CryptonightR_instruction55:
-	xor	rdi, rsi
-CryptonightR_instruction56:
-	imul	rbp, rsi
-CryptonightR_instruction57:
-	imul	rbp, rsi
-CryptonightR_instruction58:
-	imul	rbp, rsi
-CryptonightR_instruction59:
-	add	rbp, rsi
-	add	rbp, 2147483647
-CryptonightR_instruction60:
-	sub	rbp, rsi
-CryptonightR_instruction61:
-	ror	ebp, cl
-CryptonightR_instruction62:
-	rol	ebp, cl
-CryptonightR_instruction63:
-	xor	rbp, rsi
-CryptonightR_instruction64:
-	imul	rbx, rdi
-CryptonightR_instruction65:
-	imul	rbx, rdi
-CryptonightR_instruction66:
-	imul	rbx, rdi
-CryptonightR_instruction67:
-	add	rbx, rdi
-	add	rbx, 2147483647
-CryptonightR_instruction68:
-	sub	rbx, rdi
-CryptonightR_instruction69:
-	ror	ebx, cl
-CryptonightR_instruction70:
-	rol	ebx, cl
-CryptonightR_instruction71:
-	xor	rbx, rdi
-CryptonightR_instruction72:
-	imul	rsi, rdi
-CryptonightR_instruction73:
-	imul	rsi, rdi
-CryptonightR_instruction74:
-	imul	rsi, rdi
-CryptonightR_instruction75:
-	add	rsi, rdi
-	add	rsi, 2147483647
-CryptonightR_instruction76:
-	sub	rsi, rdi
-CryptonightR_instruction77:
-	ror	esi, cl
-CryptonightR_instruction78:
-	rol	esi, cl
-CryptonightR_instruction79:
-	xor	rsi, rdi
-CryptonightR_instruction80:
-	imul	rdi, rdi
-CryptonightR_instruction81:
-	imul	rdi, rdi
-CryptonightR_instruction82:
-	imul	rdi, rdi
-CryptonightR_instruction83:
-	add	rdi, r9
-	add	rdi, 2147483647
-CryptonightR_instruction84:
-	sub	rdi, r9
-CryptonightR_instruction85:
-	ror	edi, cl
-CryptonightR_instruction86:
-	rol	edi, cl
-CryptonightR_instruction87:
-	xor	rdi, r9
-CryptonightR_instruction88:
-	imul	rbp, rdi
-CryptonightR_instruction89:
-	imul	rbp, rdi
-CryptonightR_instruction90:
-	imul	rbp, rdi
-CryptonightR_instruction91:
-	add	rbp, rdi
-	add	rbp, 2147483647
-CryptonightR_instruction92:
-	sub	rbp, rdi
-CryptonightR_instruction93:
-	ror	ebp, cl
-CryptonightR_instruction94:
-	rol	ebp, cl
-CryptonightR_instruction95:
-	xor	rbp, rdi
-CryptonightR_instruction96:
-	imul	rbx, rbp
-CryptonightR_instruction97:
-	imul	rbx, rbp
-CryptonightR_instruction98:
-	imul	rbx, rbp
-CryptonightR_instruction99:
-	add	rbx, rbp
-	add	rbx, 2147483647
-CryptonightR_instruction100:
-	sub	rbx, rbp
-CryptonightR_instruction101:
-	ror	ebx, cl
-CryptonightR_instruction102:
-	rol	ebx, cl
-CryptonightR_instruction103:
-	xor	rbx, rbp
-CryptonightR_instruction104:
-	imul	rsi, rbp
-CryptonightR_instruction105:
-	imul	rsi, rbp
-CryptonightR_instruction106:
-	imul	rsi, rbp
-CryptonightR_instruction107:
-	add	rsi, rbp
-	add	rsi, 2147483647
-CryptonightR_instruction108:
-	sub	rsi, rbp
-CryptonightR_instruction109:
-	ror	esi, cl
-CryptonightR_instruction110:
-	rol	esi, cl
-CryptonightR_instruction111:
-	xor	rsi, rbp
-CryptonightR_instruction112:
-	imul	rdi, rbp
-CryptonightR_instruction113:
-	imul	rdi, rbp
-CryptonightR_instruction114:
-	imul	rdi, rbp
-CryptonightR_instruction115:
-	add	rdi, rbp
-	add	rdi, 2147483647
-CryptonightR_instruction116:
-	sub	rdi, rbp
-CryptonightR_instruction117:
-	ror	edi, cl
-CryptonightR_instruction118:
-	rol	edi, cl
-CryptonightR_instruction119:
-	xor	rdi, rbp
-CryptonightR_instruction120:
-	imul	rbp, rbp
-CryptonightR_instruction121:
-	imul	rbp, rbp
-CryptonightR_instruction122:
-	imul	rbp, rbp
-CryptonightR_instruction123:
-	add	rbp, r9
-	add	rbp, 2147483647
-CryptonightR_instruction124:
-	sub	rbp, r9
-CryptonightR_instruction125:
-	ror	ebp, cl
-CryptonightR_instruction126:
-	rol	ebp, cl
-CryptonightR_instruction127:
-	xor	rbp, r9
-CryptonightR_instruction128:
-	imul	rbx, rsp
-CryptonightR_instruction129:
-	imul	rbx, rsp
-CryptonightR_instruction130:
-	imul	rbx, rsp
-CryptonightR_instruction131:
-	add	rbx, rsp
-	add	rbx, 2147483647
-CryptonightR_instruction132:
-	sub	rbx, rsp
-CryptonightR_instruction133:
-	ror	ebx, cl
-CryptonightR_instruction134:
-	rol	ebx, cl
-CryptonightR_instruction135:
-	xor	rbx, rsp
-CryptonightR_instruction136:
-	imul	rsi, rsp
-CryptonightR_instruction137:
-	imul	rsi, rsp
-CryptonightR_instruction138:
-	imul	rsi, rsp
-CryptonightR_instruction139:
-	add	rsi, rsp
-	add	rsi, 2147483647
-CryptonightR_instruction140:
-	sub	rsi, rsp
-CryptonightR_instruction141:
-	ror	esi, cl
-CryptonightR_instruction142:
-	rol	esi, cl
-CryptonightR_instruction143:
-	xor	rsi, rsp
-CryptonightR_instruction144:
-	imul	rdi, rsp
-CryptonightR_instruction145:
-	imul	rdi, rsp
-CryptonightR_instruction146:
-	imul	rdi, rsp
-CryptonightR_instruction147:
-	add	rdi, rsp
-	add	rdi, 2147483647
-CryptonightR_instruction148:
-	sub	rdi, rsp
-CryptonightR_instruction149:
-	ror	edi, cl
-CryptonightR_instruction150:
-	rol	edi, cl
-CryptonightR_instruction151:
-	xor	rdi, rsp
-CryptonightR_instruction152:
-	imul	rbp, rsp
-CryptonightR_instruction153:
-	imul	rbp, rsp
-CryptonightR_instruction154:
-	imul	rbp, rsp
-CryptonightR_instruction155:
-	add	rbp, rsp
-	add	rbp, 2147483647
-CryptonightR_instruction156:
-	sub	rbp, rsp
-CryptonightR_instruction157:
-	ror	ebp, cl
-CryptonightR_instruction158:
-	rol	ebp, cl
-CryptonightR_instruction159:
-	xor	rbp, rsp
-CryptonightR_instruction160:
-	imul	rbx, r15
-CryptonightR_instruction161:
-	imul	rbx, r15
-CryptonightR_instruction162:
-	imul	rbx, r15
-CryptonightR_instruction163:
-	add	rbx, r15
-	add	rbx, 2147483647
-CryptonightR_instruction164:
-	sub	rbx, r15
-CryptonightR_instruction165:
-	ror	ebx, cl
-CryptonightR_instruction166:
-	rol	ebx, cl
-CryptonightR_instruction167:
-	xor	rbx, r15
-CryptonightR_instruction168:
-	imul	rsi, r15
-CryptonightR_instruction169:
-	imul	rsi, r15
-CryptonightR_instruction170:
-	imul	rsi, r15
-CryptonightR_instruction171:
-	add	rsi, r15
-	add	rsi, 2147483647
-CryptonightR_instruction172:
-	sub	rsi, r15
-CryptonightR_instruction173:
-	ror	esi, cl
-CryptonightR_instruction174:
-	rol	esi, cl
-CryptonightR_instruction175:
-	xor	rsi, r15
-CryptonightR_instruction176:
-	imul	rdi, r15
-CryptonightR_instruction177:
-	imul	rdi, r15
-CryptonightR_instruction178:
-	imul	rdi, r15
-CryptonightR_instruction179:
-	add	rdi, r15
-	add	rdi, 2147483647
-CryptonightR_instruction180:
-	sub	rdi, r15
-CryptonightR_instruction181:
-	ror	edi, cl
-CryptonightR_instruction182:
-	rol	edi, cl
-CryptonightR_instruction183:
-	xor	rdi, r15
-CryptonightR_instruction184:
-	imul	rbp, r15
-CryptonightR_instruction185:
-	imul	rbp, r15
-CryptonightR_instruction186:
-	imul	rbp, r15
-CryptonightR_instruction187:
-	add	rbp, r15
-	add	rbp, 2147483647
-CryptonightR_instruction188:
-	sub	rbp, r15
-CryptonightR_instruction189:
-	ror	ebp, cl
-CryptonightR_instruction190:
-	rol	ebp, cl
-CryptonightR_instruction191:
-	xor	rbp, r15
-CryptonightR_instruction192:
-	imul	rbx, rax
-CryptonightR_instruction193:
-	imul	rbx, rax
-CryptonightR_instruction194:
-	imul	rbx, rax
-CryptonightR_instruction195:
-	add	rbx, rax
-	add	rbx, 2147483647
-CryptonightR_instruction196:
-	sub	rbx, rax
-CryptonightR_instruction197:
-	ror	ebx, cl
-CryptonightR_instruction198:
-	rol	ebx, cl
-CryptonightR_instruction199:
-	xor	rbx, rax
-CryptonightR_instruction200:
-	imul	rsi, rax
-CryptonightR_instruction201:
-	imul	rsi, rax
-CryptonightR_instruction202:
-	imul	rsi, rax
-CryptonightR_instruction203:
-	add	rsi, rax
-	add	rsi, 2147483647
-CryptonightR_instruction204:
-	sub	rsi, rax
-CryptonightR_instruction205:
-	ror	esi, cl
-CryptonightR_instruction206:
-	rol	esi, cl
-CryptonightR_instruction207:
-	xor	rsi, rax
-CryptonightR_instruction208:
-	imul	rdi, rax
-CryptonightR_instruction209:
-	imul	rdi, rax
-CryptonightR_instruction210:
-	imul	rdi, rax
-CryptonightR_instruction211:
-	add	rdi, rax
-	add	rdi, 2147483647
-CryptonightR_instruction212:
-	sub	rdi, rax
-CryptonightR_instruction213:
-	ror	edi, cl
-CryptonightR_instruction214:
-	rol	edi, cl
-CryptonightR_instruction215:
-	xor	rdi, rax
-CryptonightR_instruction216:
-	imul	rbp, rax
-CryptonightR_instruction217:
-	imul	rbp, rax
-CryptonightR_instruction218:
-	imul	rbp, rax
-CryptonightR_instruction219:
-	add	rbp, rax
-	add	rbp, 2147483647
-CryptonightR_instruction220:
-	sub	rbp, rax
-CryptonightR_instruction221:
-	ror	ebp, cl
-CryptonightR_instruction222:
-	rol	ebp, cl
-CryptonightR_instruction223:
-	xor	rbp, rax
-CryptonightR_instruction224:
-	imul	rbx, rdx
-CryptonightR_instruction225:
-	imul	rbx, rdx
-CryptonightR_instruction226:
-	imul	rbx, rdx
-CryptonightR_instruction227:
-	add	rbx, rdx
-	add	rbx, 2147483647
-CryptonightR_instruction228:
-	sub	rbx, rdx
-CryptonightR_instruction229:
-	ror	ebx, cl
-CryptonightR_instruction230:
-	rol	ebx, cl
-CryptonightR_instruction231:
-	xor	rbx, rdx
-CryptonightR_instruction232:
-	imul	rsi, rdx
-CryptonightR_instruction233:
-	imul	rsi, rdx
-CryptonightR_instruction234:
-	imul	rsi, rdx
-CryptonightR_instruction235:
-	add	rsi, rdx
-	add	rsi, 2147483647
-CryptonightR_instruction236:
-	sub	rsi, rdx
-CryptonightR_instruction237:
-	ror	esi, cl
-CryptonightR_instruction238:
-	rol	esi, cl
-CryptonightR_instruction239:
-	xor	rsi, rdx
-CryptonightR_instruction240:
-	imul	rdi, rdx
-CryptonightR_instruction241:
-	imul	rdi, rdx
-CryptonightR_instruction242:
-	imul	rdi, rdx
-CryptonightR_instruction243:
-	add	rdi, rdx
-	add	rdi, 2147483647
-CryptonightR_instruction244:
-	sub	rdi, rdx
-CryptonightR_instruction245:
-	ror	edi, cl
-CryptonightR_instruction246:
-	rol	edi, cl
-CryptonightR_instruction247:
-	xor	rdi, rdx
-CryptonightR_instruction248:
-	imul	rbp, rdx
-CryptonightR_instruction249:
-	imul	rbp, rdx
-CryptonightR_instruction250:
-	imul	rbp, rdx
-CryptonightR_instruction251:
-	add	rbp, rdx
-	add	rbp, 2147483647
-CryptonightR_instruction252:
-	sub	rbp, rdx
-CryptonightR_instruction253:
-	ror	ebp, cl
-CryptonightR_instruction254:
-	rol	ebp, cl
-CryptonightR_instruction255:
-	xor	rbp, rdx
-CryptonightR_instruction256:
-	imul	rbx, rbx
-CryptonightR_instruction_mov0:
-
-CryptonightR_instruction_mov1:
-
-CryptonightR_instruction_mov2:
-
-CryptonightR_instruction_mov3:
-
-CryptonightR_instruction_mov4:
-
-CryptonightR_instruction_mov5:
-	mov	rcx, rbx
-CryptonightR_instruction_mov6:
-	mov	rcx, rbx
-CryptonightR_instruction_mov7:
-
-CryptonightR_instruction_mov8:
-
-CryptonightR_instruction_mov9:
-
-CryptonightR_instruction_mov10:
-
-CryptonightR_instruction_mov11:
-
-CryptonightR_instruction_mov12:
-
-CryptonightR_instruction_mov13:
-	mov	rcx, rbx
-CryptonightR_instruction_mov14:
-	mov	rcx, rbx
-CryptonightR_instruction_mov15:
-
-CryptonightR_instruction_mov16:
-
-CryptonightR_instruction_mov17:
-
-CryptonightR_instruction_mov18:
-
-CryptonightR_instruction_mov19:
-
-CryptonightR_instruction_mov20:
-
-CryptonightR_instruction_mov21:
-	mov	rcx, rbx
-CryptonightR_instruction_mov22:
-	mov	rcx, rbx
-CryptonightR_instruction_mov23:
-
-CryptonightR_instruction_mov24:
-
-CryptonightR_instruction_mov25:
-
-CryptonightR_instruction_mov26:
-
-CryptonightR_instruction_mov27:
-
-CryptonightR_instruction_mov28:
-
-CryptonightR_instruction_mov29:
-	mov	rcx, rbx
-CryptonightR_instruction_mov30:
-	mov	rcx, rbx
-CryptonightR_instruction_mov31:
-
-CryptonightR_instruction_mov32:
-
-CryptonightR_instruction_mov33:
-
-CryptonightR_instruction_mov34:
-
-CryptonightR_instruction_mov35:
-
-CryptonightR_instruction_mov36:
-
-CryptonightR_instruction_mov37:
-	mov	rcx, rsi
-CryptonightR_instruction_mov38:
-	mov	rcx, rsi
-CryptonightR_instruction_mov39:
-
-CryptonightR_instruction_mov40:
-
-CryptonightR_instruction_mov41:
-
-CryptonightR_instruction_mov42:
-
-CryptonightR_instruction_mov43:
-
-CryptonightR_instruction_mov44:
-
-CryptonightR_instruction_mov45:
-	mov	rcx, rsi
-CryptonightR_instruction_mov46:
-	mov	rcx, rsi
-CryptonightR_instruction_mov47:
-
-CryptonightR_instruction_mov48:
-
-CryptonightR_instruction_mov49:
-
-CryptonightR_instruction_mov50:
-
-CryptonightR_instruction_mov51:
-
-CryptonightR_instruction_mov52:
-
-CryptonightR_instruction_mov53:
-	mov	rcx, rsi
-CryptonightR_instruction_mov54:
-	mov	rcx, rsi
-CryptonightR_instruction_mov55:
-
-CryptonightR_instruction_mov56:
-
-CryptonightR_instruction_mov57:
-
-CryptonightR_instruction_mov58:
-
-CryptonightR_instruction_mov59:
-
-CryptonightR_instruction_mov60:
-
-CryptonightR_instruction_mov61:
-	mov	rcx, rsi
-CryptonightR_instruction_mov62:
-	mov	rcx, rsi
-CryptonightR_instruction_mov63:
-
-CryptonightR_instruction_mov64:
-
-CryptonightR_instruction_mov65:
-
-CryptonightR_instruction_mov66:
-
-CryptonightR_instruction_mov67:
-
-CryptonightR_instruction_mov68:
-
-CryptonightR_instruction_mov69:
-	mov	rcx, rdi
-CryptonightR_instruction_mov70:
-	mov	rcx, rdi
-CryptonightR_instruction_mov71:
-
-CryptonightR_instruction_mov72:
-
-CryptonightR_instruction_mov73:
-
-CryptonightR_instruction_mov74:
-
-CryptonightR_instruction_mov75:
-
-CryptonightR_instruction_mov76:
-
-CryptonightR_instruction_mov77:
-	mov	rcx, rdi
-CryptonightR_instruction_mov78:
-	mov	rcx, rdi
-CryptonightR_instruction_mov79:
-
-CryptonightR_instruction_mov80:
-
-CryptonightR_instruction_mov81:
-
-CryptonightR_instruction_mov82:
-
-CryptonightR_instruction_mov83:
-
-CryptonightR_instruction_mov84:
-
-CryptonightR_instruction_mov85:
-	mov	rcx, rdi
-CryptonightR_instruction_mov86:
-	mov	rcx, rdi
-CryptonightR_instruction_mov87:
-
-CryptonightR_instruction_mov88:
-
-CryptonightR_instruction_mov89:
-
-CryptonightR_instruction_mov90:
-
-CryptonightR_instruction_mov91:
-
-CryptonightR_instruction_mov92:
-
-CryptonightR_instruction_mov93:
-	mov	rcx, rdi
-CryptonightR_instruction_mov94:
-	mov	rcx, rdi
-CryptonightR_instruction_mov95:
-
-CryptonightR_instruction_mov96:
-
-CryptonightR_instruction_mov97:
-
-CryptonightR_instruction_mov98:
-
-CryptonightR_instruction_mov99:
-
-CryptonightR_instruction_mov100:
-
-CryptonightR_instruction_mov101:
-	mov	rcx, rbp
-CryptonightR_instruction_mov102:
-	mov	rcx, rbp
-CryptonightR_instruction_mov103:
-
-CryptonightR_instruction_mov104:
-
-CryptonightR_instruction_mov105:
-
-CryptonightR_instruction_mov106:
-
-CryptonightR_instruction_mov107:
-
-CryptonightR_instruction_mov108:
-
-CryptonightR_instruction_mov109:
-	mov	rcx, rbp
-CryptonightR_instruction_mov110:
-	mov	rcx, rbp
-CryptonightR_instruction_mov111:
-
-CryptonightR_instruction_mov112:
-
-CryptonightR_instruction_mov113:
-
-CryptonightR_instruction_mov114:
-
-CryptonightR_instruction_mov115:
-
-CryptonightR_instruction_mov116:
-
-CryptonightR_instruction_mov117:
-	mov	rcx, rbp
-CryptonightR_instruction_mov118:
-	mov	rcx, rbp
-CryptonightR_instruction_mov119:
-
-CryptonightR_instruction_mov120:
-
-CryptonightR_instruction_mov121:
-
-CryptonightR_instruction_mov122:
-
-CryptonightR_instruction_mov123:
-
-CryptonightR_instruction_mov124:
-
-CryptonightR_instruction_mov125:
-	mov	rcx, rbp
-CryptonightR_instruction_mov126:
-	mov	rcx, rbp
-CryptonightR_instruction_mov127:
-
-CryptonightR_instruction_mov128:
-
-CryptonightR_instruction_mov129:
-
-CryptonightR_instruction_mov130:
-
-CryptonightR_instruction_mov131:
-
-CryptonightR_instruction_mov132:
-
-CryptonightR_instruction_mov133:
-	mov	rcx, rsp
-CryptonightR_instruction_mov134:
-	mov	rcx, rsp
-CryptonightR_instruction_mov135:
-
-CryptonightR_instruction_mov136:
-
-CryptonightR_instruction_mov137:
-
-CryptonightR_instruction_mov138:
-
-CryptonightR_instruction_mov139:
-
-CryptonightR_instruction_mov140:
-
-CryptonightR_instruction_mov141:
-	mov	rcx, rsp
-CryptonightR_instruction_mov142:
-	mov	rcx, rsp
-CryptonightR_instruction_mov143:
-
-CryptonightR_instruction_mov144:
-
-CryptonightR_instruction_mov145:
-
-CryptonightR_instruction_mov146:
-
-CryptonightR_instruction_mov147:
-
-CryptonightR_instruction_mov148:
-
-CryptonightR_instruction_mov149:
-	mov	rcx, rsp
-CryptonightR_instruction_mov150:
-	mov	rcx, rsp
-CryptonightR_instruction_mov151:
-
-CryptonightR_instruction_mov152:
-
-CryptonightR_instruction_mov153:
-
-CryptonightR_instruction_mov154:
-
-CryptonightR_instruction_mov155:
-
-CryptonightR_instruction_mov156:
-
-CryptonightR_instruction_mov157:
-	mov	rcx, rsp
-CryptonightR_instruction_mov158:
-	mov	rcx, rsp
-CryptonightR_instruction_mov159:
-
-CryptonightR_instruction_mov160:
-
-CryptonightR_instruction_mov161:
-
-CryptonightR_instruction_mov162:
-
-CryptonightR_instruction_mov163:
-
-CryptonightR_instruction_mov164:
-
-CryptonightR_instruction_mov165:
-	mov	rcx, r15
-CryptonightR_instruction_mov166:
-	mov	rcx, r15
-CryptonightR_instruction_mov167:
-
-CryptonightR_instruction_mov168:
-
-CryptonightR_instruction_mov169:
-
-CryptonightR_instruction_mov170:
-
-CryptonightR_instruction_mov171:
-
-CryptonightR_instruction_mov172:
-
-CryptonightR_instruction_mov173:
-	mov	rcx, r15
-CryptonightR_instruction_mov174:
-	mov	rcx, r15
-CryptonightR_instruction_mov175:
-
-CryptonightR_instruction_mov176:
-
-CryptonightR_instruction_mov177:
-
-CryptonightR_instruction_mov178:
-
-CryptonightR_instruction_mov179:
-
-CryptonightR_instruction_mov180:
-
-CryptonightR_instruction_mov181:
-	mov	rcx, r15
-CryptonightR_instruction_mov182:
-	mov	rcx, r15
-CryptonightR_instruction_mov183:
-
-CryptonightR_instruction_mov184:
-
-CryptonightR_instruction_mov185:
-
-CryptonightR_instruction_mov186:
-
-CryptonightR_instruction_mov187:
-
-CryptonightR_instruction_mov188:
-
-CryptonightR_instruction_mov189:
-	mov	rcx, r15
-CryptonightR_instruction_mov190:
-	mov	rcx, r15
-CryptonightR_instruction_mov191:
-
-CryptonightR_instruction_mov192:
-
-CryptonightR_instruction_mov193:
-
-CryptonightR_instruction_mov194:
-
-CryptonightR_instruction_mov195:
-
-CryptonightR_instruction_mov196:
-
-CryptonightR_instruction_mov197:
-	mov	rcx, rax
-CryptonightR_instruction_mov198:
-	mov	rcx, rax
-CryptonightR_instruction_mov199:
-
-CryptonightR_instruction_mov200:
-
-CryptonightR_instruction_mov201:
-
-CryptonightR_instruction_mov202:
-
-CryptonightR_instruction_mov203:
-
-CryptonightR_instruction_mov204:
-
-CryptonightR_instruction_mov205:
-	mov	rcx, rax
-CryptonightR_instruction_mov206:
-	mov	rcx, rax
-CryptonightR_instruction_mov207:
-
-CryptonightR_instruction_mov208:
-
-CryptonightR_instruction_mov209:
-
-CryptonightR_instruction_mov210:
-
-CryptonightR_instruction_mov211:
-
-CryptonightR_instruction_mov212:
-
-CryptonightR_instruction_mov213:
-	mov	rcx, rax
-CryptonightR_instruction_mov214:
-	mov	rcx, rax
-CryptonightR_instruction_mov215:
-
-CryptonightR_instruction_mov216:
-
-CryptonightR_instruction_mov217:
-
-CryptonightR_instruction_mov218:
-
-CryptonightR_instruction_mov219:
-
-CryptonightR_instruction_mov220:
-
-CryptonightR_instruction_mov221:
-	mov	rcx, rax
-CryptonightR_instruction_mov222:
-	mov	rcx, rax
-CryptonightR_instruction_mov223:
-
-CryptonightR_instruction_mov224:
-
-CryptonightR_instruction_mov225:
-
-CryptonightR_instruction_mov226:
-
-CryptonightR_instruction_mov227:
-
-CryptonightR_instruction_mov228:
-
-CryptonightR_instruction_mov229:
-	mov	rcx, rdx
-CryptonightR_instruction_mov230:
-	mov	rcx, rdx
-CryptonightR_instruction_mov231:
-
-CryptonightR_instruction_mov232:
-
-CryptonightR_instruction_mov233:
-
-CryptonightR_instruction_mov234:
-
-CryptonightR_instruction_mov235:
-
-CryptonightR_instruction_mov236:
-
-CryptonightR_instruction_mov237:
-	mov	rcx, rdx
-CryptonightR_instruction_mov238:
-	mov	rcx, rdx
-CryptonightR_instruction_mov239:
-
-CryptonightR_instruction_mov240:
-
-CryptonightR_instruction_mov241:
-
-CryptonightR_instruction_mov242:
-
-CryptonightR_instruction_mov243:
-
-CryptonightR_instruction_mov244:
-
-CryptonightR_instruction_mov245:
-	mov	rcx, rdx
-CryptonightR_instruction_mov246:
-	mov	rcx, rdx
-CryptonightR_instruction_mov247:
-
-CryptonightR_instruction_mov248:
-
-CryptonightR_instruction_mov249:
-
-CryptonightR_instruction_mov250:
-
-CryptonightR_instruction_mov251:
-
-CryptonightR_instruction_mov252:
-
-CryptonightR_instruction_mov253:
-	mov	rcx, rdx
-CryptonightR_instruction_mov254:
-	mov	rcx, rdx
-CryptonightR_instruction_mov255:
-
-CryptonightR_instruction_mov256:
-
-_TEXT_CN_TEMPLATE ENDS
-END
diff --git a/src/crypto/asm/win64/CryptonightR_template_win.inc b/src/crypto/asm/win64/CryptonightR_template_win.inc
deleted file mode 100644
index d24eedaa..00000000
--- a/src/crypto/asm/win64/CryptonightR_template_win.inc
+++ /dev/null
@@ -1,536 +0,0 @@
-PUBLIC CryptonightR_template_part1
-PUBLIC CryptonightR_template_mainloop
-PUBLIC CryptonightR_template_part2
-PUBLIC CryptonightR_template_part3
-PUBLIC CryptonightR_template_end
-PUBLIC CryptonightR_template_double_part1
-PUBLIC CryptonightR_template_double_mainloop
-PUBLIC CryptonightR_template_double_part2
-PUBLIC CryptonightR_template_double_part3
-PUBLIC CryptonightR_template_double_part4
-PUBLIC CryptonightR_template_double_end
-
-ALIGN(64)
-CryptonightR_template_part1:
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+16], rbx
-	mov	QWORD PTR [rsp+24], rbp
-	mov	QWORD PTR [rsp+32], rsi
-	push	r10
-	push	r11
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	push	rdi
-	sub	rsp, 64
-	mov	r12, rcx
-	mov	r8, QWORD PTR [r12+32]
-	mov	rdx, r12
-	xor	r8, QWORD PTR [r12]
-	mov	r15, QWORD PTR [r12+40]
-	mov	r9, r8
-	xor	r15, QWORD PTR [r12+8]
-	mov	r11, QWORD PTR [r12+224]
-	mov	r12, QWORD PTR [r12+56]
-	xor	r12, QWORD PTR [rdx+24]
-	mov	rax, QWORD PTR [rdx+48]
-	xor	rax, QWORD PTR [rdx+16]
-	movaps	XMMWORD PTR [rsp+48], xmm6
-	movd	xmm0, r12
-	movaps	XMMWORD PTR [rsp+32], xmm7
-	movaps	XMMWORD PTR [rsp+16], xmm8
-	movaps	XMMWORD PTR [rsp], xmm9
-	mov	r12, QWORD PTR [rdx+88]
-	xor	r12, QWORD PTR [rdx+72]
-	movd	xmm6, rax
-	mov	rax, QWORD PTR [rdx+80]
-	xor	rax, QWORD PTR [rdx+64]
-	punpcklqdq xmm6, xmm0
-	and	r9d, 2097136
-	movd	xmm0, r12
-	movd	xmm7, rax
-	punpcklqdq xmm7, xmm0
-	mov r10d, r9d
-	movd	xmm9, rsp
-	mov rsp, r8
-	mov	r8d, 524288
-
-	mov	ebx, [rdx+96]
-	mov	esi, [rdx+100]
-	mov	edi, [rdx+104]
-	mov	ebp, [rdx+108]
-
-	ALIGN(64)
-CryptonightR_template_mainloop:
-	movdqa	xmm5, XMMWORD PTR [r9+r11]
-	movd	xmm0, r15
-	movd	xmm4, rsp
-	punpcklqdq xmm4, xmm0
-	lea	rdx, QWORD PTR [r9+r11]
-
-	aesenc	xmm5, xmm4
-
-	mov	r13d, r9d
-	mov	eax, r9d
-	xor	r9d, 48
-	xor	r13d, 16
-	xor	eax, 32
-	movdqu	xmm0, XMMWORD PTR [r9+r11]
-	movaps xmm3, xmm0
-	movdqu	xmm2, XMMWORD PTR [r13+r11]
-	movdqu	xmm1, XMMWORD PTR [rax+r11]
-	pxor xmm0, xmm2
-	pxor xmm5, xmm1
-	pxor xmm5, xmm0
-
-	movd	r12, xmm5
-	movd	r10d, xmm5
-	and	r10d, 2097136
-
-	paddq	xmm3, xmm7
-	paddq	xmm2, xmm6
-	paddq	xmm1, xmm4
-	movdqu	XMMWORD PTR [r13+r11], xmm3
-	movdqu	XMMWORD PTR [rax+r11], xmm2
-	movdqu	XMMWORD PTR [r9+r11], xmm1
-
-	movdqa	xmm0, xmm5
-	pxor	xmm0, xmm6
-	movdqu	XMMWORD PTR [rdx], xmm0
-
-	lea	r13d, [ebx+esi]
-	lea	edx, [edi+ebp]
-	shl rdx, 32
-	or	r13, rdx
-
-	movd eax, xmm6
-	movd edx, xmm7
-	pextrd r9d, xmm7, 2
-
-	xor	r13, QWORD PTR [r10+r11]
-	mov	r14, QWORD PTR [r10+r11+8]
-
-CryptonightR_template_part2:
-	lea	rcx, [r10+r11]
-
-	mov eax, edi
-	mov edx, ebp
-	shl rdx, 32
-	or rax, rdx
-	xor rsp, rax
-
-	mov eax, ebx
-	mov edx, esi
-	shl rdx, 32
-	or rax, rdx
-	xor r15, rax
-
-	mov	rax, r13
-	mul	r12
-	add	r15, rax
-	add	rsp, rdx
-
-	mov	r9d, r10d
-	mov	r12d, r10d
-	xor	r9d, 16
-	xor	r12d, 32
-	xor	r10d, 48
-	movdqa	xmm1, XMMWORD PTR [r12+r11]
-	movaps xmm3, xmm1
-	movdqa	xmm2, XMMWORD PTR [r9+r11]
-	movdqa	xmm0, XMMWORD PTR [r10+r11]
-	pxor xmm1, xmm2
-	pxor xmm5, xmm0
-	pxor xmm5, xmm1
-	paddq	xmm3, xmm4
-	paddq	xmm2, xmm6
-	paddq	xmm0, xmm7
-	movdqu	XMMWORD PTR [r9+r11], xmm0
-	movdqu	XMMWORD PTR [r12+r11], xmm2
-	movdqu	XMMWORD PTR [r10+r11], xmm3
-
-	movdqa	xmm7, xmm6
-	mov	QWORD PTR [rcx], rsp
-	xor	rsp, r13
-	mov	r9d, esp
-	mov	QWORD PTR [rcx+8], r15
-	and	r9d, 2097136
-	xor	r15, r14
-	movdqa	xmm6, xmm5
-	dec	r8d
-	jnz	CryptonightR_template_mainloop
-
-CryptonightR_template_part3:
-	movd	rsp, xmm9
-
-	mov	rbx, QWORD PTR [rsp+136]
-	mov	rbp, QWORD PTR [rsp+144]
-	mov	rsi, QWORD PTR [rsp+152]
-	movaps	xmm6, XMMWORD PTR [rsp+48]
-	movaps	xmm7, XMMWORD PTR [rsp+32]
-	movaps	xmm8, XMMWORD PTR [rsp+16]
-	movaps	xmm9, XMMWORD PTR [rsp]
-	add	rsp, 64
-	pop	rdi
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	r11
-	pop	r10
-	ret	0
-CryptonightR_template_end:
-
-ALIGN(64)
-CryptonightR_template_double_part1:
-	mov	rdx, [rcx+8]
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+24], rbx
-	push	rbp
-	push	rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 320
-	mov	r14, QWORD PTR [rcx+32]
-	mov	r8, rcx
-	xor	r14, QWORD PTR [rcx]
-	mov	r12, QWORD PTR [rcx+40]
-	mov	ebx, r14d
-	mov	rsi, QWORD PTR [rcx+224]
-	and	ebx, 2097136
-	xor	r12, QWORD PTR [rcx+8]
-	mov	rcx, QWORD PTR [rcx+56]
-	xor	rcx, QWORD PTR [r8+24]
-	mov	rax, QWORD PTR [r8+48]
-	xor	rax, QWORD PTR [r8+16]
-	mov	r15, QWORD PTR [rdx+32]
-	xor	r15, QWORD PTR [rdx]
-	movd	xmm0, rcx
-	mov	rcx, QWORD PTR [r8+88]
-	xor	rcx, QWORD PTR [r8+72]
-	mov	r13, QWORD PTR [rdx+40]
-	mov	rdi, QWORD PTR [rdx+224]
-	xor	r13, QWORD PTR [rdx+8]
-	movaps	XMMWORD PTR [rsp+160], xmm6
-	movaps	XMMWORD PTR [rsp+176], xmm7
-	movaps	XMMWORD PTR [rsp+192], xmm8
-	movaps	XMMWORD PTR [rsp+208], xmm9
-	movaps	XMMWORD PTR [rsp+224], xmm10
-	movaps	XMMWORD PTR [rsp+240], xmm11
-	movaps	XMMWORD PTR [rsp+256], xmm12
-	movaps	XMMWORD PTR [rsp+272], xmm13
-	movaps	XMMWORD PTR [rsp+288], xmm14
-	movaps	XMMWORD PTR [rsp+304], xmm15
-	movd	xmm7, rax
-	mov	rax, QWORD PTR [r8+80]
-	xor	rax, QWORD PTR [r8+64]
-
-	movaps xmm1, XMMWORD PTR [rdx+96]
-	movaps xmm2, XMMWORD PTR [r8+96]
-	movaps XMMWORD PTR [rsp], xmm1
-	movaps XMMWORD PTR [rsp+16], xmm2
-
-	mov	r8d, r15d
-	punpcklqdq xmm7, xmm0
-	movd	xmm0, rcx
-	mov	rcx, QWORD PTR [rdx+56]
-	xor	rcx, QWORD PTR [rdx+24]
-	movd	xmm9, rax
-	mov	QWORD PTR [rsp+128], rsi
-	mov	rax, QWORD PTR [rdx+48]
-	xor	rax, QWORD PTR [rdx+16]
-	punpcklqdq xmm9, xmm0
-	movd	xmm0, rcx
-	mov	rcx, QWORD PTR [rdx+88]
-	xor	rcx, QWORD PTR [rdx+72]
-	movd	xmm8, rax
-	mov	QWORD PTR [rsp+136], rdi
-	mov	rax, QWORD PTR [rdx+80]
-	xor	rax, QWORD PTR [rdx+64]
-	punpcklqdq xmm8, xmm0
-	and	r8d, 2097136
-	movd	xmm0, rcx
-	mov	r11d, 524288
-	movd	xmm10, rax
-	punpcklqdq xmm10, xmm0
-	
-	movd xmm14, QWORD PTR [rsp+128]
-	movd xmm15, QWORD PTR [rsp+136]
-
-	ALIGN(64)
-CryptonightR_template_double_mainloop:
-	movdqu	xmm6, XMMWORD PTR [rbx+rsi]
-	movd	xmm0, r12
-	mov	ecx, ebx
-	movd	xmm3, r14
-	punpcklqdq xmm3, xmm0
-	xor	ebx, 16
-	aesenc	xmm6, xmm3
-	movd	xmm4, r15
-	movdqu	xmm0, XMMWORD PTR [rbx+rsi]
-	pxor	xmm6, xmm0
-	xor	ebx, 48
-	paddq	xmm0, xmm7
-	movdqu	xmm1, XMMWORD PTR [rbx+rsi]
-	pxor	xmm6, xmm1
-	movdqu	XMMWORD PTR [rbx+rsi], xmm0
-	paddq	xmm1, xmm3
-	xor	ebx, 16
-	mov	eax, ebx
-	xor	rax, 32
-	movdqu	xmm0, XMMWORD PTR [rbx+rsi]
-	pxor	xmm6, xmm0
-	movd	rdx, xmm6
-	movdqu	XMMWORD PTR [rbx+rsi], xmm1
-	paddq	xmm0, xmm9
-	movdqu	XMMWORD PTR [rax+rsi], xmm0
-	movdqa	xmm0, xmm6
-	pxor	xmm0, xmm7
-	movdqu	XMMWORD PTR [rcx+rsi], xmm0
-	mov	esi, edx
-	movdqu	xmm5, XMMWORD PTR [r8+rdi]
-	and	esi, 2097136
-	mov	ecx, r8d
-	movd	xmm0, r13
-	punpcklqdq xmm4, xmm0
-	xor	r8d, 16
-	aesenc	xmm5, xmm4
-	movdqu	xmm0, XMMWORD PTR [r8+rdi]
-	pxor	xmm5, xmm0
-	xor	r8d, 48
-	paddq	xmm0, xmm8
-	movdqu	xmm1, XMMWORD PTR [r8+rdi]
-	pxor	xmm5, xmm1
-	movdqu	XMMWORD PTR [r8+rdi], xmm0
-	paddq	xmm1, xmm4
-	xor	r8d, 16
-	mov	eax, r8d
-	xor	rax, 32
-	movdqu	xmm0, XMMWORD PTR [r8+rdi]
-	pxor	xmm5, xmm0
-	movdqu	XMMWORD PTR [r8+rdi], xmm1
-	paddq	xmm0, xmm10
-	movdqu	XMMWORD PTR [rax+rdi], xmm0
-	movdqa	xmm0, xmm5
-	pxor	xmm0, xmm8
-	movdqu	XMMWORD PTR [rcx+rdi], xmm0
-	movd	rdi, xmm5
-	movd	rcx, xmm14
-	mov	ebp, edi
-	mov	r8, QWORD PTR [rcx+rsi]
-	mov	r10, QWORD PTR [rcx+rsi+8]
-	lea	r9, QWORD PTR [rcx+rsi]
-	xor	esi, 16
-
-	movd xmm0, rsp
-	movd xmm1, rsi
-	movd xmm2, rdi
-	movd xmm11, rbp
-	movd xmm12, r15
-	movd xmm13, rdx
-	mov [rsp+104], rcx
-	mov [rsp+112], r9
-
-	mov ebx, DWORD PTR [rsp+16]
-	mov esi, DWORD PTR [rsp+20]
-	mov edi, DWORD PTR [rsp+24]
-	mov ebp, DWORD PTR [rsp+28]
-
-	lea	eax, [ebx+esi]
-	lea	edx, [edi+ebp]
-	shl rdx, 32
-	or	rax, rdx
-	xor r8, rax
-
-	movd esp, xmm3
-	pextrd r15d, xmm3, 2
-	movd eax, xmm7
-	movd edx, xmm9
-	pextrd r9d, xmm9, 2
-
-CryptonightR_template_double_part2:
-
-	mov eax, edi
-	mov edx, ebp
-	shl rdx, 32
-	or rax, rdx
-	xor r14, rax
-
-	mov eax, ebx
-	mov edx, esi
-	shl rdx, 32
-	or rax, rdx
-	xor r12, rax
-
-	movd rsp, xmm0
-	mov DWORD PTR [rsp+16], ebx
-	mov DWORD PTR [rsp+20], esi
-	mov DWORD PTR [rsp+24], edi
-	mov DWORD PTR [rsp+28], ebp
-
-	movd rsi, xmm1
-	movd rdi, xmm2
-	movd rbp, xmm11
-	movd r15, xmm12
-	movd rdx, xmm13
-	mov rcx, [rsp+104]
-	mov r9, [rsp+112]
-
-	mov rbx, r8
-	mov	rax, r8
-	mul	rdx
-	and	ebp, 2097136
-	mov	r8, rax
-	movdqu	xmm1, XMMWORD PTR [rcx+rsi]
-	pxor	xmm6, xmm1
-	xor	esi, 48
-	paddq	xmm1, xmm7
-	movdqu	xmm2, XMMWORD PTR [rsi+rcx]
-	pxor	xmm6, xmm2
-	paddq	xmm2, xmm3
-	movdqu	XMMWORD PTR [rsi+rcx], xmm1
-	xor	esi, 16
-	mov	eax, esi
-	mov	rsi, rcx
-	movdqu	xmm0, XMMWORD PTR [rax+rcx]
-	pxor	xmm6, xmm0
-	movdqu	XMMWORD PTR [rax+rcx], xmm2
-	paddq	xmm0, xmm9
-	add	r12, r8
-	xor	rax, 32
-	add	r14, rdx
-	movdqa	xmm9, xmm7
-	movdqa	xmm7, xmm6
-	movdqu	XMMWORD PTR [rax+rcx], xmm0
-	mov	QWORD PTR [r9+8], r12
-	xor	r12, r10
-	mov	QWORD PTR [r9], r14
-	movd rcx, xmm15
-	xor	r14, rbx
-	mov	r10d, ebp
-	mov	ebx, r14d
-	xor	ebp, 16
-	and	ebx, 2097136
-	mov	r8, QWORD PTR [r10+rcx]
-	mov	r9, QWORD PTR [r10+rcx+8]
-
-	movd xmm0, rsp
-	movd xmm1, rbx
-	movd xmm2, rsi
-	movd xmm11, rdi
-	movd xmm12, rbp
-	movd xmm13, r15
-	mov [rsp+104], rcx
-	mov [rsp+112], r9
-
-	mov ebx, DWORD PTR [rsp]
-	mov esi, DWORD PTR [rsp+4]
-	mov edi, DWORD PTR [rsp+8]
-	mov ebp, DWORD PTR [rsp+12]
-
-	lea	eax, [ebx+esi]
-	lea	edx, [edi+ebp]
-	shl rdx, 32
-	or	rax, rdx
-
-	xor r8, rax
-	movd xmm3, r8
-
-	movd esp, xmm4
-	pextrd r15d, xmm4, 2
-	movd eax, xmm8
-	movd edx, xmm10
-	pextrd r9d, xmm10, 2
-
-CryptonightR_template_double_part3:
-
-	movd r15, xmm13
-
-	mov eax, edi
-	mov edx, ebp
-	shl rdx, 32
-	or rax, rdx
-	xor r15, rax
-
-	mov eax, ebx
-	mov edx, esi
-	shl rdx, 32
-	or rax, rdx
-	xor r13, rax
-
-	movd rsp, xmm0
-	mov DWORD PTR [rsp], ebx
-	mov DWORD PTR [rsp+4], esi
-	mov DWORD PTR [rsp+8], edi
-	mov DWORD PTR [rsp+12], ebp
-
-	movd rbx, xmm1
-	movd rsi, xmm2
-	movd rdi, xmm11
-	movd rbp, xmm12
-	mov rcx, [rsp+104]
-	mov r9, [rsp+112]
-
-	mov rax, r8
-	mul	rdi
-	mov	rdi, rcx
-	mov	r8, rax
-	movdqu	xmm1, XMMWORD PTR [rbp+rcx]
-	pxor xmm5, xmm1
-	xor	ebp, 48
-	paddq	xmm1, xmm8
-	add	r13, r8
-	movdqu	xmm2, XMMWORD PTR [rbp+rcx]
-	pxor xmm5, xmm2
-	add	r15, rdx
-	movdqu	XMMWORD PTR [rbp+rcx], xmm1
-	paddq	xmm2, xmm4
-	xor	ebp, 16
-	mov	eax, ebp
-	xor	rax, 32
-	movdqu	xmm0, XMMWORD PTR [rbp+rcx]
-	pxor xmm5, xmm0
-	movdqu	XMMWORD PTR [rbp+rcx], xmm2
-	paddq	xmm0, xmm10
-	movdqu	XMMWORD PTR [rax+rcx], xmm0
-	movd rax, xmm3
-	movdqa	xmm10, xmm8
-	mov	QWORD PTR [r10+rcx], r15
-	movdqa	xmm8, xmm5
-	xor	r15, rax
-	mov	QWORD PTR [r10+rcx+8], r13
-	mov	r8d, r15d
-	xor	r13, r9
-	and	r8d, 2097136
-	dec r11d
-	jnz	CryptonightR_template_double_mainloop
-
-CryptonightR_template_double_part4:
-
-	mov	rbx, QWORD PTR [rsp+400]
-	movaps	xmm6, XMMWORD PTR [rsp+160]
-	movaps	xmm7, XMMWORD PTR [rsp+176]
-	movaps	xmm8, XMMWORD PTR [rsp+192]
-	movaps	xmm9, XMMWORD PTR [rsp+208]
-	movaps	xmm10, XMMWORD PTR [rsp+224]
-	movaps	xmm11, XMMWORD PTR [rsp+240]
-	movaps	xmm12, XMMWORD PTR [rsp+256]
-	movaps	xmm13, XMMWORD PTR [rsp+272]
-	movaps	xmm14, XMMWORD PTR [rsp+288]
-	movaps	xmm15, XMMWORD PTR [rsp+304]
-	add	rsp, 320
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	pop	rsi
-	pop	rbp
-	ret	0
-CryptonightR_template_double_end:
diff --git a/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc b/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc
deleted file mode 100644
index 1c73f77c..00000000
--- a/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc
+++ /dev/null
@@ -1,268 +0,0 @@
-PUBLIC CryptonightWOW_soft_aes_template_part1
-PUBLIC CryptonightWOW_soft_aes_template_mainloop
-PUBLIC CryptonightWOW_soft_aes_template_part2
-PUBLIC CryptonightWOW_soft_aes_template_part3
-PUBLIC CryptonightWOW_soft_aes_template_end
-
-ALIGN(64)
-CryptonightWOW_soft_aes_template_part1:
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+8], rcx
-	push	rbx
-	push	rbp
-	push	rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 232
-
-	mov	eax, [rcx+96]
-	mov	ebx, [rcx+100]
-	mov	esi, [rcx+104]
-	mov	edx, [rcx+108]
-	mov [rsp+144], eax
-	mov [rsp+148], ebx
-	mov [rsp+152], esi
-	mov [rsp+156], edx
-
-	mov	rax, QWORD PTR [rcx+48]
-	mov	r10, rcx
-	xor	rax, QWORD PTR [rcx+16]
-	mov	r8, QWORD PTR [rcx+32]
-	xor	r8, QWORD PTR [rcx]
-	mov	r9, QWORD PTR [rcx+40]
-	xor	r9, QWORD PTR [rcx+8]
-	movd	xmm4, rax
-	mov	rdx, QWORD PTR [rcx+56]
-	xor	rdx, QWORD PTR [rcx+24]
-	mov	r11, QWORD PTR [rcx+224]
-	mov	rcx, QWORD PTR [rcx+88]
-	xor	rcx, QWORD PTR [r10+72]
-	mov	rax, QWORD PTR [r10+80]
-	movd	xmm0, rdx
-	xor	rax, QWORD PTR [r10+64]
-
-	movaps	XMMWORD PTR [rsp+16], xmm6
-	movaps	XMMWORD PTR [rsp+32], xmm7
-	movaps	XMMWORD PTR [rsp+48], xmm8
-	movaps	XMMWORD PTR [rsp+64], xmm9
-	movaps	XMMWORD PTR [rsp+80], xmm10
-	movaps	XMMWORD PTR [rsp+96], xmm11
-	movaps	XMMWORD PTR [rsp+112], xmm12
-	movaps	XMMWORD PTR [rsp+128], xmm13
-
-	movd	xmm5, rax
-
-	mov	rax, r8
-	punpcklqdq xmm4, xmm0
-	and	eax, 2097136
-	movd	xmm10, QWORD PTR [r10+96]
-	movd	xmm0, rcx
-	mov	rcx, QWORD PTR [r10+104]
-	xorps	xmm9, xmm9
-	mov	QWORD PTR [rsp+328], rax
-	movd	xmm12, r11
-	mov	QWORD PTR [rsp+320], r9
-	punpcklqdq xmm5, xmm0
-	movd xmm13, rcx
-	mov r12d, 524288
-
-	ALIGN(64)
-CryptonightWOW_soft_aes_template_mainloop:
-	movd xmm11, r12d
-	mov	r12, QWORD PTR [r10+272]
-	lea	r13, QWORD PTR [rax+r11]
-	mov	esi, DWORD PTR [r13]
-	movd	xmm0, r9
-	mov	r10d, DWORD PTR [r13+4]
-	movd	xmm7, r8
-	mov	ebp, DWORD PTR [r13+12]
-	mov	r14d, DWORD PTR [r13+8]
-	mov	rdx, QWORD PTR [rsp+328]
-	movzx	ecx, sil
-	shr	esi, 8
-	punpcklqdq xmm7, xmm0
-	mov	r15d, DWORD PTR [r12+rcx*4]
-	movzx	ecx, r10b
-	shr	r10d, 8
-	mov	edi, DWORD PTR [r12+rcx*4]
-	movzx	ecx, r14b
-	shr	r14d, 8
-	mov	ebx, DWORD PTR [r12+rcx*4]
-	movzx	ecx, bpl
-	shr	ebp, 8
-	mov	r9d, DWORD PTR [r12+rcx*4]
-	movzx	ecx, r10b
-	shr	r10d, 8
-	xor	r15d, DWORD PTR [r12+rcx*4+1024]
-	movzx	ecx, r14b
-	shr	r14d, 8
-	mov	eax, r14d
-	shr	eax, 8
-	xor	edi, DWORD PTR [r12+rcx*4+1024]
-	add	eax, 256
-	movzx	ecx, bpl
-	shr	ebp, 8
-	xor	ebx, DWORD PTR [r12+rcx*4+1024]
-	movzx	ecx, sil
-	shr	esi, 8
-	xor	r9d, DWORD PTR [r12+rcx*4+1024]
-	add	r12, 2048
-	movzx	ecx, r10b
-	shr	r10d, 8
-	add	r10d, 256
-	mov	r11d, DWORD PTR [r12+rax*4]
-	xor	r11d, DWORD PTR [r12+rcx*4]
-	xor	r11d, r9d
-	movzx	ecx, sil
-	mov	r10d, DWORD PTR [r12+r10*4]
-	shr	esi, 8
-	add	esi, 256
-	xor	r10d, DWORD PTR [r12+rcx*4]
-	movzx	ecx, bpl
-	xor	r10d, ebx
-	shr	ebp, 8
-	movd	xmm1, r11d
-	add	ebp, 256
-	movd	r11, xmm12
-	mov	r9d, DWORD PTR [r12+rcx*4]
-	xor	r9d, DWORD PTR [r12+rsi*4]
-	mov	eax, DWORD PTR [r12+rbp*4]
-	xor	r9d, edi
-	movzx	ecx, r14b
-	movd	xmm0, r10d
-	movd	xmm2, r9d
-	xor	eax, DWORD PTR [r12+rcx*4]
-	mov	rcx, rdx
-	xor	eax, r15d
-	punpckldq xmm2, xmm1
-	xor	rcx, 16
-	movd	xmm6, eax
-	mov	rax, rdx
-	punpckldq xmm6, xmm0
-	xor	rax, 32
-	punpckldq xmm6, xmm2
-	xor	rdx, 48
-	movdqu	xmm2, XMMWORD PTR [rcx+r11]
-	pxor	xmm6, xmm7
-	paddq	xmm2, xmm4
-	movdqu	xmm1, XMMWORD PTR [rax+r11]
-	movdqu	xmm0, XMMWORD PTR [rdx+r11]
-	paddq	xmm0, xmm5
-	movdqu	XMMWORD PTR [rcx+r11], xmm0
-	movdqu	XMMWORD PTR [rax+r11], xmm2
-	movd rcx, xmm13
-	paddq	xmm1, xmm7
-	movdqu	XMMWORD PTR [rdx+r11], xmm1
-	movd	rdi, xmm6
-	mov	r10, rdi
-	and	r10d, 2097136
-	movdqa	xmm0, xmm6
-	pxor	xmm0, xmm4
-	movdqu	XMMWORD PTR [r13], xmm0
-
-	mov ebx, [rsp+144]
-	mov ebp, [rsp+152]
-	add ebx, [rsp+148]
-	add ebp, [rsp+156]
-	shl rbp, 32
-	or rbx, rbp
-
-	xor rbx, QWORD PTR [r10+r11]
-	lea	r14, QWORD PTR [r10+r11]
-	mov	rbp, QWORD PTR [r14+8]
-
-	mov [rsp+160], rbx
-	mov [rsp+168], rdi
-	mov [rsp+176], rbp
-	mov [rsp+184], r10
-	mov r10, rsp
-
-	mov ebx, [rsp+144]
-	mov esi, [rsp+148]
-	mov edi, [rsp+152]
-	mov ebp, [rsp+156]
-
-	movd esp, xmm7
-	movaps xmm0, xmm7
-	psrldq xmm0, 8
-	movd r15d, xmm0
-	movd eax, xmm4
-	movd edx, xmm5
-
-CryptonightWOW_soft_aes_template_part2:
-	mov rsp, r10
-	mov [rsp+144], ebx
-	mov [rsp+148], esi
-	mov [rsp+152], edi
-	mov [rsp+156], ebp
-
-	mov rbx, [rsp+160]
-	mov rdi, [rsp+168]
-	mov rbp, [rsp+176]
-	mov r10, [rsp+184]
-
-	mov	r9, r10
-	xor	r9, 16
-	mov	rcx, r10
-	xor	rcx, 32
-	xor	r10, 48
-	mov	rax, rbx
-	mul	rdi
-	movdqu	xmm2, XMMWORD PTR [r9+r11]
-	movdqu	xmm1, XMMWORD PTR [rcx+r11]
-	paddq	xmm1, xmm7
-	movd	xmm0, rax
-	movd	xmm3, rdx
-	xor	rax, QWORD PTR [r11+rcx+8]
-	xor	rdx, QWORD PTR [rcx+r11]
-	punpcklqdq xmm3, xmm0
-	add	r8, rdx
-	movdqu	xmm0, XMMWORD PTR [r10+r11]
-	pxor	xmm2, xmm3
-	paddq	xmm0, xmm5
-	paddq	xmm2, xmm4
-	movdqu	XMMWORD PTR [r9+r11], xmm0
-	movdqa	xmm5, xmm4
-	mov	r9, QWORD PTR [rsp+320]
-	movdqa	xmm4, xmm6
-	add	r9, rax
-	movdqu	XMMWORD PTR [rcx+r11], xmm2
-	movdqu	XMMWORD PTR [r10+r11], xmm1
-	mov	r10, QWORD PTR [rsp+304]
-	movd r12d, xmm11
-	mov	QWORD PTR [r14], r8
-	xor	r8, rbx
-	mov	rax, r8
-	mov	QWORD PTR [r14+8], r9
-	and	eax, 2097136
-	xor	r9, rbp
-	mov	QWORD PTR [rsp+320], r9
-	mov	QWORD PTR [rsp+328], rax
-	sub	r12d, 1
-	jne	CryptonightWOW_soft_aes_template_mainloop
-
-CryptonightWOW_soft_aes_template_part3:
-	movaps	xmm6, XMMWORD PTR [rsp+16]
-	movaps	xmm7, XMMWORD PTR [rsp+32]
-	movaps	xmm8, XMMWORD PTR [rsp+48]
-	movaps	xmm9, XMMWORD PTR [rsp+64]
-	movaps	xmm10, XMMWORD PTR [rsp+80]
-	movaps	xmm11, XMMWORD PTR [rsp+96]
-	movaps	xmm12, XMMWORD PTR [rsp+112]
-	movaps	xmm13, XMMWORD PTR [rsp+128]
-
-	add	rsp, 232
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	pop	rsi
-	pop	rbp
-	pop	rbx
-	ret
-CryptonightWOW_soft_aes_template_end:
diff --git a/src/crypto/asm/win64/CryptonightWOW_template_win.inc b/src/crypto/asm/win64/CryptonightWOW_template_win.inc
deleted file mode 100644
index 55c8c8df..00000000
--- a/src/crypto/asm/win64/CryptonightWOW_template_win.inc
+++ /dev/null
@@ -1,491 +0,0 @@
-PUBLIC CryptonightWOW_template_part1
-PUBLIC CryptonightWOW_template_mainloop
-PUBLIC CryptonightWOW_template_part2
-PUBLIC CryptonightWOW_template_part3
-PUBLIC CryptonightWOW_template_end
-PUBLIC CryptonightWOW_template_double_part1
-PUBLIC CryptonightWOW_template_double_mainloop
-PUBLIC CryptonightWOW_template_double_part2
-PUBLIC CryptonightWOW_template_double_part3
-PUBLIC CryptonightWOW_template_double_part4
-PUBLIC CryptonightWOW_template_double_end
-
-ALIGN(64)
-CryptonightWOW_template_part1:
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+16], rbx
-	mov	QWORD PTR [rsp+24], rbp
-	mov	QWORD PTR [rsp+32], rsi
-	push	r10
-	push	r11
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	push	rdi
-	sub	rsp, 64
-	mov	r12, rcx
-	mov	r8, QWORD PTR [r12+32]
-	mov	rdx, r12
-	xor	r8, QWORD PTR [r12]
-	mov	r15, QWORD PTR [r12+40]
-	mov	r9, r8
-	xor	r15, QWORD PTR [r12+8]
-	mov	r11, QWORD PTR [r12+224]
-	mov	r12, QWORD PTR [r12+56]
-	xor	r12, QWORD PTR [rdx+24]
-	mov	rax, QWORD PTR [rdx+48]
-	xor	rax, QWORD PTR [rdx+16]
-	movaps	XMMWORD PTR [rsp+48], xmm6
-	movd	xmm0, r12
-	movaps	XMMWORD PTR [rsp+32], xmm7
-	movaps	XMMWORD PTR [rsp+16], xmm8
-	movaps	XMMWORD PTR [rsp], xmm9
-	mov	r12, QWORD PTR [rdx+88]
-	xor	r12, QWORD PTR [rdx+72]
-	movd	xmm6, rax
-	mov	rax, QWORD PTR [rdx+80]
-	xor	rax, QWORD PTR [rdx+64]
-	punpcklqdq xmm6, xmm0
-	and	r9d, 2097136
-	movd	xmm0, r12
-	movd	xmm7, rax
-	punpcklqdq xmm7, xmm0
-	mov r10d, r9d
-	movd	xmm9, rsp
-	mov rsp, r8
-	mov	r8d, 524288
-
-	mov	ebx, [rdx+96]
-	mov	esi, [rdx+100]
-	mov	edi, [rdx+104]
-	mov	ebp, [rdx+108]
-
-	ALIGN(64)
-CryptonightWOW_template_mainloop:
-	movdqa	xmm5, XMMWORD PTR [r9+r11]
-	movd	xmm0, r15
-	movd	xmm4, rsp
-	punpcklqdq xmm4, xmm0
-	lea	rdx, QWORD PTR [r9+r11]
-
-	aesenc	xmm5, xmm4
-	movd	r10d, xmm5
-	and	r10d, 2097136
-
-	mov	r12d, r9d
-	mov	eax, r9d
-	xor	r9d, 48
-	xor	r12d, 16
-	xor	eax, 32
-	movdqu	xmm0, XMMWORD PTR [r9+r11]
-	movdqu	xmm2, XMMWORD PTR [r12+r11]
-	movdqu	xmm1, XMMWORD PTR [rax+r11]
-	paddq	xmm0, xmm7
-	paddq	xmm2, xmm6
-	paddq	xmm1, xmm4
-	movdqu	XMMWORD PTR [r12+r11], xmm0
-	movd	r12, xmm5
-	movdqu	XMMWORD PTR [rax+r11], xmm2
-	movdqu	XMMWORD PTR [r9+r11], xmm1
-
-	movdqa	xmm0, xmm5
-	pxor	xmm0, xmm6
-	movdqu	XMMWORD PTR [rdx], xmm0
-
-	lea	r13d, [ebx+esi]
-	lea	edx, [edi+ebp]
-	shl rdx, 32
-	or	r13, rdx
-
-	xor	r13, QWORD PTR [r10+r11]
-	mov	r14, QWORD PTR [r10+r11+8]
-
-	movd eax, xmm6
-	movd edx, xmm7
-	pextrd r9d, xmm7, 2
-
-CryptonightWOW_template_part2:
-	mov	rax, r13
-	mul	r12
-	movd	xmm0, rax
-	movd	xmm3, rdx
-	punpcklqdq xmm3, xmm0
-
-	mov	r9d, r10d
-	mov	r12d, r10d
-	xor	r9d, 16
-	xor	r12d, 32
-	xor	r10d, 48
-	movdqa	xmm1, XMMWORD PTR [r12+r11]
-	xor	rdx, QWORD PTR [r12+r11]
-	xor	rax, QWORD PTR [r11+r12+8]
-	movdqa	xmm2, XMMWORD PTR [r9+r11]
-	pxor	xmm3, xmm2
-	paddq	xmm7, XMMWORD PTR [r10+r11]
-	paddq	xmm1, xmm4
-	paddq	xmm3, xmm6
-	movdqu	XMMWORD PTR [r9+r11], xmm7
-	movdqu	XMMWORD PTR [r12+r11], xmm3
-	movdqu	XMMWORD PTR [r10+r11], xmm1
-
-	movdqa	xmm7, xmm6
-	add	r15, rax
-	add	rsp, rdx
-	xor	r10, 48
-	mov	QWORD PTR [r10+r11], rsp
-	xor	rsp, r13
-	mov	r9d, esp
-	mov	QWORD PTR [r10+r11+8], r15
-	and	r9d, 2097136
-	xor	r15, r14
-	movdqa	xmm6, xmm5
-	dec	r8d
-	jnz	CryptonightWOW_template_mainloop
-
-CryptonightWOW_template_part3:
-	movd	rsp, xmm9
-
-	mov	rbx, QWORD PTR [rsp+136]
-	mov	rbp, QWORD PTR [rsp+144]
-	mov	rsi, QWORD PTR [rsp+152]
-	movaps	xmm6, XMMWORD PTR [rsp+48]
-	movaps	xmm7, XMMWORD PTR [rsp+32]
-	movaps	xmm8, XMMWORD PTR [rsp+16]
-	movaps	xmm9, XMMWORD PTR [rsp]
-	add	rsp, 64
-	pop	rdi
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	r11
-	pop	r10
-	ret	0
-CryptonightWOW_template_end:
-
-ALIGN(64)
-CryptonightWOW_template_double_part1:
-	mov	rdx, [rcx+8]
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+24], rbx
-	push	rbp
-	push	rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 320
-	mov	r14, QWORD PTR [rcx+32]
-	mov	r8, rcx
-	xor	r14, QWORD PTR [rcx]
-	mov	r12, QWORD PTR [rcx+40]
-	mov	ebx, r14d
-	mov	rsi, QWORD PTR [rcx+224]
-	and	ebx, 2097136
-	xor	r12, QWORD PTR [rcx+8]
-	mov	rcx, QWORD PTR [rcx+56]
-	xor	rcx, QWORD PTR [r8+24]
-	mov	rax, QWORD PTR [r8+48]
-	xor	rax, QWORD PTR [r8+16]
-	mov	r15, QWORD PTR [rdx+32]
-	xor	r15, QWORD PTR [rdx]
-	movd	xmm0, rcx
-	mov	rcx, QWORD PTR [r8+88]
-	xor	rcx, QWORD PTR [r8+72]
-	mov	r13, QWORD PTR [rdx+40]
-	mov	rdi, QWORD PTR [rdx+224]
-	xor	r13, QWORD PTR [rdx+8]
-	movaps	XMMWORD PTR [rsp+160], xmm6
-	movaps	XMMWORD PTR [rsp+176], xmm7
-	movaps	XMMWORD PTR [rsp+192], xmm8
-	movaps	XMMWORD PTR [rsp+208], xmm9
-	movaps	XMMWORD PTR [rsp+224], xmm10
-	movaps	XMMWORD PTR [rsp+240], xmm11
-	movaps	XMMWORD PTR [rsp+256], xmm12
-	movaps	XMMWORD PTR [rsp+272], xmm13
-	movaps	XMMWORD PTR [rsp+288], xmm14
-	movaps	XMMWORD PTR [rsp+304], xmm15
-	movd	xmm7, rax
-	mov	rax, QWORD PTR [r8+80]
-	xor	rax, QWORD PTR [r8+64]
-
-	movaps xmm1, XMMWORD PTR [rdx+96]
-	movaps xmm2, XMMWORD PTR [r8+96]
-	movaps XMMWORD PTR [rsp], xmm1
-	movaps XMMWORD PTR [rsp+16], xmm2
-
-	mov	r8d, r15d
-	punpcklqdq xmm7, xmm0
-	movd	xmm0, rcx
-	mov	rcx, QWORD PTR [rdx+56]
-	xor	rcx, QWORD PTR [rdx+24]
-	movd	xmm9, rax
-	mov	QWORD PTR [rsp+128], rsi
-	mov	rax, QWORD PTR [rdx+48]
-	xor	rax, QWORD PTR [rdx+16]
-	punpcklqdq xmm9, xmm0
-	movd	xmm0, rcx
-	mov	rcx, QWORD PTR [rdx+88]
-	xor	rcx, QWORD PTR [rdx+72]
-	movd	xmm8, rax
-	mov	QWORD PTR [rsp+136], rdi
-	mov	rax, QWORD PTR [rdx+80]
-	xor	rax, QWORD PTR [rdx+64]
-	punpcklqdq xmm8, xmm0
-	and	r8d, 2097136
-	movd	xmm0, rcx
-	mov	r11d, 524288
-	movd	xmm10, rax
-	punpcklqdq xmm10, xmm0
-	
-	movd xmm14, QWORD PTR [rsp+128]
-	movd xmm15, QWORD PTR [rsp+136]
-
-	ALIGN(64)
-CryptonightWOW_template_double_mainloop:
-	movdqu	xmm6, XMMWORD PTR [rbx+rsi]
-	movd	xmm0, r12
-	mov	ecx, ebx
-	movd	xmm3, r14
-	punpcklqdq xmm3, xmm0
-	xor	ebx, 16
-	aesenc	xmm6, xmm3
-	movd	rdx, xmm6
-	movd	xmm4, r15
-	movdqu	xmm0, XMMWORD PTR [rbx+rsi]
-	xor	ebx, 48
-	paddq	xmm0, xmm7
-	movdqu	xmm1, XMMWORD PTR [rbx+rsi]
-	movdqu	XMMWORD PTR [rbx+rsi], xmm0
-	paddq	xmm1, xmm3
-	xor	ebx, 16
-	mov	eax, ebx
-	xor	rax, 32
-	movdqu	xmm0, XMMWORD PTR [rbx+rsi]
-	movdqu	XMMWORD PTR [rbx+rsi], xmm1
-	paddq	xmm0, xmm9
-	movdqu	XMMWORD PTR [rax+rsi], xmm0
-	movdqa	xmm0, xmm6
-	pxor	xmm0, xmm7
-	movdqu	XMMWORD PTR [rcx+rsi], xmm0
-	mov	esi, edx
-	movdqu	xmm5, XMMWORD PTR [r8+rdi]
-	and	esi, 2097136
-	mov	ecx, r8d
-	movd	xmm0, r13
-	punpcklqdq xmm4, xmm0
-	xor	r8d, 16
-	aesenc	xmm5, xmm4
-	movdqu	xmm0, XMMWORD PTR [r8+rdi]
-	xor	r8d, 48
-	paddq	xmm0, xmm8
-	movdqu	xmm1, XMMWORD PTR [r8+rdi]
-	movdqu	XMMWORD PTR [r8+rdi], xmm0
-	paddq	xmm1, xmm4
-	xor	r8d, 16
-	mov	eax, r8d
-	xor	rax, 32
-	movdqu	xmm0, XMMWORD PTR [r8+rdi]
-	movdqu	XMMWORD PTR [r8+rdi], xmm1
-	paddq	xmm0, xmm10
-	movdqu	XMMWORD PTR [rax+rdi], xmm0
-	movdqa	xmm0, xmm5
-	pxor	xmm0, xmm8
-	movdqu	XMMWORD PTR [rcx+rdi], xmm0
-	movd	rdi, xmm5
-	movd	rcx, xmm14
-	mov	ebp, edi
-	mov	r8, QWORD PTR [rcx+rsi]
-	mov	r10, QWORD PTR [rcx+rsi+8]
-	lea	r9, QWORD PTR [rcx+rsi]
-	xor	esi, 16
-
-	movd xmm0, rsp
-	movd xmm1, rsi
-	movd xmm2, rdi
-	movd xmm11, rbp
-	movd xmm12, r15
-	movd xmm13, rdx
-	mov [rsp+104], rcx
-	mov [rsp+112], r9
-
-	mov ebx, DWORD PTR [rsp+16]
-	mov esi, DWORD PTR [rsp+20]
-	mov edi, DWORD PTR [rsp+24]
-	mov ebp, DWORD PTR [rsp+28]
-
-	lea	eax, [ebx+esi]
-	lea	edx, [edi+ebp]
-	shl rdx, 32
-	or	rax, rdx
-	xor r8, rax
-
-	movd esp, xmm3
-	pextrd r15d, xmm3, 2
-	movd eax, xmm7
-	movd edx, xmm9
-	pextrd r9d, xmm9, 2
-
-CryptonightWOW_template_double_part2:
-
-	movd rsp, xmm0
-	mov DWORD PTR [rsp+16], ebx
-	mov DWORD PTR [rsp+20], esi
-	mov DWORD PTR [rsp+24], edi
-	mov DWORD PTR [rsp+28], ebp
-
-	movd rsi, xmm1
-	movd rdi, xmm2
-	movd rbp, xmm11
-	movd r15, xmm12
-	movd rdx, xmm13
-	mov rcx, [rsp+104]
-	mov r9, [rsp+112]
-
-	mov rbx, r8
-	mov	rax, r8
-	mul	rdx
-	and	ebp, 2097136
-	mov	r8, rax
-	movd	xmm1, rdx
-	movd	xmm0, r8
-	punpcklqdq xmm1, xmm0
-	pxor	xmm1, XMMWORD PTR [rcx+rsi]
-	xor	esi, 48
-	paddq	xmm1, xmm7
-	movdqu	xmm2, XMMWORD PTR [rsi+rcx]
-	xor	rdx, QWORD PTR [rsi+rcx]
-	paddq	xmm2, xmm3
-	xor	r8, QWORD PTR [rsi+rcx+8]
-	movdqu	XMMWORD PTR [rsi+rcx], xmm1
-	xor	esi, 16
-	mov	eax, esi
-	mov	rsi, rcx
-	movdqu	xmm0, XMMWORD PTR [rax+rcx]
-	movdqu	XMMWORD PTR [rax+rcx], xmm2
-	paddq	xmm0, xmm9
-	add	r12, r8
-	xor	rax, 32
-	add	r14, rdx
-	movdqa	xmm9, xmm7
-	movdqa	xmm7, xmm6
-	movdqu	XMMWORD PTR [rax+rcx], xmm0
-	mov	QWORD PTR [r9+8], r12
-	xor	r12, r10
-	mov	QWORD PTR [r9], r14
-	movd rcx, xmm15
-	xor	r14, rbx
-	mov	r10d, ebp
-	mov	ebx, r14d
-	xor	ebp, 16
-	and	ebx, 2097136
-	mov	r8, QWORD PTR [r10+rcx]
-	mov	r9, QWORD PTR [r10+rcx+8]
-
-	movd xmm0, rsp
-	movd xmm1, rbx
-	movd xmm2, rsi
-	movd xmm11, rdi
-	movd xmm12, rbp
-	movd xmm13, r15
-	mov [rsp+104], rcx
-	mov [rsp+112], r9
-
-	mov ebx, DWORD PTR [rsp]
-	mov esi, DWORD PTR [rsp+4]
-	mov edi, DWORD PTR [rsp+8]
-	mov ebp, DWORD PTR [rsp+12]
-
-	lea	eax, [ebx+esi]
-	lea	edx, [edi+ebp]
-	shl rdx, 32
-	or	rax, rdx
-
-	xor r8, rax
-	movd xmm3, r8
-
-	movd esp, xmm4
-	pextrd r15d, xmm4, 2
-	movd eax, xmm8
-	movd edx, xmm10
-	pextrd r9d, xmm10, 2
-
-CryptonightWOW_template_double_part3:
-
-	movd rsp, xmm0
-	mov DWORD PTR [rsp], ebx
-	mov DWORD PTR [rsp+4], esi
-	mov DWORD PTR [rsp+8], edi
-	mov DWORD PTR [rsp+12], ebp
-
-	movd rbx, xmm1
-	movd rsi, xmm2
-	movd rdi, xmm11
-	movd rbp, xmm12
-	movd r15, xmm13
-	mov rcx, [rsp+104]
-	mov r9, [rsp+112]
-
-	mov rax, r8
-	mul	rdi
-	movd	xmm1, rdx
-	movd	xmm0, rax
-	punpcklqdq xmm1, xmm0
-	mov	rdi, rcx
-	mov	r8, rax
-	pxor	xmm1, XMMWORD PTR [rbp+rcx]
-	xor	ebp, 48
-	paddq	xmm1, xmm8
-	xor	r8, QWORD PTR [rbp+rcx+8]
-	xor	rdx, QWORD PTR [rbp+rcx]
-	add	r13, r8
-	movdqu	xmm2, XMMWORD PTR [rbp+rcx]
-	add	r15, rdx
-	movdqu	XMMWORD PTR [rbp+rcx], xmm1
-	paddq	xmm2, xmm4
-	xor	ebp, 16
-	mov	eax, ebp
-	xor	rax, 32
-	movdqu	xmm0, XMMWORD PTR [rbp+rcx]
-	movdqu	XMMWORD PTR [rbp+rcx], xmm2
-	paddq	xmm0, xmm10
-	movdqu	XMMWORD PTR [rax+rcx], xmm0
-	movd rax, xmm3
-	movdqa	xmm10, xmm8
-	mov	QWORD PTR [r10+rcx], r15
-	movdqa	xmm8, xmm5
-	xor	r15, rax
-	mov	QWORD PTR [r10+rcx+8], r13
-	mov	r8d, r15d
-	xor	r13, r9
-	and	r8d, 2097136
-	dec r11d
-	jnz	CryptonightWOW_template_double_mainloop
-
-CryptonightWOW_template_double_part4:
-
-	mov	rbx, QWORD PTR [rsp+400]
-	movaps	xmm6, XMMWORD PTR [rsp+160]
-	movaps	xmm7, XMMWORD PTR [rsp+176]
-	movaps	xmm8, XMMWORD PTR [rsp+192]
-	movaps	xmm9, XMMWORD PTR [rsp+208]
-	movaps	xmm10, XMMWORD PTR [rsp+224]
-	movaps	xmm11, XMMWORD PTR [rsp+240]
-	movaps	xmm12, XMMWORD PTR [rsp+256]
-	movaps	xmm13, XMMWORD PTR [rsp+272]
-	movaps	xmm14, XMMWORD PTR [rsp+288]
-	movaps	xmm15, XMMWORD PTR [rsp+304]
-	add	rsp, 320
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	pop	rsi
-	pop	rbp
-	ret	0
-CryptonightWOW_template_double_end:
diff --git a/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc b/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc
deleted file mode 100644
index 85077a20..00000000
--- a/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc
+++ /dev/null
@@ -1,413 +0,0 @@
-	mov	rdx, [rcx+8]
-	mov	rcx, [rcx]
-
-	mov	rax, rsp
-	push	rbx
-	push	rbp
-	push	rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 184
-
-	stmxcsr DWORD PTR [rsp+272]
-	mov DWORD PTR [rsp+276], 24448
-	ldmxcsr DWORD PTR [rsp+276]
-
-	mov	r13, QWORD PTR [rcx+224]
-	mov	r9, rdx
-	mov	r10, QWORD PTR [rcx+32]
-	mov	r8, rcx
-	xor	r10, QWORD PTR [rcx]
-	mov	r14d, 524288
-	mov	r11, QWORD PTR [rcx+40]
-	xor	r11, QWORD PTR [rcx+8]
-	mov	rsi, QWORD PTR [rdx+224]
-	mov	rdx, QWORD PTR [rcx+56]
-	xor	rdx, QWORD PTR [rcx+24]
-	mov	rdi, QWORD PTR [r9+32]
-	xor	rdi, QWORD PTR [r9]
-	mov	rbp, QWORD PTR [r9+40]
-	xor	rbp, QWORD PTR [r9+8]
-	movd	xmm0, rdx
-	movaps	XMMWORD PTR [rax-88], xmm6
-	movaps	XMMWORD PTR [rax-104], xmm7
-	movaps	XMMWORD PTR [rax-120], xmm8
-	movaps	XMMWORD PTR [rsp+112], xmm9
-	movaps	XMMWORD PTR [rsp+96], xmm10
-	movaps	XMMWORD PTR [rsp+80], xmm11
-	movaps	XMMWORD PTR [rsp+64], xmm12
-	movaps	XMMWORD PTR [rsp+48], xmm13
-	movaps	XMMWORD PTR [rsp+32], xmm14
-	movaps	XMMWORD PTR [rsp+16], xmm15
-	mov	rdx, r10
-	movd	xmm4, QWORD PTR [r8+96]
-	and	edx, 2097136
-	mov	rax, QWORD PTR [rcx+48]
-	xorps	xmm13, xmm13
-	xor	rax, QWORD PTR [rcx+16]
-	mov	rcx, QWORD PTR [rcx+88]
-	xor	rcx, QWORD PTR [r8+72]
-	movd	xmm5, QWORD PTR [r8+104]
-	movd	xmm7, rax
-
-	mov eax, 1
-	shl rax, 52
-	movd xmm14, rax
-	punpcklqdq xmm14, xmm14
-
-	mov eax, 1023
-	shl rax, 52
-	movd xmm12, rax
-	punpcklqdq xmm12, xmm12
-
-	mov	rax, QWORD PTR [r8+80]
-	xor	rax, QWORD PTR [r8+64]
-	punpcklqdq xmm7, xmm0
-	movd	xmm0, rcx
-	mov	rcx, QWORD PTR [r9+56]
-	xor	rcx, QWORD PTR [r9+24]
-	movd	xmm3, rax
-	mov	rax, QWORD PTR [r9+48]
-	xor	rax, QWORD PTR [r9+16]
-	punpcklqdq xmm3, xmm0
-	movd	xmm0, rcx
-	mov	QWORD PTR [rsp], r13
-	mov	rcx, QWORD PTR [r9+88]
-	xor	rcx, QWORD PTR [r9+72]
-	movd	xmm6, rax
-	mov	rax, QWORD PTR [r9+80]
-	xor	rax, QWORD PTR [r9+64]
-	punpcklqdq xmm6, xmm0
-	movd	xmm0, rcx
-	mov	QWORD PTR [rsp+256], r10
-	mov	rcx, rdi
-	mov	QWORD PTR [rsp+264], r11
-	movd	xmm8, rax
-	and	ecx, 2097136
-	punpcklqdq xmm8, xmm0
-	movd	xmm0, QWORD PTR [r9+96]
-	punpcklqdq xmm4, xmm0
-	movd	xmm0, QWORD PTR [r9+104]
-	lea	r8, QWORD PTR [rcx+rsi]
-	movdqu	xmm11, XMMWORD PTR [r8]
-	punpcklqdq xmm5, xmm0
-	lea	r9, QWORD PTR [rdx+r13]
-	movdqu	xmm15, XMMWORD PTR [r9]
-
-	ALIGN(64)
-main_loop_double_sandybridge:
-	movdqu	xmm9, xmm15
-	mov eax, edx
-	mov ebx, edx
-	xor eax, 16
-	xor ebx, 32
-	xor edx, 48
-
-	movd	xmm0, r11
-	movd	xmm2, r10
-	punpcklqdq xmm2, xmm0
-	aesenc	xmm9, xmm2
-
-	movdqu	xmm0, XMMWORD PTR [rax+r13]
-	movdqu	xmm1, XMMWORD PTR [rbx+r13]
-	paddq	xmm0, xmm7
-	paddq	xmm1, xmm2
-	movdqu	XMMWORD PTR [rbx+r13], xmm0
-	movdqu	xmm0, XMMWORD PTR [rdx+r13]
-	movdqu	XMMWORD PTR [rdx+r13], xmm1
-	paddq	xmm0, xmm3
-	movdqu	XMMWORD PTR [rax+r13], xmm0
-
-	movd	r11, xmm9
-	mov	edx, r11d
-	and	edx, 2097136
-	movdqa	xmm0, xmm9
-	pxor	xmm0, xmm7
-	movdqu	XMMWORD PTR [r9], xmm0
-
-	lea	rbx, QWORD PTR [rdx+r13]
-	mov	r10, QWORD PTR [rdx+r13]
-
-	movdqu	xmm10, xmm11
-	movd	xmm0, rbp
-	movd	xmm11, rdi
-	punpcklqdq xmm11, xmm0
-	aesenc	xmm10, xmm11
-
-	mov eax, ecx
-	mov r12d, ecx
-	xor eax, 16
-	xor r12d, 32
-	xor ecx, 48
-
-	movdqu	xmm0, XMMWORD PTR [rax+rsi]
-	paddq	xmm0, xmm6
-	movdqu	xmm1, XMMWORD PTR [r12+rsi]
-	movdqu	XMMWORD PTR [r12+rsi], xmm0
-	paddq	xmm1, xmm11
-	movdqu	xmm0, XMMWORD PTR [rcx+rsi]
-	movdqu	XMMWORD PTR [rcx+rsi], xmm1
-	paddq	xmm0, xmm8
-	movdqu	XMMWORD PTR [rax+rsi], xmm0
-
-	movd	rcx, xmm10
-	and	ecx, 2097136
-
-	movdqa	xmm0, xmm10
-	pxor	xmm0, xmm6
-	movdqu	XMMWORD PTR [r8], xmm0
-	mov r12, QWORD PTR [rcx+rsi]
-
-	mov	r9, QWORD PTR [rbx+8]
-
-	xor edx, 16
-	mov r8d, edx
-	mov r15d, edx
-
-	movd	rdx, xmm5
-	shl	rdx, 32
-	movd	rax, xmm4
-	xor	rdx, rax
-	xor	r10, rdx
-	mov	rax, r10
-	mul	r11
-	mov r11d, r8d
-	xor r11d, 48
-	movd xmm0, rdx
-	xor rdx, [r11+r13]
-	movd xmm1, rax
-	xor rax, [r11+r13+8]
-	punpcklqdq xmm0, xmm1
-
-	pxor xmm0, XMMWORD PTR [r8+r13]
-	xor	r8d, 32
-	movdqu	xmm1, XMMWORD PTR [r11+r13]
-	paddq	xmm0, xmm7
-	paddq	xmm1, xmm2
-	movdqu	XMMWORD PTR [r11+r13], xmm0
-	movdqu	xmm0, XMMWORD PTR [r8+r13]
-	movdqu	XMMWORD PTR [r8+r13], xmm1
-	paddq	xmm0, xmm3
-	movdqu	XMMWORD PTR [r15+r13], xmm0
-
-	mov	r11, QWORD PTR [rsp+256]
-	add	r11, rdx
-	mov	rdx, QWORD PTR [rsp+264]
-	add	rdx, rax
-	mov	QWORD PTR [rbx], r11
-	xor	r11, r10
-	mov	QWORD PTR [rbx+8], rdx
-	xor	rdx, r9
-	mov	QWORD PTR [rsp+256], r11
-	and	r11d, 2097136
-	mov	QWORD PTR [rsp+264], rdx
-	mov	QWORD PTR [rsp+8], r11
-	lea	r15, QWORD PTR [r11+r13]
-	movdqu xmm15, XMMWORD PTR [r11+r13]
-	lea	r13, QWORD PTR [rsi+rcx]
-	movdqa	xmm0, xmm5
-	psrldq	xmm0, 8
-	movaps	xmm2, xmm13
-	movd	r10, xmm0
-	psllq	xmm5, 1
-	shl	r10, 32
-	movdqa	xmm0, xmm9
-	psrldq	xmm0, 8
-	movdqa	xmm1, xmm10
-	movd	r11, xmm0
-	psrldq	xmm1, 8
-	movd	r8, xmm1
-	psrldq	xmm4, 8
-	movaps	xmm0, xmm13
-	movd	rax, xmm4
-	xor	r10, rax
-	movaps	xmm1, xmm13
-	xor	r10, r12
-	lea	rax, QWORD PTR [r11+1]
-	shr	rax, 1
-	movdqa	xmm3, xmm9
-	punpcklqdq xmm3, xmm10
-	paddq	xmm5, xmm3
-	movd	rdx, xmm5
-	psrldq	xmm5, 8
-	cvtsi2sd xmm2, rax
-	or	edx, -2147483647
-	lea	rax, QWORD PTR [r8+1]
-	shr	rax, 1
-	movd	r9, xmm5
-	cvtsi2sd xmm0, rax
-	or	r9d, -2147483647
-	cvtsi2sd xmm1, rdx
-	unpcklpd xmm2, xmm0
-	movaps	xmm0, xmm13
-	cvtsi2sd xmm0, r9
-	unpcklpd xmm1, xmm0
-	divpd	xmm2, xmm1
-	paddq	xmm2, xmm14
-	cvttsd2si rax, xmm2
-	psrldq	xmm2, 8
-	mov	rbx, rax
-	imul	rax, rdx
-	sub	r11, rax
-	js	div_fix_1_sandybridge
-div_fix_1_ret_sandybridge:
-
-	cvttsd2si rdx, xmm2
-	mov	rax, rdx
-	imul	rax, r9
-	movd	xmm2, r11d
-	movd	xmm4, ebx
-	sub	r8, rax
-	js	div_fix_2_sandybridge
-div_fix_2_ret_sandybridge:
-
-	movd	xmm1, r8d
-	movd	xmm0, edx
-	punpckldq xmm2, xmm1
-	punpckldq xmm4, xmm0
-	punpckldq xmm4, xmm2
-	paddq	xmm3, xmm4
-	movdqa	xmm0, xmm3
-	psrlq	xmm0, 12
-	paddq	xmm0, xmm12
-	sqrtpd	xmm1, xmm0
-	movd	r9, xmm1
-	movdqa xmm5, xmm1
-	psrlq xmm5, 19
-	test	r9, 524287
-	je	sqrt_fix_1_sandybridge
-sqrt_fix_1_ret_sandybridge:
-
-	movd r9, xmm10
-	psrldq	xmm1, 8
-	movd	r8, xmm1
-	test	r8, 524287
-	je	sqrt_fix_2_sandybridge
-sqrt_fix_2_ret_sandybridge:
-
-	mov r12d, ecx
-	mov r8d, ecx
-	xor r12d, 16
-	xor r8d, 32
-	xor ecx, 48
-	mov	rax, r10
-	mul	r9
-	movd xmm0, rax
-	movd xmm3, rdx
-	punpcklqdq xmm3, xmm0
-
-	movdqu	xmm0, XMMWORD PTR [r12+rsi]
-	pxor xmm0, xmm3
-	movdqu	xmm1, XMMWORD PTR [r8+rsi]
-	xor rdx, [r8+rsi]
-	xor rax, [r8+rsi+8]
-	movdqu	xmm3, XMMWORD PTR [rcx+rsi]
-	paddq	xmm0, xmm6
-	paddq	xmm1, xmm11
-	paddq	xmm3, xmm8
-	movdqu	XMMWORD PTR [r8+rsi], xmm0
-	movdqu	XMMWORD PTR [rcx+rsi], xmm1
-	movdqu	XMMWORD PTR [r12+rsi], xmm3
-
-	add	rdi, rdx
-	mov	QWORD PTR [r13], rdi
-	xor	rdi, r10
-	mov	ecx, edi
-	and	ecx, 2097136
-	lea	r8, QWORD PTR [rcx+rsi]
-
-	mov rdx, QWORD PTR [r13+8]	
-	add	rbp, rax
-	mov	QWORD PTR [r13+8], rbp
-	movdqu xmm11, XMMWORD PTR [rcx+rsi]
-	xor	rbp, rdx
-	mov	r13, QWORD PTR [rsp]
-	movdqa	xmm3, xmm7
-	mov	rdx, QWORD PTR [rsp+8]
-	movdqa	xmm8, xmm6
-	mov	r10, QWORD PTR [rsp+256]
-	movdqa	xmm7, xmm9
-	mov	r11, QWORD PTR [rsp+264]
-	movdqa	xmm6, xmm10
-	mov	r9, r15
-	dec r14d
-	jne	main_loop_double_sandybridge
-
-	ldmxcsr DWORD PTR [rsp+272]
-	movaps	xmm13, XMMWORD PTR [rsp+48]
-	lea	r11, QWORD PTR [rsp+184]
-	movaps	xmm6, XMMWORD PTR [r11-24]
-	movaps	xmm7, XMMWORD PTR [r11-40]
-	movaps	xmm8, XMMWORD PTR [r11-56]
-	movaps	xmm9, XMMWORD PTR [r11-72]
-	movaps	xmm10, XMMWORD PTR [r11-88]
-	movaps	xmm11, XMMWORD PTR [r11-104]
-	movaps	xmm12, XMMWORD PTR [r11-120]
-	movaps	xmm14, XMMWORD PTR [rsp+32]
-	movaps	xmm15, XMMWORD PTR [rsp+16]
-	mov	rsp, r11
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	pop	rsi
-	pop	rbp
-	pop	rbx
-	jmp cnv2_double_mainloop_asm_sandybridge_endp
-
-div_fix_1_sandybridge:
-	dec	rbx
-	add	r11, rdx
-	jmp	div_fix_1_ret_sandybridge
-
-div_fix_2_sandybridge:
-	dec	rdx
-	add	r8, r9
-	jmp	div_fix_2_ret_sandybridge
-
-sqrt_fix_1_sandybridge:
-	movd	r8, xmm3
-	movdqa xmm0, xmm5
-	psrldq xmm0, 8
-	dec	r9
-	mov r11d, -1022
-	shl r11, 32
-	mov	rax, r9
-	shr	r9, 19
-	shr	rax, 20
-	mov	rdx, r9
-	sub	rdx, rax
-	lea	rdx, [rdx+r11+1]
-	add	rax, r11
-	imul	rdx, rax
-	sub	rdx, r8
-	adc	r9, 0
-	movd xmm5, r9
-	punpcklqdq xmm5, xmm0
-	jmp	sqrt_fix_1_ret_sandybridge
-
-sqrt_fix_2_sandybridge:
-	psrldq	xmm3, 8
-	movd	r11, xmm3
-	dec	r8
-	mov ebx, -1022
-	shl rbx, 32
-	mov	rax, r8
-	shr	r8, 19
-	shr	rax, 20
-	mov	rdx, r8
-	sub	rdx, rax
-	lea	rdx, [rdx+rbx+1]
-	add	rax, rbx
-	imul	rdx, rax
-	sub	rdx, r11
-	adc	r8, 0
-	movd xmm0, r8
-	punpcklqdq xmm5, xmm0
-	jmp	sqrt_fix_2_ret_sandybridge
-
-cnv2_double_mainloop_asm_sandybridge_endp:
diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc
deleted file mode 100644
index f17017a0..00000000
--- a/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc
+++ /dev/null
@@ -1,182 +0,0 @@
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+16], rbx
-	mov	QWORD PTR [rsp+24], rbp
-	mov	QWORD PTR [rsp+32], rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 64
-
-	stmxcsr DWORD PTR [rsp]
-	mov DWORD PTR [rsp+4], 24448
-	ldmxcsr DWORD PTR [rsp+4]
-
-	mov	rax, QWORD PTR [rcx+48]
-	mov	r9, rcx
-	xor	rax, QWORD PTR [rcx+16]
-	mov	ebp, 524288
-	mov	r8, QWORD PTR [rcx+32]
-	xor	r8, QWORD PTR [rcx]
-	mov	r11, QWORD PTR [rcx+40]
-	mov	r10, r8
-	mov	rdx, QWORD PTR [rcx+56]
-	movd	xmm3, rax
-	xor	rdx, QWORD PTR [rcx+24]
-	xor	r11, QWORD PTR [rcx+8]
-	mov	rbx, QWORD PTR [rcx+224]
-	mov	rax, QWORD PTR [r9+80]
-	xor	rax, QWORD PTR [r9+64]
-	movd	xmm0, rdx
-	mov	rcx, QWORD PTR [rcx+88]
-	xor	rcx, QWORD PTR [r9+72]
-	mov	rdi, QWORD PTR [r9+104]
-	and	r10d, 2097136
-	movaps	XMMWORD PTR [rsp+48], xmm6
-	movd	xmm4, rax
-	movaps	XMMWORD PTR [rsp+32], xmm7
-	movaps	XMMWORD PTR [rsp+16], xmm8
-	xorps	xmm8, xmm8
-	mov ax, 1023
-	shl rax, 52
-	movd xmm7, rax
-	mov	r15, QWORD PTR [r9+96]
-	punpcklqdq xmm3, xmm0
-	movd	xmm0, rcx
-	punpcklqdq xmm4, xmm0
-
-	ALIGN(64)
-cnv2_main_loop_bulldozer:
-	movdqa	xmm5, XMMWORD PTR [r10+rbx]
-	movd xmm6, r8
-	pinsrq xmm6, r11, 1
-	lea	rdx, QWORD PTR [r10+rbx]
-	lea	r9, QWORD PTR [rdi+rdi]
-	shl	rdi, 32
-
-	mov	ecx, r10d
-	mov	eax, r10d
-	xor	ecx, 16
-	xor	eax, 32
-	xor	r10d, 48
-	aesenc	xmm5, xmm6
-	movdqa	xmm2, XMMWORD PTR [rcx+rbx]
-	movdqa	xmm1, XMMWORD PTR [rax+rbx]
-	movdqa	xmm0, XMMWORD PTR [r10+rbx]
-	paddq	xmm2, xmm3
-	paddq	xmm1, xmm6
-	paddq	xmm0, xmm4
-	movdqa	XMMWORD PTR [rcx+rbx], xmm0
-	movdqa	XMMWORD PTR [rax+rbx], xmm2
-	movdqa	XMMWORD PTR [r10+rbx], xmm1
-
-	movaps	xmm1, xmm8
-	mov	rsi, r15
-	xor	rsi, rdi
-
-	mov edi, 1023
-	shl rdi, 52
-
-	movd	r14, xmm5
-	pextrq rax, xmm5, 1
-
-	movdqa	xmm0, xmm5
-	pxor	xmm0, xmm3
-	mov	r10, r14
-	and	r10d, 2097136
-	movdqa	XMMWORD PTR [rdx], xmm0
-	xor	rsi, QWORD PTR [r10+rbx]
-	lea	r12, QWORD PTR [r10+rbx]
-	mov	r13, QWORD PTR [r10+rbx+8]
-
-	add	r9d, r14d
-	or	r9d, -2147483647
-	xor	edx, edx
-	div	r9
-	mov	eax, eax
-	shl	rdx, 32
-	lea	r15, [rax+rdx]
-	lea	rax, [r14+r15]
-	shr	rax, 12
-	add	rax, rdi
-	movd	xmm0, rax
-	sqrtsd	xmm1, xmm0
-	movd	rdi, xmm1
-	test	rdi, 524287
-	je	sqrt_fixup_bulldozer
-	shr	rdi, 19
-
-sqrt_fixup_bulldozer_ret:
-	mov	rax, rsi
-	mul	r14
-	movd xmm1, rax
-	movd xmm0, rdx
-	punpcklqdq xmm0, xmm1
-
-	mov	r9d, r10d
-	mov	ecx, r10d
-	xor	r9d, 16
-	xor	ecx, 32
-	xor	r10d, 48
-	movdqa	xmm1, XMMWORD PTR [rcx+rbx]
-	xor rdx, [rcx+rbx]
-	xor rax, [rcx+rbx+8]
-	movdqa	xmm2, XMMWORD PTR [r9+rbx]
-	pxor xmm2, xmm0
-	paddq xmm4, XMMWORD PTR [r10+rbx]
-	paddq	xmm2, xmm3
-	paddq	xmm1, xmm6
-	movdqa	XMMWORD PTR [r9+rbx], xmm4
-	movdqa	XMMWORD PTR [rcx+rbx], xmm2
-	movdqa	XMMWORD PTR [r10+rbx], xmm1
-
-	movdqa	xmm4, xmm3
-	add	r8, rdx
-	add	r11, rax
-	mov	QWORD PTR [r12], r8
-	xor	r8, rsi
-	mov	QWORD PTR [r12+8], r11
-	mov	r10, r8
-	xor	r11, r13
-	and	r10d, 2097136
-	movdqa	xmm3, xmm5
-	dec	ebp
-	jne	cnv2_main_loop_bulldozer
-
-	ldmxcsr DWORD PTR [rsp]
-	movaps	xmm6, XMMWORD PTR [rsp+48]
-	lea	r11, QWORD PTR [rsp+64]
-	mov	rbx, QWORD PTR [r11+56]
-	mov	rbp, QWORD PTR [r11+64]
-	mov	rsi, QWORD PTR [r11+72]
-	movaps	xmm8, XMMWORD PTR [r11-48]
-	movaps	xmm7, XMMWORD PTR [rsp+32]
-	mov	rsp, r11
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	jmp cnv2_main_loop_bulldozer_endp
-
-sqrt_fixup_bulldozer:
-	movd r9, xmm5
-	add r9, r15
-	dec	rdi
-	mov edx, -1022
-	shl rdx, 32
-	mov	rax, rdi
-	shr	rdi, 19
-	shr	rax, 20
-	mov	rcx, rdi
-	sub	rcx, rax
-	lea	rcx, [rcx+rdx+1]
-	add	rax, rdx
-	imul	rcx, rax
-	sub	rcx, r9
-	adc	rdi, 0
-	jmp	sqrt_fixup_bulldozer_ret
-
-cnv2_main_loop_bulldozer_endp:
diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc
deleted file mode 100644
index a12ac35c..00000000
--- a/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc
+++ /dev/null
@@ -1,188 +0,0 @@
-	mov	rcx, [rcx]
-
-	mov	 QWORD PTR [rsp+24], rbx
-	push	 rbp
-	push	 rsi
-	push	 rdi
-	push	 r12
-	push	 r13
-	push	 r14
-	push	 r15
-	sub	 rsp, 80
-
-	stmxcsr DWORD PTR [rsp]
-	mov DWORD PTR [rsp+4], 24448
-	ldmxcsr DWORD PTR [rsp+4]
-
-	mov	 rax, QWORD PTR [rcx+48]
-	mov	 r9, rcx
-	xor	 rax, QWORD PTR [rcx+16]
-	mov	 esi, 524288
-	mov	 r8, QWORD PTR [rcx+32]
-	mov	 r13d, -2147483647
-	xor	 r8, QWORD PTR [rcx]
-	mov	 r11, QWORD PTR [rcx+40]
-	mov	 r10, r8
-	mov	 rdx, QWORD PTR [rcx+56]
-	movd	 xmm4, rax
-	xor	 rdx, QWORD PTR [rcx+24]
-	xor	 r11, QWORD PTR [rcx+8]
-	mov	 rbx, QWORD PTR [rcx+224]
-	mov	 rax, QWORD PTR [r9+80]
-	xor	 rax, QWORD PTR [r9+64]
-	movd	 xmm0, rdx
-	mov	 rcx, QWORD PTR [rcx+88]
-	xor	 rcx, QWORD PTR [r9+72]
-	movd	 xmm3, QWORD PTR [r9+104]
-	movaps	 XMMWORD PTR [rsp+64], xmm6
-	movaps	 XMMWORD PTR [rsp+48], xmm7
-	movaps	 XMMWORD PTR [rsp+32], xmm8
-	and	 r10d, 2097136
-	movd	 xmm5, rax
-
-	xor eax, eax
-	mov QWORD PTR [rsp+16], rax
-
-	mov ax, 1023
-	shl rax, 52
-	movd xmm8, rax
-	mov r15, QWORD PTR [r9+96]
-	punpcklqdq xmm4, xmm0
-	movd	 xmm0, rcx
-	punpcklqdq xmm5, xmm0
-	movdqu	 xmm6, XMMWORD PTR [r10+rbx]
-
-	ALIGN(64)
-main_loop_ivybridge:
-	lea	 rdx, QWORD PTR [r10+rbx]
-	mov	 ecx, r10d
-	mov	 eax, r10d
-	mov rdi, r15
-	xor	 ecx, 16
-	xor	 eax, 32
-	xor	 r10d, 48
-	movd	 xmm0, r11
-	movd	 xmm7, r8
-	punpcklqdq xmm7, xmm0
-	aesenc	 xmm6, xmm7
-	movd	 rbp, xmm6
-	mov	 r9, rbp
-	and	 r9d, 2097136
-	movdqu	 xmm2, XMMWORD PTR [rcx+rbx]
-	movdqu	 xmm1, XMMWORD PTR [rax+rbx]
-	movdqu	 xmm0, XMMWORD PTR [r10+rbx]
-	paddq	 xmm1, xmm7
-	paddq	 xmm0, xmm5
-	paddq	 xmm2, xmm4
-	movdqu	 XMMWORD PTR [rcx+rbx], xmm0
-	movdqu	 XMMWORD PTR [rax+rbx], xmm2
-	movdqu	 XMMWORD PTR [r10+rbx], xmm1
-	mov r10, r9
-	xor r10d, 32
-	movd	 rcx, xmm3
-	mov	 rax, rcx
-	shl	 rax, 32
-	xor	 rdi, rax
-	movdqa	 xmm0, xmm6
-	pxor	 xmm0, xmm4
-	movdqu	 XMMWORD PTR [rdx], xmm0
-	xor	 rdi, QWORD PTR [r9+rbx]
-	lea	 r14, QWORD PTR [r9+rbx]
-	mov	 r12, QWORD PTR [r14+8]
-	xor	 edx, edx
-	lea	 r9d, DWORD PTR [ecx+ecx]
-	add	 r9d, ebp
-	movdqa	 xmm0, xmm6
-	psrldq	 xmm0, 8
-	or	 r9d, r13d
-	movd	 rax, xmm0
-	div	 r9
-	xorps xmm3, xmm3
-	mov	 eax, eax
-	shl	 rdx, 32
-	add	 rdx, rax
-	lea	 r9, QWORD PTR [rdx+rbp]
-	mov r15, rdx
-	mov	 rax, r9
-	shr	 rax, 12
-	movd	 xmm0, rax
-	paddq	 xmm0, xmm8
-	sqrtsd	 xmm3, xmm0
-	psubq	 xmm3, XMMWORD PTR [rsp+16]
-	movd	 rdx, xmm3
-	test	 edx, 524287
-	je	 sqrt_fixup_ivybridge
-	psrlq	 xmm3, 19
-sqrt_fixup_ivybridge_ret:
-
-	mov	 ecx, r10d
-	mov	 rax, rdi
-	mul	 rbp
-	movd xmm2, rdx
-	xor rdx, [rcx+rbx]
-	add	 r8, rdx
-	mov	 QWORD PTR [r14], r8
-	xor	 r8, rdi
-	mov edi, r8d
-	and edi, 2097136
-	movd xmm0, rax
-	xor rax, [rcx+rbx+8]
-	add	 r11, rax
-	mov	 QWORD PTR [r14+8], r11
-	punpcklqdq xmm2, xmm0
-
-	mov	 r9d, r10d
-	xor	 r9d, 48
-	xor	 r10d, 16
-	pxor	 xmm2, XMMWORD PTR [r9+rbx]
-	movdqu	 xmm0, XMMWORD PTR [r10+rbx]
-	paddq	 xmm0, xmm5
-	movdqu	 xmm1, XMMWORD PTR [rcx+rbx]
-	paddq	 xmm2, xmm4
-	paddq	 xmm1, xmm7
-	movdqa	 xmm5, xmm4
-	movdqu	 XMMWORD PTR [r9+rbx], xmm0
-	movdqa	 xmm4, xmm6
-	movdqu	 XMMWORD PTR [rcx+rbx], xmm2
-	movdqu	 XMMWORD PTR [r10+rbx], xmm1
-	movdqu xmm6, [rdi+rbx]
-	mov	 r10d, edi
-	xor	 r11, r12
-	dec rsi
-	jne	 main_loop_ivybridge
-
-	ldmxcsr DWORD PTR [rsp]
-	mov	 rbx, QWORD PTR [rsp+160]
-	movaps	 xmm6, XMMWORD PTR [rsp+64]
-	movaps	 xmm7, XMMWORD PTR [rsp+48]
-	movaps	 xmm8, XMMWORD PTR [rsp+32]
-	add	 rsp, 80
-	pop	 r15
-	pop	 r14
-	pop	 r13
-	pop	 r12
-	pop	 rdi
-	pop	 rsi
-	pop	 rbp
-	jmp cnv2_main_loop_ivybridge_endp
-
-sqrt_fixup_ivybridge:
-	dec	 rdx
-	mov r13d, -1022
-	shl r13, 32
-	mov	 rax, rdx
-	shr	 rdx, 19
-	shr	 rax, 20
-	mov	 rcx, rdx
-	sub	 rcx, rax
-	add	 rax, r13
-	not r13
-	sub	 rcx, r13
-	mov	 r13d, -2147483647
-	imul	 rcx, rax
-	sub	 rcx, r9
-	adc	 rdx, 0
-	movd	 xmm3, rdx
-	jmp	 sqrt_fixup_ivybridge_ret
-
-cnv2_main_loop_ivybridge_endp:
diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc
deleted file mode 100644
index 044235d8..00000000
--- a/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc
+++ /dev/null
@@ -1,181 +0,0 @@
-	mov	rcx, [rcx]
-
-	mov	QWORD PTR [rsp+16], rbx
-	mov	QWORD PTR [rsp+24], rbp
-	mov	QWORD PTR [rsp+32], rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 64
-
-	stmxcsr DWORD PTR [rsp]
-	mov DWORD PTR [rsp+4], 24448
-	ldmxcsr DWORD PTR [rsp+4]
-
-	mov	rax, QWORD PTR [rcx+48]
-	mov	r9, rcx
-	xor	rax, QWORD PTR [rcx+16]
-	mov	ebp, 524288
-	mov	r8, QWORD PTR [rcx+32]
-	xor	r8, QWORD PTR [rcx]
-	mov	r11, QWORD PTR [rcx+40]
-	mov	r10, r8
-	mov	rdx, QWORD PTR [rcx+56]
-	movd	xmm3, rax
-	xor	rdx, QWORD PTR [rcx+24]
-	xor	r11, QWORD PTR [rcx+8]
-	mov	rbx, QWORD PTR [rcx+224]
-	mov	rax, QWORD PTR [r9+80]
-	xor	rax, QWORD PTR [r9+64]
-	movd	xmm0, rdx
-	mov	rcx, QWORD PTR [rcx+88]
-	xor	rcx, QWORD PTR [r9+72]
-	mov	rdi, QWORD PTR [r9+104]
-	and	r10d, 2097136
-	movaps	XMMWORD PTR [rsp+48], xmm6
-	movd	xmm4, rax
-	movaps	XMMWORD PTR [rsp+32], xmm7
-	movaps	XMMWORD PTR [rsp+16], xmm8
-	xorps	xmm8, xmm8
-	mov ax, 1023
-	shl rax, 52
-	movd xmm7, rax
-	mov	r15, QWORD PTR [r9+96]
-	punpcklqdq xmm3, xmm0
-	movd	xmm0, rcx
-	punpcklqdq xmm4, xmm0
-
-	ALIGN(64)
-main_loop_ryzen:
-	movdqa	xmm5, XMMWORD PTR [r10+rbx]
-	movd	xmm0, r11
-	movd	xmm6, r8
-	punpcklqdq xmm6, xmm0
-	lea	rdx, QWORD PTR [r10+rbx]
-	lea	r9, QWORD PTR [rdi+rdi]
-	shl	rdi, 32
-
-	mov	ecx, r10d
-	mov	eax, r10d
-	xor	ecx, 16
-	xor	eax, 32
-	xor	r10d, 48
-	aesenc	xmm5, xmm6
-	movdqa	xmm2, XMMWORD PTR [rcx+rbx]
-	movdqa	xmm1, XMMWORD PTR [rax+rbx]
-	movdqa	xmm0, XMMWORD PTR [r10+rbx]
-	paddq	xmm2, xmm3
-	paddq	xmm1, xmm6
-	paddq	xmm0, xmm4
-	movdqa	XMMWORD PTR [rcx+rbx], xmm0
-	movdqa	XMMWORD PTR [rax+rbx], xmm2
-	movdqa	XMMWORD PTR [r10+rbx], xmm1
-
-	movaps	xmm1, xmm8
-	mov	rsi, r15
-	xor	rsi, rdi
-	movd	r14, xmm5
-	movdqa	xmm0, xmm5
-	pxor	xmm0, xmm3
-	mov	r10, r14
-	and	r10d, 2097136
-	movdqa	XMMWORD PTR [rdx], xmm0
-	xor	rsi, QWORD PTR [r10+rbx]
-	lea	r12, QWORD PTR [r10+rbx]
-	mov	r13, QWORD PTR [r10+rbx+8]
-
-	add	r9d, r14d
-	or	r9d, -2147483647
-	xor	edx, edx
-	movdqa	xmm0, xmm5
-	psrldq	xmm0, 8
-	movd	rax, xmm0
-
-	div	r9
-	movd xmm0, rax
-	movd xmm1, rdx
-	punpckldq xmm0, xmm1
-	movd r15, xmm0
-	paddq xmm0, xmm5
-	movdqa xmm2, xmm0
-	psrlq xmm0, 12
-	paddq	xmm0, xmm7
-	sqrtsd	xmm1, xmm0
-	movd	rdi, xmm1
-	test	rdi, 524287
-	je	sqrt_fixup_ryzen
-	shr	rdi, 19
-
-sqrt_fixup_ryzen_ret:
-	mov	rax, rsi
-	mul	r14
-	movd xmm1, rax
-	movd xmm0, rdx
-	punpcklqdq xmm0, xmm1
-
-	mov	r9d, r10d
-	mov	ecx, r10d
-	xor	r9d, 16
-	xor	ecx, 32
-	xor	r10d, 48
-	movdqa	xmm1, XMMWORD PTR [rcx+rbx]
-	xor rdx, [rcx+rbx]
-	xor rax, [rcx+rbx+8]
-	movdqa	xmm2, XMMWORD PTR [r9+rbx]
-	pxor xmm2, xmm0
-	paddq xmm4, XMMWORD PTR [r10+rbx]
-	paddq	xmm2, xmm3
-	paddq	xmm1, xmm6
-	movdqa	XMMWORD PTR [r9+rbx], xmm4
-	movdqa	XMMWORD PTR [rcx+rbx], xmm2
-	movdqa	XMMWORD PTR [r10+rbx], xmm1
-
-	movdqa	xmm4, xmm3
-	add	r8, rdx
-	add	r11, rax
-	mov	QWORD PTR [r12], r8
-	xor	r8, rsi
-	mov	QWORD PTR [r12+8], r11
-	mov	r10, r8
-	xor	r11, r13
-	and	r10d, 2097136
-	movdqa	xmm3, xmm5
-	dec	ebp
-	jne	main_loop_ryzen
-
-	ldmxcsr DWORD PTR [rsp]
-	movaps	xmm6, XMMWORD PTR [rsp+48]
-	lea	r11, QWORD PTR [rsp+64]
-	mov	rbx, QWORD PTR [r11+56]
-	mov	rbp, QWORD PTR [r11+64]
-	mov	rsi, QWORD PTR [r11+72]
-	movaps	xmm8, XMMWORD PTR [r11-48]
-	movaps	xmm7, XMMWORD PTR [rsp+32]
-	mov	rsp, r11
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	jmp cnv2_main_loop_ryzen_endp
-
-sqrt_fixup_ryzen:
-	movd r9, xmm2
-	dec	rdi
-	mov edx, -1022
-	shl rdx, 32
-	mov	rax, rdi
-	shr	rdi, 19
-	shr	rax, 20
-	mov	rcx, rdi
-	sub	rcx, rax
-	lea	rcx, [rcx+rdx+1]
-	add	rax, rdx
-	imul	rcx, rax
-	sub	rcx, r9
-	adc	rdi, 0
-	jmp	sqrt_fixup_ryzen_ret
-
-cnv2_main_loop_ryzen_endp:
diff --git a/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc b/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc
deleted file mode 100644
index 97fb691b..00000000
--- a/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc
+++ /dev/null
@@ -1,413 +0,0 @@
-	mov	rdx, [rcx+8]
-	mov	rcx, [rcx]
-
-	mov	rax, rsp
-	push	rbx
-	push	rbp
-	push	rsi
-	push	rdi
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	sub	rsp, 184
-
-	stmxcsr DWORD PTR [rsp+272]
-	mov DWORD PTR [rsp+276], 24448
-	ldmxcsr DWORD PTR [rsp+276]
-
-	mov	r13, QWORD PTR [rcx+224]
-	mov	r9, rdx
-	mov	r10, QWORD PTR [rcx+32]
-	mov	r8, rcx
-	xor	r10, QWORD PTR [rcx]
-	mov	r14d, 393216
-	mov	r11, QWORD PTR [rcx+40]
-	xor	r11, QWORD PTR [rcx+8]
-	mov	rsi, QWORD PTR [rdx+224]
-	mov	rdx, QWORD PTR [rcx+56]
-	xor	rdx, QWORD PTR [rcx+24]
-	mov	rdi, QWORD PTR [r9+32]
-	xor	rdi, QWORD PTR [r9]
-	mov	rbp, QWORD PTR [r9+40]
-	xor	rbp, QWORD PTR [r9+8]
-	movd	xmm0, rdx
-	movaps	XMMWORD PTR [rax-88], xmm6
-	movaps	XMMWORD PTR [rax-104], xmm7
-	movaps	XMMWORD PTR [rax-120], xmm8
-	movaps	XMMWORD PTR [rsp+112], xmm9
-	movaps	XMMWORD PTR [rsp+96], xmm10
-	movaps	XMMWORD PTR [rsp+80], xmm11
-	movaps	XMMWORD PTR [rsp+64], xmm12
-	movaps	XMMWORD PTR [rsp+48], xmm13
-	movaps	XMMWORD PTR [rsp+32], xmm14
-	movaps	XMMWORD PTR [rsp+16], xmm15
-	mov	rdx, r10
-	movd	xmm4, QWORD PTR [r8+96]
-	and	edx, 2097136
-	mov	rax, QWORD PTR [rcx+48]
-	xorps	xmm13, xmm13
-	xor	rax, QWORD PTR [rcx+16]
-	mov	rcx, QWORD PTR [rcx+88]
-	xor	rcx, QWORD PTR [r8+72]
-	movd	xmm5, QWORD PTR [r8+104]
-	movd	xmm7, rax
-
-	mov eax, 1
-	shl rax, 52
-	movd xmm14, rax
-	punpcklqdq xmm14, xmm14
-
-	mov eax, 1023
-	shl rax, 52
-	movd xmm12, rax
-	punpcklqdq xmm12, xmm12
-
-	mov	rax, QWORD PTR [r8+80]
-	xor	rax, QWORD PTR [r8+64]
-	punpcklqdq xmm7, xmm0
-	movd	xmm0, rcx
-	mov	rcx, QWORD PTR [r9+56]
-	xor	rcx, QWORD PTR [r9+24]
-	movd	xmm3, rax
-	mov	rax, QWORD PTR [r9+48]
-	xor	rax, QWORD PTR [r9+16]
-	punpcklqdq xmm3, xmm0
-	movd	xmm0, rcx
-	mov	QWORD PTR [rsp], r13
-	mov	rcx, QWORD PTR [r9+88]
-	xor	rcx, QWORD PTR [r9+72]
-	movd	xmm6, rax
-	mov	rax, QWORD PTR [r9+80]
-	xor	rax, QWORD PTR [r9+64]
-	punpcklqdq xmm6, xmm0
-	movd	xmm0, rcx
-	mov	QWORD PTR [rsp+256], r10
-	mov	rcx, rdi
-	mov	QWORD PTR [rsp+264], r11
-	movd	xmm8, rax
-	and	ecx, 2097136
-	punpcklqdq xmm8, xmm0
-	movd	xmm0, QWORD PTR [r9+96]
-	punpcklqdq xmm4, xmm0
-	movd	xmm0, QWORD PTR [r9+104]
-	lea	r8, QWORD PTR [rcx+rsi]
-	movdqu	xmm11, XMMWORD PTR [r8]
-	punpcklqdq xmm5, xmm0
-	lea	r9, QWORD PTR [rdx+r13]
-	movdqu	xmm15, XMMWORD PTR [r9]
-
-	ALIGN(64)
-rwz_main_loop_double:
-	movdqu	xmm9, xmm15
-	mov eax, edx
-	mov ebx, edx
-	xor eax, 16
-	xor ebx, 32
-	xor edx, 48
-
-	movd	xmm0, r11
-	movd	xmm2, r10
-	punpcklqdq xmm2, xmm0
-	aesenc	xmm9, xmm2
-
-	movdqu	xmm0, XMMWORD PTR [rdx+r13]
-	movdqu	xmm1, XMMWORD PTR [rbx+r13]
-	paddq	xmm0, xmm7
-	paddq	xmm1, xmm2
-	movdqu	XMMWORD PTR [rbx+r13], xmm0
-	movdqu	xmm0, XMMWORD PTR [rax+r13]
-	movdqu	XMMWORD PTR [rdx+r13], xmm1
-	paddq	xmm0, xmm3
-	movdqu	XMMWORD PTR [rax+r13], xmm0
-
-	movd	r11, xmm9
-	mov	edx, r11d
-	and	edx, 2097136
-	movdqa	xmm0, xmm9
-	pxor	xmm0, xmm7
-	movdqu	XMMWORD PTR [r9], xmm0
-
-	lea	rbx, QWORD PTR [rdx+r13]
-	mov	r10, QWORD PTR [rdx+r13]
-
-	movdqu	xmm10, xmm11
-	movd	xmm0, rbp
-	movd	xmm11, rdi
-	punpcklqdq xmm11, xmm0
-	aesenc	xmm10, xmm11
-
-	mov eax, ecx
-	mov r12d, ecx
-	xor eax, 16
-	xor r12d, 32
-	xor ecx, 48
-
-	movdqu	xmm0, XMMWORD PTR [rcx+rsi]
-	paddq	xmm0, xmm6
-	movdqu	xmm1, XMMWORD PTR [r12+rsi]
-	movdqu	XMMWORD PTR [r12+rsi], xmm0
-	paddq	xmm1, xmm11
-	movdqu	xmm0, XMMWORD PTR [rax+rsi]
-	movdqu	XMMWORD PTR [rcx+rsi], xmm1
-	paddq	xmm0, xmm8
-	movdqu	XMMWORD PTR [rax+rsi], xmm0
-
-	movd	rcx, xmm10
-	and	ecx, 2097136
-
-	movdqa	xmm0, xmm10
-	pxor	xmm0, xmm6
-	movdqu	XMMWORD PTR [r8], xmm0
-	mov r12, QWORD PTR [rcx+rsi]
-
-	mov	r9, QWORD PTR [rbx+8]
-
-	xor edx, 16
-	mov r8d, edx
-	mov r15d, edx
-
-	movd	rdx, xmm5
-	shl	rdx, 32
-	movd	rax, xmm4
-	xor	rdx, rax
-	xor	r10, rdx
-	mov	rax, r10
-	mul	r11
-	mov r11d, r8d
-	xor r11d, 48
-	movd xmm0, rdx
-	xor rdx, [r11+r13]
-	movd xmm1, rax
-	xor rax, [r11+r13+8]
-	punpcklqdq xmm0, xmm1
-
-	pxor xmm0, XMMWORD PTR [r8+r13]
-	movdqu	xmm1, XMMWORD PTR [r11+r13]
-	paddq	xmm0, xmm3
-	paddq	xmm1, xmm2
-	movdqu	XMMWORD PTR [r8+r13], xmm0
-	xor	r8d, 32
-	movdqu	xmm0, XMMWORD PTR [r8+r13]
-	movdqu	XMMWORD PTR [r8+r13], xmm1
-	paddq	xmm0, xmm7
-	movdqu	XMMWORD PTR [r11+r13], xmm0
-
-	mov	r11, QWORD PTR [rsp+256]
-	add	r11, rdx
-	mov	rdx, QWORD PTR [rsp+264]
-	add	rdx, rax
-	mov	QWORD PTR [rbx], r11
-	xor	r11, r10
-	mov	QWORD PTR [rbx+8], rdx
-	xor	rdx, r9
-	mov	QWORD PTR [rsp+256], r11
-	and	r11d, 2097136
-	mov	QWORD PTR [rsp+264], rdx
-	mov	QWORD PTR [rsp+8], r11
-	lea	r15, QWORD PTR [r11+r13]
-	movdqu xmm15, XMMWORD PTR [r11+r13]
-	lea	r13, QWORD PTR [rsi+rcx]
-	movdqa	xmm0, xmm5
-	psrldq	xmm0, 8
-	movaps	xmm2, xmm13
-	movd	r10, xmm0
-	psllq	xmm5, 1
-	shl	r10, 32
-	movdqa	xmm0, xmm9
-	psrldq	xmm0, 8
-	movdqa	xmm1, xmm10
-	movd	r11, xmm0
-	psrldq	xmm1, 8
-	movd	r8, xmm1
-	psrldq	xmm4, 8
-	movaps	xmm0, xmm13
-	movd	rax, xmm4
-	xor	r10, rax
-	movaps	xmm1, xmm13
-	xor	r10, r12
-	lea	rax, QWORD PTR [r11+1]
-	shr	rax, 1
-	movdqa	xmm3, xmm9
-	punpcklqdq xmm3, xmm10
-	paddq	xmm5, xmm3
-	movd	rdx, xmm5
-	psrldq	xmm5, 8
-	cvtsi2sd xmm2, rax
-	or	edx, -2147483647
-	lea	rax, QWORD PTR [r8+1]
-	shr	rax, 1
-	movd	r9, xmm5
-	cvtsi2sd xmm0, rax
-	or	r9d, -2147483647
-	cvtsi2sd xmm1, rdx
-	unpcklpd xmm2, xmm0
-	movaps	xmm0, xmm13
-	cvtsi2sd xmm0, r9
-	unpcklpd xmm1, xmm0
-	divpd	xmm2, xmm1
-	paddq	xmm2, xmm14
-	cvttsd2si rax, xmm2
-	psrldq	xmm2, 8
-	mov	rbx, rax
-	imul	rax, rdx
-	sub	r11, rax
-	js	rwz_div_fix_1
-rwz_div_fix_1_ret:
-
-	cvttsd2si rdx, xmm2
-	mov	rax, rdx
-	imul	rax, r9
-	movd	xmm2, r11d
-	movd	xmm4, ebx
-	sub	r8, rax
-	js	rwz_div_fix_2
-rwz_div_fix_2_ret:
-
-	movd	xmm1, r8d
-	movd	xmm0, edx
-	punpckldq xmm2, xmm1
-	punpckldq xmm4, xmm0
-	punpckldq xmm4, xmm2
-	paddq	xmm3, xmm4
-	movdqa	xmm0, xmm3
-	psrlq	xmm0, 12
-	paddq	xmm0, xmm12
-	sqrtpd	xmm1, xmm0
-	movd	r9, xmm1
-	movdqa xmm5, xmm1
-	psrlq xmm5, 19
-	test	r9, 524287
-	je	rwz_sqrt_fix_1
-rwz_sqrt_fix_1_ret:
-
-	movd r9, xmm10
-	psrldq	xmm1, 8
-	movd	r8, xmm1
-	test	r8, 524287
-	je	rwz_sqrt_fix_2
-rwz_sqrt_fix_2_ret:
-
-	mov r12d, ecx
-	mov r8d, ecx
-	xor r12d, 16
-	xor r8d, 32
-	xor ecx, 48
-	mov	rax, r10
-	mul	r9
-	movd xmm0, rax
-	movd xmm3, rdx
-	punpcklqdq xmm3, xmm0
-
-	movdqu	xmm0, XMMWORD PTR [r12+rsi]
-	pxor xmm0, xmm3
-	movdqu	xmm1, XMMWORD PTR [r8+rsi]
-	xor rdx, [r8+rsi]
-	xor rax, [r8+rsi+8]
-	movdqu	xmm3, XMMWORD PTR [rcx+rsi]
-	paddq	xmm3, xmm6
-	paddq	xmm1, xmm11
-	paddq	xmm0, xmm8
-	movdqu	XMMWORD PTR [r8+rsi], xmm3
-	movdqu	XMMWORD PTR [rcx+rsi], xmm1
-	movdqu	XMMWORD PTR [r12+rsi], xmm0
-
-	add	rdi, rdx
-	mov	QWORD PTR [r13], rdi
-	xor	rdi, r10
-	mov	ecx, edi
-	and	ecx, 2097136
-	lea	r8, QWORD PTR [rcx+rsi]
-
-	mov rdx, QWORD PTR [r13+8]	
-	add	rbp, rax
-	mov	QWORD PTR [r13+8], rbp
-	movdqu xmm11, XMMWORD PTR [rcx+rsi]
-	xor	rbp, rdx
-	mov	r13, QWORD PTR [rsp]
-	movdqa	xmm3, xmm7
-	mov	rdx, QWORD PTR [rsp+8]
-	movdqa	xmm8, xmm6
-	mov	r10, QWORD PTR [rsp+256]
-	movdqa	xmm7, xmm9
-	mov	r11, QWORD PTR [rsp+264]
-	movdqa	xmm6, xmm10
-	mov	r9, r15
-	dec r14d
-	jne	rwz_main_loop_double
-
-	ldmxcsr DWORD PTR [rsp+272]
-	movaps	xmm13, XMMWORD PTR [rsp+48]
-	lea	r11, QWORD PTR [rsp+184]
-	movaps	xmm6, XMMWORD PTR [r11-24]
-	movaps	xmm7, XMMWORD PTR [r11-40]
-	movaps	xmm8, XMMWORD PTR [r11-56]
-	movaps	xmm9, XMMWORD PTR [r11-72]
-	movaps	xmm10, XMMWORD PTR [r11-88]
-	movaps	xmm11, XMMWORD PTR [r11-104]
-	movaps	xmm12, XMMWORD PTR [r11-120]
-	movaps	xmm14, XMMWORD PTR [rsp+32]
-	movaps	xmm15, XMMWORD PTR [rsp+16]
-	mov	rsp, r11
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rdi
-	pop	rsi
-	pop	rbp
-	pop	rbx
-	jmp rwz_cnv2_double_mainloop_asm_endp
-
-rwz_div_fix_1:
-	dec	rbx
-	add	r11, rdx
-	jmp	rwz_div_fix_1_ret
-
-rwz_div_fix_2:
-	dec	rdx
-	add	r8, r9
-	jmp	rwz_div_fix_2_ret
-
-rwz_sqrt_fix_1:
-	movd	r8, xmm3
-	movdqa xmm0, xmm5
-	psrldq xmm0, 8
-	dec	r9
-	mov r11d, -1022
-	shl r11, 32
-	mov	rax, r9
-	shr	r9, 19
-	shr	rax, 20
-	mov	rdx, r9
-	sub	rdx, rax
-	lea	rdx, [rdx+r11+1]
-	add	rax, r11
-	imul	rdx, rax
-	sub	rdx, r8
-	adc	r9, 0
-	movd xmm5, r9
-	punpcklqdq xmm5, xmm0
-	jmp	rwz_sqrt_fix_1_ret
-
-rwz_sqrt_fix_2:
-	psrldq	xmm3, 8
-	movd	r11, xmm3
-	dec	r8
-	mov ebx, -1022
-	shl rbx, 32
-	mov	rax, r8
-	shr	r8, 19
-	shr	rax, 20
-	mov	rdx, r8
-	sub	rdx, rax
-	lea	rdx, [rdx+rbx+1]
-	add	rax, rbx
-	imul	rdx, rax
-	sub	rdx, r11
-	adc	r8, 0
-	movd xmm0, r8
-	punpcklqdq xmm5, xmm0
-	jmp	rwz_sqrt_fix_2_ret
-
-rwz_cnv2_double_mainloop_asm_endp:
diff --git a/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc b/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc
deleted file mode 100644
index e2b7a5fc..00000000
--- a/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc
+++ /dev/null
@@ -1,188 +0,0 @@
-	mov	rcx, [rcx]
-
-	mov	 QWORD PTR [rsp+24], rbx
-	push	 rbp
-	push	 rsi
-	push	 rdi
-	push	 r12
-	push	 r13
-	push	 r14
-	push	 r15
-	sub	 rsp, 80
-
-	stmxcsr DWORD PTR [rsp]
-	mov DWORD PTR [rsp+4], 24448
-	ldmxcsr DWORD PTR [rsp+4]
-
-	mov	 rax, QWORD PTR [rcx+48]
-	mov	 r9, rcx
-	xor	 rax, QWORD PTR [rcx+16]
-	mov	 esi, 393216
-	mov	 r8, QWORD PTR [rcx+32]
-	mov	 r13d, -2147483647
-	xor	 r8, QWORD PTR [rcx]
-	mov	 r11, QWORD PTR [rcx+40]
-	mov	 r10, r8
-	mov	 rdx, QWORD PTR [rcx+56]
-	movd	 xmm4, rax
-	xor	 rdx, QWORD PTR [rcx+24]
-	xor	 r11, QWORD PTR [rcx+8]
-	mov	 rbx, QWORD PTR [rcx+224]
-	mov	 rax, QWORD PTR [r9+80]
-	xor	 rax, QWORD PTR [r9+64]
-	movd	 xmm0, rdx
-	mov	 rcx, QWORD PTR [rcx+88]
-	xor	 rcx, QWORD PTR [r9+72]
-	movd	 xmm3, QWORD PTR [r9+104]
-	movaps	 XMMWORD PTR [rsp+64], xmm6
-	movaps	 XMMWORD PTR [rsp+48], xmm7
-	movaps	 XMMWORD PTR [rsp+32], xmm8
-	and	 r10d, 2097136
-	movd	 xmm5, rax
-
-	xor eax, eax
-	mov QWORD PTR [rsp+16], rax
-
-	mov ax, 1023
-	shl rax, 52
-	movd xmm8, rax
-	mov r15, QWORD PTR [r9+96]
-	punpcklqdq xmm4, xmm0
-	movd	 xmm0, rcx
-	punpcklqdq xmm5, xmm0
-	movdqu	 xmm6, XMMWORD PTR [r10+rbx]
-
-	ALIGN(64)
-rwz_main_loop:
-	lea	 rdx, QWORD PTR [r10+rbx]
-	mov	 ecx, r10d
-	mov	 eax, r10d
-	mov rdi, r15
-	xor	 ecx, 16
-	xor	 eax, 32
-	xor	 r10d, 48
-	movd	 xmm0, r11
-	movd	 xmm7, r8
-	punpcklqdq xmm7, xmm0
-	aesenc	 xmm6, xmm7
-	movd	 rbp, xmm6
-	mov	 r9, rbp
-	and	 r9d, 2097136
-	movdqu	 xmm0, XMMWORD PTR [rcx+rbx]
-	movdqu	 xmm1, XMMWORD PTR [rax+rbx]
-	movdqu	 xmm2, XMMWORD PTR [r10+rbx]
-	paddq	 xmm0, xmm5
-	paddq	 xmm1, xmm7
-	paddq	 xmm2, xmm4
-	movdqu	 XMMWORD PTR [rcx+rbx], xmm0
-	movdqu	 XMMWORD PTR [rax+rbx], xmm2
-	movdqu	 XMMWORD PTR [r10+rbx], xmm1
-	mov r10, r9
-	xor r10d, 32
-	movd	 rcx, xmm3
-	mov	 rax, rcx
-	shl	 rax, 32
-	xor	 rdi, rax
-	movdqa	 xmm0, xmm6
-	pxor	 xmm0, xmm4
-	movdqu	 XMMWORD PTR [rdx], xmm0
-	xor	 rdi, QWORD PTR [r9+rbx]
-	lea	 r14, QWORD PTR [r9+rbx]
-	mov	 r12, QWORD PTR [r14+8]
-	xor	 edx, edx
-	lea	 r9d, DWORD PTR [ecx+ecx]
-	add	 r9d, ebp
-	movdqa	 xmm0, xmm6
-	psrldq	 xmm0, 8
-	or	 r9d, r13d
-	movd	 rax, xmm0
-	div	 r9
-	xorps xmm3, xmm3
-	mov	 eax, eax
-	shl	 rdx, 32
-	add	 rdx, rax
-	lea	 r9, QWORD PTR [rdx+rbp]
-	mov r15, rdx
-	mov	 rax, r9
-	shr	 rax, 12
-	movd	 xmm0, rax
-	paddq	 xmm0, xmm8
-	sqrtsd	 xmm3, xmm0
-	psubq	 xmm3, XMMWORD PTR [rsp+16]
-	movd	 rdx, xmm3
-	test	 edx, 524287
-	je	 rwz_sqrt_fixup
-	psrlq	 xmm3, 19
-rwz_sqrt_fixup_ret:
-
-	mov	 ecx, r10d
-	mov	 rax, rdi
-	mul	 rbp
-	movd xmm2, rdx
-	xor rdx, [rcx+rbx]
-	add	 r8, rdx
-	mov	 QWORD PTR [r14], r8
-	xor	 r8, rdi
-	mov edi, r8d
-	and edi, 2097136
-	movd xmm0, rax
-	xor rax, [rcx+rbx+8]
-	add	 r11, rax
-	mov	 QWORD PTR [r14+8], r11
-	punpcklqdq xmm2, xmm0
-
-	mov	 r9d, r10d
-	xor	 r9d, 48
-	xor	 r10d, 16
-	pxor	 xmm2, XMMWORD PTR [r9+rbx]
-	movdqu	 xmm0, XMMWORD PTR [r10+rbx]
-	paddq	 xmm0, xmm4
-	movdqu	 xmm1, XMMWORD PTR [rcx+rbx]
-	paddq	 xmm2, xmm5
-	paddq	 xmm1, xmm7
-	movdqa	 xmm5, xmm4
-	movdqu	 XMMWORD PTR [r9+rbx], xmm2
-	movdqa	 xmm4, xmm6
-	movdqu	 XMMWORD PTR [rcx+rbx], xmm0
-	movdqu	 XMMWORD PTR [r10+rbx], xmm1
-	movdqu xmm6, [rdi+rbx]
-	mov	 r10d, edi
-	xor	 r11, r12
-	dec rsi
-	jne	 rwz_main_loop
-
-	ldmxcsr DWORD PTR [rsp]
-	mov	 rbx, QWORD PTR [rsp+160]
-	movaps	 xmm6, XMMWORD PTR [rsp+64]
-	movaps	 xmm7, XMMWORD PTR [rsp+48]
-	movaps	 xmm8, XMMWORD PTR [rsp+32]
-	add	 rsp, 80
-	pop	 r15
-	pop	 r14
-	pop	 r13
-	pop	 r12
-	pop	 rdi
-	pop	 rsi
-	pop	 rbp
-	jmp cnv2_rwz_main_loop_endp
-
-rwz_sqrt_fixup:
-	dec	 rdx
-	mov r13d, -1022
-	shl r13, 32
-	mov	 rax, rdx
-	shr	 rdx, 19
-	shr	 rax, 20
-	mov	 rcx, rdx
-	sub	 rcx, rax
-	add	 rax, r13
-	not r13
-	sub	 rcx, r13
-	mov	 r13d, -2147483647
-	imul	 rcx, rax
-	sub	 rcx, r9
-	adc	 rdx, 0
-	movd	 xmm3, rdx
-	jmp	 rwz_sqrt_fixup_ret
-
-cnv2_rwz_main_loop_endp:
diff --git a/src/crypto/asm/win64/cn_main_loop.S b/src/crypto/asm/win64/cn_main_loop.S
deleted file mode 100644
index 63c3a8ba..00000000
--- a/src/crypto/asm/win64/cn_main_loop.S
+++ /dev/null
@@ -1,45 +0,0 @@
-#define ALIGN(x) .align 64
-.intel_syntax noprefix
-.section .text
-.global cnv2_mainloop_ivybridge_asm
-.global cnv2_mainloop_ryzen_asm
-.global cnv2_mainloop_bulldozer_asm
-.global cnv2_double_mainloop_sandybridge_asm
-.global cnv2_rwz_mainloop_asm
-.global cnv2_rwz_double_mainloop_asm
-
-ALIGN(64)
-cnv2_mainloop_ivybridge_asm:
-	#include "../cn2/cnv2_main_loop_ivybridge.inc"
-	ret 0
-	mov eax, 3735929054
-
-ALIGN(64)
-cnv2_mainloop_ryzen_asm:
-	#include "../cn2/cnv2_main_loop_ryzen.inc"
-	ret 0
-	mov eax, 3735929054
-
-ALIGN(64)
-cnv2_mainloop_bulldozer_asm:
-	#include "../cn2/cnv2_main_loop_bulldozer.inc"
-	ret 0
-	mov eax, 3735929054
-
-ALIGN(64)
-cnv2_double_mainloop_sandybridge_asm:
-	#include "../cn2/cnv2_double_main_loop_sandybridge.inc"
-	ret 0
-	mov eax, 3735929054
-
-ALIGN(64)
-cnv2_rwz_mainloop_asm:
-	#include "cn2/cnv2_rwz_main_loop.inc"
-	ret 0
-	mov eax, 3735929054
-
-ALIGN(64)
-cnv2_rwz_double_mainloop_asm:
-	#include "cn2/cnv2_rwz_double_main_loop.inc"
-	ret 0
-	mov eax, 3735929054
diff --git a/src/crypto/asm/win64/cn_main_loop.asm b/src/crypto/asm/win64/cn_main_loop.asm
deleted file mode 100644
index 57246cf5..00000000
--- a/src/crypto/asm/win64/cn_main_loop.asm
+++ /dev/null
@@ -1,52 +0,0 @@
-_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
-PUBLIC cnv2_mainloop_ivybridge_asm
-PUBLIC cnv2_mainloop_ryzen_asm
-PUBLIC cnv2_mainloop_bulldozer_asm
-PUBLIC cnv2_double_mainloop_sandybridge_asm
-PUBLIC cnv2_rwz_mainloop_asm
-PUBLIC cnv2_rwz_double_mainloop_asm
-
-ALIGN 64
-cnv2_mainloop_ivybridge_asm PROC
-	INCLUDE cn2/cnv2_main_loop_ivybridge.inc
-	ret 0
-	mov eax, 3735929054
-cnv2_mainloop_ivybridge_asm ENDP
-
-ALIGN 64
-cnv2_mainloop_ryzen_asm PROC
-	INCLUDE cn2/cnv2_main_loop_ryzen.inc
-	ret 0
-	mov eax, 3735929054
-cnv2_mainloop_ryzen_asm ENDP
-
-ALIGN 64
-cnv2_mainloop_bulldozer_asm PROC
-	INCLUDE cn2/cnv2_main_loop_bulldozer.inc
-	ret 0
-	mov eax, 3735929054
-cnv2_mainloop_bulldozer_asm ENDP
-
-ALIGN 64
-cnv2_double_mainloop_sandybridge_asm PROC
-	INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc
-	ret 0
-	mov eax, 3735929054
-cnv2_double_mainloop_sandybridge_asm ENDP
-
-ALIGN(64)
-cnv2_rwz_mainloop_asm PROC
-	INCLUDE cn2/cnv2_rwz_main_loop.inc
-	ret 0
-	mov eax, 3735929054
-cnv2_rwz_mainloop_asm ENDP
-
-ALIGN(64)
-cnv2_rwz_double_mainloop_asm PROC
-	INCLUDE cn2/cnv2_rwz_double_main_loop.inc
-	ret 0
-	mov eax, 3735929054
-cnv2_rwz_double_mainloop_asm ENDP
-
-_TEXT_CNV2_MAINLOOP ENDS
-END
diff --git a/src/crypto/c_blake256.c b/src/crypto/c_blake256.c
deleted file mode 100644
index 00a84c22..00000000
--- a/src/crypto/c_blake256.c
+++ /dev/null
@@ -1,326 +0,0 @@
-/*
- * The blake256_* and blake224_* functions are largely copied from
- * blake256_light.c and blake224_light.c from the BLAKE website:
- *
- *     http://131002.net/blake/
- *
- * The hmac_* functions implement HMAC-BLAKE-256 and HMAC-BLAKE-224.
- * HMAC is specified by RFC 2104.
- */
-
-#include <string.h>
-#include <stdio.h>
-#include <stdint.h>
-#include "c_blake256.h"
-
-#define U8TO32(p) \
-    (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) |    \
-     ((uint32_t)((p)[2]) <<  8) | ((uint32_t)((p)[3])      ))
-#define U32TO8(p, v) \
-    (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \
-    (p)[2] = (uint8_t)((v) >>  8); (p)[3] = (uint8_t)((v)      );
-
-const uint8_t sigma[][16] = {
-    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
-    {14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3},
-    {11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4},
-    { 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8},
-    { 9, 0, 5, 7, 2, 4,10,15,14, 1,11,12, 6, 8, 3,13},
-    { 2,12, 6,10, 0,11, 8, 3, 4,13, 7, 5,15,14, 1, 9},
-    {12, 5, 1,15,14,13, 4,10, 0, 7, 6, 3, 9, 2, 8,11},
-    {13,11, 7,14,12, 1, 3, 9, 5, 0,15, 4, 8, 6, 2,10},
-    { 6,15,14, 9,11, 3, 0, 8,12, 2,13, 7, 1, 4,10, 5},
-    {10, 2, 8, 4, 7, 6, 1, 5,15,11, 9,14, 3,12,13, 0},
-    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
-    {14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3},
-    {11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4},
-    { 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8}
-};
-
-const uint32_t cst[16] = {
-    0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344,
-    0xA4093822, 0x299F31D0, 0x082EFA98, 0xEC4E6C89,
-    0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C,
-    0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917
-};
-
-static const uint8_t padding[] = {
-    0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-};
-
-
-void blake256_compress(state *S, const uint8_t *block) {
-    uint32_t v[16], m[16], i;
-
-#define ROT(x,n) (((x)<<(32-n))|((x)>>(n)))
-#define G(a,b,c,d,e)                                      \
-    v[a] += (m[sigma[i][e]] ^ cst[sigma[i][e+1]]) + v[b]; \
-    v[d] = ROT(v[d] ^ v[a],16);                           \
-    v[c] += v[d];                                         \
-    v[b] = ROT(v[b] ^ v[c],12);                           \
-    v[a] += (m[sigma[i][e+1]] ^ cst[sigma[i][e]])+v[b];   \
-    v[d] = ROT(v[d] ^ v[a], 8);                           \
-    v[c] += v[d];                                         \
-    v[b] = ROT(v[b] ^ v[c], 7);
-
-    for (i = 0; i < 16; ++i) m[i] = U8TO32(block + i * 4);
-    for (i = 0; i < 8;  ++i) v[i] = S->h[i];
-    v[ 8] = S->s[0] ^ 0x243F6A88;
-    v[ 9] = S->s[1] ^ 0x85A308D3;
-    v[10] = S->s[2] ^ 0x13198A2E;
-    v[11] = S->s[3] ^ 0x03707344;
-    v[12] = 0xA4093822;
-    v[13] = 0x299F31D0;
-    v[14] = 0x082EFA98;
-    v[15] = 0xEC4E6C89;
-
-    if (S->nullt == 0) {
-        v[12] ^= S->t[0];
-        v[13] ^= S->t[0];
-        v[14] ^= S->t[1];
-        v[15] ^= S->t[1];
-    }
-
-    for (i = 0; i < 14; ++i) {
-        G(0, 4,  8, 12,  0);
-        G(1, 5,  9, 13,  2);
-        G(2, 6, 10, 14,  4);
-        G(3, 7, 11, 15,  6);
-        G(3, 4,  9, 14, 14);
-        G(2, 7,  8, 13, 12);
-        G(0, 5, 10, 15,  8);
-        G(1, 6, 11, 12, 10);
-    }
-
-    for (i = 0; i < 16; ++i) S->h[i % 8] ^= v[i];
-    for (i = 0; i < 8;  ++i) S->h[i] ^= S->s[i % 4];
-}
-
-void blake256_init(state *S) {
-    S->h[0] = 0x6A09E667;
-    S->h[1] = 0xBB67AE85;
-    S->h[2] = 0x3C6EF372;
-    S->h[3] = 0xA54FF53A;
-    S->h[4] = 0x510E527F;
-    S->h[5] = 0x9B05688C;
-    S->h[6] = 0x1F83D9AB;
-    S->h[7] = 0x5BE0CD19;
-    S->t[0] = S->t[1] = S->buflen = S->nullt = 0;
-    S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0;
-}
-
-void blake224_init(state *S) {
-    S->h[0] = 0xC1059ED8;
-    S->h[1] = 0x367CD507;
-    S->h[2] = 0x3070DD17;
-    S->h[3] = 0xF70E5939;
-    S->h[4] = 0xFFC00B31;
-    S->h[5] = 0x68581511;
-    S->h[6] = 0x64F98FA7;
-    S->h[7] = 0xBEFA4FA4;
-    S->t[0] = S->t[1] = S->buflen = S->nullt = 0;
-    S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0;
-}
-
-// datalen = number of bits
-void blake256_update(state *S, const uint8_t *data, uint64_t datalen) {
-    int left = S->buflen >> 3;
-    int fill = 64 - left;
-
-    if (left && (((datalen >> 3) & 0x3F) >= (unsigned) fill)) {
-        memcpy((void *) (S->buf + left), (void *) data, fill);
-        S->t[0] += 512;
-        if (S->t[0] == 0) S->t[1]++;
-        blake256_compress(S, S->buf);
-        data += fill;
-        datalen -= (fill << 3);
-        left = 0;
-    }
-
-    while (datalen >= 512) {
-        S->t[0] += 512;
-        if (S->t[0] == 0) S->t[1]++;
-        blake256_compress(S, data);
-        data += 64;
-        datalen -= 512;
-    }
-
-    if (datalen > 0) {
-        memcpy((void *) (S->buf + left), (void *) data, datalen >> 3);
-        S->buflen = (left << 3) + (int) datalen;
-    } else {
-        S->buflen = 0;
-    }
-}
-
-// datalen = number of bits
-void blake224_update(state *S, const uint8_t *data, uint64_t datalen) {
-    blake256_update(S, data, datalen);
-}
-
-void blake256_final_h(state *S, uint8_t *digest, uint8_t pa, uint8_t pb) {
-    uint8_t msglen[8];
-    uint32_t lo = S->t[0] + S->buflen, hi = S->t[1];
-    if (lo < (unsigned) S->buflen) hi++;
-    U32TO8(msglen + 0, hi);
-    U32TO8(msglen + 4, lo);
-
-    if (S->buflen == 440) { /* one padding byte */
-        S->t[0] -= 8;
-        blake256_update(S, &pa, 8);
-    } else {
-        if (S->buflen < 440) { /* enough space to fill the block  */
-            if (S->buflen == 0) S->nullt = 1;
-            S->t[0] -= 440 - S->buflen;
-            blake256_update(S, padding, 440 - S->buflen);
-        } else { /* need 2 compressions */
-            S->t[0] -= 512 - S->buflen;
-            blake256_update(S, padding, 512 - S->buflen);
-            S->t[0] -= 440;
-            blake256_update(S, padding + 1, 440);
-            S->nullt = 1;
-        }
-        blake256_update(S, &pb, 8);
-        S->t[0] -= 8;
-    }
-    S->t[0] -= 64;
-    blake256_update(S, msglen, 64);
-
-    U32TO8(digest +  0, S->h[0]);
-    U32TO8(digest +  4, S->h[1]);
-    U32TO8(digest +  8, S->h[2]);
-    U32TO8(digest + 12, S->h[3]);
-    U32TO8(digest + 16, S->h[4]);
-    U32TO8(digest + 20, S->h[5]);
-    U32TO8(digest + 24, S->h[6]);
-    U32TO8(digest + 28, S->h[7]);
-}
-
-void blake256_final(state *S, uint8_t *digest) {
-    blake256_final_h(S, digest, 0x81, 0x01);
-}
-
-void blake224_final(state *S, uint8_t *digest) {
-    blake256_final_h(S, digest, 0x80, 0x00);
-}
-
-// inlen = number of bytes
-void blake256_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) {
-    state S;
-    blake256_init(&S);
-    blake256_update(&S, in, inlen * 8);
-    blake256_final(&S, out);
-}
-
-// inlen = number of bytes
-void blake224_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) {
-    state S;
-    blake224_init(&S);
-    blake224_update(&S, in, inlen * 8);
-    blake224_final(&S, out);
-}
-
-// keylen = number of bytes
-void hmac_blake256_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) {
-    const uint8_t *key = _key;
-    uint8_t keyhash[32];
-    uint8_t pad[64];
-    uint64_t i;
-
-    if (keylen > 64) {
-        blake256_hash(keyhash, key, keylen);
-        key = keyhash;
-        keylen = 32;
-    }
-
-    blake256_init(&S->inner);
-    memset(pad, 0x36, 64);
-    for (i = 0; i < keylen; ++i) {
-        pad[i] ^= key[i];
-    }
-    blake256_update(&S->inner, pad, 512);
-
-    blake256_init(&S->outer);
-    memset(pad, 0x5c, 64);
-    for (i = 0; i < keylen; ++i) {
-        pad[i] ^= key[i];
-    }
-    blake256_update(&S->outer, pad, 512);
-
-    memset(keyhash, 0, 32);
-}
-
-// keylen = number of bytes
-void hmac_blake224_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) {
-    const uint8_t *key = _key;
-    uint8_t keyhash[32];
-    uint8_t pad[64];
-    uint64_t i;
-
-    if (keylen > 64) {
-        blake256_hash(keyhash, key, keylen);
-        key = keyhash;
-        keylen = 28;
-    }
-
-    blake224_init(&S->inner);
-    memset(pad, 0x36, 64);
-    for (i = 0; i < keylen; ++i) {
-        pad[i] ^= key[i];
-    }
-    blake224_update(&S->inner, pad, 512);
-
-    blake224_init(&S->outer);
-    memset(pad, 0x5c, 64);
-    for (i = 0; i < keylen; ++i) {
-        pad[i] ^= key[i];
-    }
-    blake224_update(&S->outer, pad, 512);
-
-    memset(keyhash, 0, 32);
-}
-
-// datalen = number of bits
-void hmac_blake256_update(hmac_state *S, const uint8_t *data, uint64_t datalen) {
-  // update the inner state
-  blake256_update(&S->inner, data, datalen);
-}
-
-// datalen = number of bits
-void hmac_blake224_update(hmac_state *S, const uint8_t *data, uint64_t datalen) {
-  // update the inner state
-  blake224_update(&S->inner, data, datalen);
-}
-
-void hmac_blake256_final(hmac_state *S, uint8_t *digest) {
-    uint8_t ihash[32];
-    blake256_final(&S->inner, ihash);
-    blake256_update(&S->outer, ihash, 256);
-    blake256_final(&S->outer, digest);
-    memset(ihash, 0, 32);
-}
-
-void hmac_blake224_final(hmac_state *S, uint8_t *digest) {
-    uint8_t ihash[32];
-    blake224_final(&S->inner, ihash);
-    blake224_update(&S->outer, ihash, 224);
-    blake224_final(&S->outer, digest);
-    memset(ihash, 0, 32);
-}
-
-// keylen = number of bytes; inlen = number of bytes
-void hmac_blake256_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) {
-    hmac_state S;
-    hmac_blake256_init(&S, key, keylen);
-    hmac_blake256_update(&S, in, inlen * 8);
-    hmac_blake256_final(&S, out);
-}
-
-// keylen = number of bytes; inlen = number of bytes
-void hmac_blake224_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) {
-    hmac_state S;
-    hmac_blake224_init(&S, key, keylen);
-    hmac_blake224_update(&S, in, inlen * 8);
-    hmac_blake224_final(&S, out);
-}
diff --git a/src/crypto/c_blake256.h b/src/crypto/c_blake256.h
deleted file mode 100644
index b9c2aad0..00000000
--- a/src/crypto/c_blake256.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#ifndef _BLAKE256_H_
-#define _BLAKE256_H_
-
-#include <stdint.h>
-
-typedef struct {
-  uint32_t h[8], s[4], t[2];
-  int buflen, nullt;
-  uint8_t buf[64];
-} state;
-
-typedef struct {
-  state inner;
-  state outer;
-} hmac_state;
-
-void blake256_init(state *);
-void blake224_init(state *);
-
-void blake256_update(state *, const uint8_t *, uint64_t);
-void blake224_update(state *, const uint8_t *, uint64_t);
-
-void blake256_final(state *, uint8_t *);
-void blake224_final(state *, uint8_t *);
-
-void blake256_hash(uint8_t *, const uint8_t *, uint64_t);
-void blake224_hash(uint8_t *, const uint8_t *, uint64_t);
-
-/* HMAC functions: */
-
-void hmac_blake256_init(hmac_state *, const uint8_t *, uint64_t);
-void hmac_blake224_init(hmac_state *, const uint8_t *, uint64_t);
-
-void hmac_blake256_update(hmac_state *, const uint8_t *, uint64_t);
-void hmac_blake224_update(hmac_state *, const uint8_t *, uint64_t);
-
-void hmac_blake256_final(hmac_state *, uint8_t *);
-void hmac_blake224_final(hmac_state *, uint8_t *);
-
-void hmac_blake256_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t);
-void hmac_blake224_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t);
-
-#endif /* _BLAKE256_H_ */
diff --git a/src/crypto/c_groestl.c b/src/crypto/c_groestl.c
deleted file mode 100644
index 0f57ea12..00000000
--- a/src/crypto/c_groestl.c
+++ /dev/null
@@ -1,360 +0,0 @@
-/* hash.c     April 2012
- * Groestl ANSI C code optimised for 32-bit machines
- * Author: Thomas Krinninger
- *
- *  This work is based on the implementation of
- *          Soeren S. Thomsen and Krystian Matusiewicz
- *          
- *
- */
-
-#include "c_groestl.h"
-#include "groestl_tables.h"
-
-#define P_TYPE 0
-#define Q_TYPE 1
-
-const uint8_t shift_Values[2][8] = {{0,1,2,3,4,5,6,7},{1,3,5,7,0,2,4,6}};
-
-const uint8_t indices_cyclic[15] = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6};
-
-
-#define ROTATE_COLUMN_DOWN(v1, v2, amount_bytes, temp_var) {temp_var = (v1<<(8*amount_bytes))|(v2>>(8*(4-amount_bytes))); \
-															v2 = (v2<<(8*amount_bytes))|(v1>>(8*(4-amount_bytes))); \
-															v1 = temp_var;}
-  
-
-#define COLUMN(x,y,i,c0,c1,c2,c3,c4,c5,c6,c7,tv1,tv2,tu,tl,t)				\
-   tu = T[2*(uint32_t)x[4*c0+0]];			    \
-   tl = T[2*(uint32_t)x[4*c0+0]+1];		    \
-   tv1 = T[2*(uint32_t)x[4*c1+1]];			\
-   tv2 = T[2*(uint32_t)x[4*c1+1]+1];			\
-   ROTATE_COLUMN_DOWN(tv1,tv2,1,t)	\
-   tu ^= tv1;						\
-   tl ^= tv2;						\
-   tv1 = T[2*(uint32_t)x[4*c2+2]];			\
-   tv2 = T[2*(uint32_t)x[4*c2+2]+1];			\
-   ROTATE_COLUMN_DOWN(tv1,tv2,2,t)	\
-   tu ^= tv1;						\
-   tl ^= tv2;   					\
-   tv1 = T[2*(uint32_t)x[4*c3+3]];			\
-   tv2 = T[2*(uint32_t)x[4*c3+3]+1];			\
-   ROTATE_COLUMN_DOWN(tv1,tv2,3,t)	\
-   tu ^= tv1;						\
-   tl ^= tv2;						\
-   tl ^= T[2*(uint32_t)x[4*c4+0]];			\
-   tu ^= T[2*(uint32_t)x[4*c4+0]+1];			\
-   tv1 = T[2*(uint32_t)x[4*c5+1]];			\
-   tv2 = T[2*(uint32_t)x[4*c5+1]+1];			\
-   ROTATE_COLUMN_DOWN(tv1,tv2,1,t)	\
-   tl ^= tv1;						\
-   tu ^= tv2;						\
-   tv1 = T[2*(uint32_t)x[4*c6+2]];			\
-   tv2 = T[2*(uint32_t)x[4*c6+2]+1];			\
-   ROTATE_COLUMN_DOWN(tv1,tv2,2,t)	\
-   tl ^= tv1;						\
-   tu ^= tv2;   					\
-   tv1 = T[2*(uint32_t)x[4*c7+3]];			\
-   tv2 = T[2*(uint32_t)x[4*c7+3]+1];			\
-   ROTATE_COLUMN_DOWN(tv1,tv2,3,t)	\
-   tl ^= tv1;						\
-   tu ^= tv2;						\
-   y[i] = tu;						\
-   y[i+1] = tl;
-
-
-/* compute one round of P (short variants) */
-static void RND512P(uint8_t *x, uint32_t *y, uint32_t r) {
-  uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp;
-  uint32_t* x32 = (uint32_t*)x;
-  x32[ 0] ^= 0x00000000^r;
-  x32[ 2] ^= 0x00000010^r;
-  x32[ 4] ^= 0x00000020^r;
-  x32[ 6] ^= 0x00000030^r;
-  x32[ 8] ^= 0x00000040^r;
-  x32[10] ^= 0x00000050^r;
-  x32[12] ^= 0x00000060^r;
-  x32[14] ^= 0x00000070^r;
-  COLUMN(x,y, 0,  0,  2,  4,  6,  9, 11, 13, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 2,  2,  4,  6,  8, 11, 13, 15,  1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 4,  4,  6,  8, 10, 13, 15,  1,  3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 6,  6,  8, 10, 12, 15,  1,  3,  5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 8,  8, 10, 12, 14,  1,  3,  5,  7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y,10, 10, 12, 14,  0,  3,  5,  7,  9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y,12, 12, 14,  0,  2,  5,  7,  9, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y,14, 14,  0,  2,  4,  7,  9, 11, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-}
-
-/* compute one round of Q (short variants) */
-static void RND512Q(uint8_t *x, uint32_t *y, uint32_t r) {
-  uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp;
-  uint32_t* x32 = (uint32_t*)x;
-  x32[ 0] = ~x32[ 0];
-  x32[ 1] ^= 0xffffffff^r;
-  x32[ 2] = ~x32[ 2];
-  x32[ 3] ^= 0xefffffff^r;
-  x32[ 4] = ~x32[ 4];
-  x32[ 5] ^= 0xdfffffff^r;
-  x32[ 6] = ~x32[ 6];
-  x32[ 7] ^= 0xcfffffff^r;
-  x32[ 8] = ~x32[ 8];
-  x32[ 9] ^= 0xbfffffff^r;
-  x32[10] = ~x32[10];
-  x32[11] ^= 0xafffffff^r;
-  x32[12] = ~x32[12];
-  x32[13] ^= 0x9fffffff^r;
-  x32[14] = ~x32[14];
-  x32[15] ^= 0x8fffffff^r;
-  COLUMN(x,y, 0,  2,  6, 10, 14,  1,  5,  9, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 2,  4,  8, 12,  0,  3,  7, 11, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 4,  6, 10, 14,  2,  5,  9, 13,  1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 6,  8, 12,  0,  4,  7, 11, 15,  3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 8, 10, 14,  2,  6,  9, 13,  1,  5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y,10, 12,  0,  4,  8, 11, 15,  3,  7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y,12, 14,  2,  6, 10, 13,  1,  5,  9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y,14,  0,  4,  8, 12, 15,  3,  7, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-}
-
-/* compute compression function (short variants) */
-static void F512(uint32_t *h, const uint32_t *m) {
-  int i;
-  uint32_t Ptmp[2*COLS512];
-  uint32_t Qtmp[2*COLS512];
-  uint32_t y[2*COLS512];
-  uint32_t z[2*COLS512];
-
-  for (i = 0; i < 2*COLS512; i++) {
-    z[i] = m[i];
-    Ptmp[i] = h[i]^m[i];
-  }
-
-  /* compute Q(m) */
-  RND512Q((uint8_t*)z, y, 0x00000000);
-  RND512Q((uint8_t*)y, z, 0x01000000);
-  RND512Q((uint8_t*)z, y, 0x02000000);
-  RND512Q((uint8_t*)y, z, 0x03000000);
-  RND512Q((uint8_t*)z, y, 0x04000000);
-  RND512Q((uint8_t*)y, z, 0x05000000);
-  RND512Q((uint8_t*)z, y, 0x06000000);
-  RND512Q((uint8_t*)y, z, 0x07000000);
-  RND512Q((uint8_t*)z, y, 0x08000000);
-  RND512Q((uint8_t*)y, Qtmp, 0x09000000);
-
-  /* compute P(h+m) */
-  RND512P((uint8_t*)Ptmp, y, 0x00000000);
-  RND512P((uint8_t*)y, z, 0x00000001);
-  RND512P((uint8_t*)z, y, 0x00000002);
-  RND512P((uint8_t*)y, z, 0x00000003);
-  RND512P((uint8_t*)z, y, 0x00000004);
-  RND512P((uint8_t*)y, z, 0x00000005);
-  RND512P((uint8_t*)z, y, 0x00000006);
-  RND512P((uint8_t*)y, z, 0x00000007);
-  RND512P((uint8_t*)z, y, 0x00000008);
-  RND512P((uint8_t*)y, Ptmp, 0x00000009);
-
-  /* compute P(h+m) + Q(m) + h */
-  for (i = 0; i < 2*COLS512; i++) {
-    h[i] ^= Ptmp[i]^Qtmp[i];
-  }
-}
-
-
-/* digest up to msglen bytes of input (full blocks only) */
-static void Transform(groestlHashState *ctx,
-	       const uint8_t *input, 
-	       int msglen) {
-
-  /* digest message, one block at a time */
-  for (; msglen >= SIZE512; 
-       msglen -= SIZE512, input += SIZE512) {
-    F512(ctx->chaining,(uint32_t*)input);
-
-    /* increment block counter */
-    ctx->block_counter1++;
-    if (ctx->block_counter1 == 0) ctx->block_counter2++;
-  }
-}
-
-/* given state h, do h <- P(h)+h */
-static void OutputTransformation(groestlHashState *ctx) {
-  int j;
-  uint32_t temp[2*COLS512];
-  uint32_t y[2*COLS512];
-  uint32_t z[2*COLS512];
-
-
-
-	for (j = 0; j < 2*COLS512; j++) {
-	  temp[j] = ctx->chaining[j];
-	}
-	RND512P((uint8_t*)temp, y, 0x00000000);
-	RND512P((uint8_t*)y, z, 0x00000001);
-	RND512P((uint8_t*)z, y, 0x00000002);
-	RND512P((uint8_t*)y, z, 0x00000003);
-	RND512P((uint8_t*)z, y, 0x00000004);
-	RND512P((uint8_t*)y, z, 0x00000005);
-	RND512P((uint8_t*)z, y, 0x00000006);
-	RND512P((uint8_t*)y, z, 0x00000007);
-	RND512P((uint8_t*)z, y, 0x00000008);
-	RND512P((uint8_t*)y, temp, 0x00000009);
-	for (j = 0; j < 2*COLS512; j++) {
-	  ctx->chaining[j] ^= temp[j];
-	}									  
-}
-
-/* initialise context */
-static void Init(groestlHashState* ctx) {
-  int i = 0;
-  /* allocate memory for state and data buffer */
-
-  for(;i<(SIZE512/sizeof(uint32_t));i++)
-  {
-	ctx->chaining[i] = 0;
-  }
-
-  /* set initial value */
-  ctx->chaining[2*COLS512-1] = u32BIG((uint32_t)HASH_BIT_LEN);
-
-  /* set other variables */
-  ctx->buf_ptr = 0;
-  ctx->block_counter1 = 0;
-  ctx->block_counter2 = 0;
-  ctx->bits_in_last_byte = 0;
-}
-
-/* update state with databitlen bits of input */
-static void Update(groestlHashState* ctx,
-		  const BitSequence* input,
-		  DataLength databitlen) {
-  int index = 0;
-  int msglen = (int)(databitlen/8);
-  int rem = (int)(databitlen%8);
-
-  /* if the buffer contains data that has not yet been digested, first
-     add data to buffer until full */
-  if (ctx->buf_ptr) {
-    while (ctx->buf_ptr < SIZE512 && index < msglen) {
-      ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
-    }
-    if (ctx->buf_ptr < SIZE512) {
-      /* buffer still not full, return */
-      if (rem) {
-	ctx->bits_in_last_byte = rem;
-	ctx->buffer[(int)ctx->buf_ptr++] = input[index];
-      }
-      return;
-    }
-
-    /* digest buffer */
-    ctx->buf_ptr = 0;
-    Transform(ctx, ctx->buffer, SIZE512);
-  }
-
-  /* digest bulk of message */
-  Transform(ctx, input+index, msglen-index);
-  index += ((msglen-index)/SIZE512)*SIZE512;
-
-  /* store remaining data in buffer */
-  while (index < msglen) {
-    ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
-  }
-
-  /* if non-integral number of bytes have been supplied, store
-     remaining bits in last byte, together with information about
-     number of bits */
-  if (rem) {
-    ctx->bits_in_last_byte = rem;
-    ctx->buffer[(int)ctx->buf_ptr++] = input[index];
-  }
-}
-
-#define BILB ctx->bits_in_last_byte
-
-/* finalise: process remaining data (including padding), perform
-   output transformation, and write hash result to 'output' */
-static void Final(groestlHashState* ctx,
-		 BitSequence* output) {
-  int i, j = 0, hashbytelen = HASH_BIT_LEN/8;
-  uint8_t *s = (BitSequence*)ctx->chaining;
-
-  /* pad with '1'-bit and first few '0'-bits */
-  if (BILB) {
-    ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
-    ctx->buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-BILB);
-    BILB = 0;
-  }
-  else ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
-
-  /* pad with '0'-bits */
-  if (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) {
-    /* padding requires two blocks */
-    while (ctx->buf_ptr < SIZE512) {
-      ctx->buffer[(int)ctx->buf_ptr++] = 0;
-    }
-    /* digest first padding block */
-    Transform(ctx, ctx->buffer, SIZE512);
-    ctx->buf_ptr = 0;
-  }
-  while (ctx->buf_ptr < SIZE512-LENGTHFIELDLEN) {
-    ctx->buffer[(int)ctx->buf_ptr++] = 0;
-  }
-
-  /* length padding */
-  ctx->block_counter1++;
-  if (ctx->block_counter1 == 0) ctx->block_counter2++;
-  ctx->buf_ptr = SIZE512;
-
-  while (ctx->buf_ptr > SIZE512-(int)sizeof(uint32_t)) {
-    ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter1;
-    ctx->block_counter1 >>= 8;
-  }
-  while (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) {
-    ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter2;
-    ctx->block_counter2 >>= 8;
-  }
-  /* digest final padding block */
-  Transform(ctx, ctx->buffer, SIZE512); 
-  /* perform output transformation */
-  OutputTransformation(ctx);
-
-  /* store hash result in output */
-  for (i = SIZE512-hashbytelen; i < SIZE512; i++,j++) {
-    output[j] = s[i];
-  }
-
-  /* zeroise relevant variables and deallocate memory */
-  for (i = 0; i < COLS512; i++) {
-    ctx->chaining[i] = 0;
-  }
-  for (i = 0; i < SIZE512; i++) {
-    ctx->buffer[i] = 0;
-  }
-}
-
-/* hash bit sequence */
-void groestl(const BitSequence* data, 
-		DataLength databitlen,
-		BitSequence* hashval) {
-
-  groestlHashState context;
-
-  /* initialise */
-    Init(&context);
-
-
-  /* process message */
-  Update(&context, data, databitlen);
-
-  /* finalise */
-  Final(&context, hashval);
-}
-/*
-static int crypto_hash(unsigned char *out,
-		const unsigned char *in,
-		unsigned long long len)
-{
-  groestl(in, 8*len, out);
-  return 0;
-}
-
-*/
diff --git a/src/crypto/c_groestl.h b/src/crypto/c_groestl.h
deleted file mode 100644
index 2b513393..00000000
--- a/src/crypto/c_groestl.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef __hash_h
-#define __hash_h
-/*
-#include "crypto_uint8.h"
-#include "crypto_uint32.h"
-#include "crypto_uint64.h"
-#include "crypto_hash.h" 
-
-typedef crypto_uint8 uint8_t; 
-typedef crypto_uint32 uint32_t; 
-typedef crypto_uint64 uint64_t;
-*/
-#include <stdint.h>
-
-#include "hash.h"
-
-/* some sizes (number of bytes) */
-#define ROWS 8
-#define LENGTHFIELDLEN ROWS
-#define COLS512 8
-
-#define SIZE512 (ROWS*COLS512)
-
-#define ROUNDS512 10
-#define HASH_BIT_LEN 256
-
-#define ROTL32(v, n) ((((v)<<(n))|((v)>>(32-(n))))&li_32(ffffffff))
-
-
-#define li_32(h) 0x##h##u
-#define EXT_BYTE(var,n) ((uint8_t)((uint32_t)(var) >> (8*n)))
-#define u32BIG(a)				\
-  ((ROTL32(a,8) & li_32(00FF00FF)) |		\
-   (ROTL32(a,24) & li_32(FF00FF00)))
-
-
-/* NIST API begin */
-typedef struct {
-  uint32_t chaining[SIZE512/sizeof(uint32_t)];            /* actual state */
-  uint32_t block_counter1,
-  block_counter2;         /* message block counter(s) */
-  BitSequence buffer[SIZE512];      /* data buffer */
-  int buf_ptr;              /* data buffer pointer */
-  int bits_in_last_byte;    /* no. of message bits in last byte of
-			       data buffer */
-} groestlHashState;
-
-/*void Init(hashState*);
-void Update(hashState*, const BitSequence*, DataLength);
-void Final(hashState*, BitSequence*); */
-void groestl(const BitSequence*, DataLength, BitSequence*);
-/* NIST API end   */
-
-/*
-int crypto_hash(unsigned char *out,
-		const unsigned char *in,
-		unsigned long long len);
-*/
-
-#endif /* __hash_h */
diff --git a/src/crypto/c_jh.c b/src/crypto/c_jh.c
deleted file mode 100644
index 728f3bbe..00000000
--- a/src/crypto/c_jh.c
+++ /dev/null
@@ -1,367 +0,0 @@
-/*This program gives the 64-bit optimized bitslice implementation of JH using ANSI C
-
-   --------------------------------
-   Performance
-
-   Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile T6600 2.2GHz)
-   Operating System: 64-bit Ubuntu 10.04 (Linux kernel 2.6.32-22-generic)
-   Speed for long message:
-   1) 45.8 cycles/byte   compiler: Intel C++ Compiler 11.1   compilation option: icc -O2
-   2) 56.8 cycles/byte   compiler: gcc 4.4.3                 compilation option: gcc -O3
-
-   --------------------------------
-   Last Modified: January 16, 2011
-*/
-
-#include "c_jh.h"
-
-#include <stdint.h>
-#include <string.h>
-
-/*typedef unsigned long long uint64;*/
-typedef uint64_t uint64;
-
-/*define data alignment for different C compilers*/
-#if defined(__GNUC__)
-      #define DATA_ALIGN16(x) x __attribute__ ((aligned(16)))
-#else
-      #define DATA_ALIGN16(x) __declspec(align(16)) x
-#endif
-
-
-typedef struct {
-	int hashbitlen;	   	              /*the message digest size*/
-	unsigned long long databitlen;    /*the message size in bits*/
-	unsigned long long datasize_in_buffer;      /*the size of the message remained in buffer; assumed to be multiple of 8bits except for the last partial block at the end of the message*/
-	DATA_ALIGN16(uint64 x[8][2]);     /*the 1024-bit state, ( x[i][0] || x[i][1] ) is the ith row of the state in the pseudocode*/
-	unsigned char buffer[64];         /*the 512-bit message block to be hashed;*/
-} hashState;
-
-
-/*The initial hash value H(0)*/
-const unsigned char JH224_H0[128]={0x2d,0xfe,0xdd,0x62,0xf9,0x9a,0x98,0xac,0xae,0x7c,0xac,0xd6,0x19,0xd6,0x34,0xe7,0xa4,0x83,0x10,0x5,0xbc,0x30,0x12,0x16,0xb8,0x60,0x38,0xc6,0xc9,0x66,0x14,0x94,0x66,0xd9,0x89,0x9f,0x25,0x80,0x70,0x6f,0xce,0x9e,0xa3,0x1b,0x1d,0x9b,0x1a,0xdc,0x11,0xe8,0x32,0x5f,0x7b,0x36,0x6e,0x10,0xf9,0x94,0x85,0x7f,0x2,0xfa,0x6,0xc1,0x1b,0x4f,0x1b,0x5c,0xd8,0xc8,0x40,0xb3,0x97,0xf6,0xa1,0x7f,0x6e,0x73,0x80,0x99,0xdc,0xdf,0x93,0xa5,0xad,0xea,0xa3,0xd3,0xa4,0x31,0xe8,0xde,0xc9,0x53,0x9a,0x68,0x22,0xb4,0xa9,0x8a,0xec,0x86,0xa1,0xe4,0xd5,0x74,0xac,0x95,0x9c,0xe5,0x6c,0xf0,0x15,0x96,0xd,0xea,0xb5,0xab,0x2b,0xbf,0x96,0x11,0xdc,0xf0,0xdd,0x64,0xea,0x6e};
-const unsigned char JH256_H0[128]={0xeb,0x98,0xa3,0x41,0x2c,0x20,0xd3,0xeb,0x92,0xcd,0xbe,0x7b,0x9c,0xb2,0x45,0xc1,0x1c,0x93,0x51,0x91,0x60,0xd4,0xc7,0xfa,0x26,0x0,0x82,0xd6,0x7e,0x50,0x8a,0x3,0xa4,0x23,0x9e,0x26,0x77,0x26,0xb9,0x45,0xe0,0xfb,0x1a,0x48,0xd4,0x1a,0x94,0x77,0xcd,0xb5,0xab,0x26,0x2,0x6b,0x17,0x7a,0x56,0xf0,0x24,0x42,0xf,0xff,0x2f,0xa8,0x71,0xa3,0x96,0x89,0x7f,0x2e,0x4d,0x75,0x1d,0x14,0x49,0x8,0xf7,0x7d,0xe2,0x62,0x27,0x76,0x95,0xf7,0x76,0x24,0x8f,0x94,0x87,0xd5,0xb6,0x57,0x47,0x80,0x29,0x6c,0x5c,0x5e,0x27,0x2d,0xac,0x8e,0xd,0x6c,0x51,0x84,0x50,0xc6,0x57,0x5,0x7a,0xf,0x7b,0xe4,0xd3,0x67,0x70,0x24,0x12,0xea,0x89,0xe3,0xab,0x13,0xd3,0x1c,0xd7,0x69};
-const unsigned char JH384_H0[128]={0x48,0x1e,0x3b,0xc6,0xd8,0x13,0x39,0x8a,0x6d,0x3b,0x5e,0x89,0x4a,0xde,0x87,0x9b,0x63,0xfa,0xea,0x68,0xd4,0x80,0xad,0x2e,0x33,0x2c,0xcb,0x21,0x48,0xf,0x82,0x67,0x98,0xae,0xc8,0x4d,0x90,0x82,0xb9,0x28,0xd4,0x55,0xea,0x30,0x41,0x11,0x42,0x49,0x36,0xf5,0x55,0xb2,0x92,0x48,0x47,0xec,0xc7,0x25,0xa,0x93,0xba,0xf4,0x3c,0xe1,0x56,0x9b,0x7f,0x8a,0x27,0xdb,0x45,0x4c,0x9e,0xfc,0xbd,0x49,0x63,0x97,0xaf,0xe,0x58,0x9f,0xc2,0x7d,0x26,0xaa,0x80,0xcd,0x80,0xc0,0x8b,0x8c,0x9d,0xeb,0x2e,0xda,0x8a,0x79,0x81,0xe8,0xf8,0xd5,0x37,0x3a,0xf4,0x39,0x67,0xad,0xdd,0xd1,0x7a,0x71,0xa9,0xb4,0xd3,0xbd,0xa4,0x75,0xd3,0x94,0x97,0x6c,0x3f,0xba,0x98,0x42,0x73,0x7f};
-const unsigned char JH512_H0[128]={0x6f,0xd1,0x4b,0x96,0x3e,0x0,0xaa,0x17,0x63,0x6a,0x2e,0x5,0x7a,0x15,0xd5,0x43,0x8a,0x22,0x5e,0x8d,0xc,0x97,0xef,0xb,0xe9,0x34,0x12,0x59,0xf2,0xb3,0xc3,0x61,0x89,0x1d,0xa0,0xc1,0x53,0x6f,0x80,0x1e,0x2a,0xa9,0x5,0x6b,0xea,0x2b,0x6d,0x80,0x58,0x8e,0xcc,0xdb,0x20,0x75,0xba,0xa6,0xa9,0xf,0x3a,0x76,0xba,0xf8,0x3b,0xf7,0x1,0x69,0xe6,0x5,0x41,0xe3,0x4a,0x69,0x46,0xb5,0x8a,0x8e,0x2e,0x6f,0xe6,0x5a,0x10,0x47,0xa7,0xd0,0xc1,0x84,0x3c,0x24,0x3b,0x6e,0x71,0xb1,0x2d,0x5a,0xc1,0x99,0xcf,0x57,0xf6,0xec,0x9d,0xb1,0xf8,0x56,0xa7,0x6,0x88,0x7c,0x57,0x16,0xb1,0x56,0xe3,0xc2,0xfc,0xdf,0xe6,0x85,0x17,0xfb,0x54,0x5a,0x46,0x78,0xcc,0x8c,0xdd,0x4b};
-
-/*42 round constants, each round constant is 32-byte (256-bit)*/
-const unsigned char E8_bitslice_roundconstant[42][32]={
-{0x72,0xd5,0xde,0xa2,0xdf,0x15,0xf8,0x67,0x7b,0x84,0x15,0xa,0xb7,0x23,0x15,0x57,0x81,0xab,0xd6,0x90,0x4d,0x5a,0x87,0xf6,0x4e,0x9f,0x4f,0xc5,0xc3,0xd1,0x2b,0x40},
-{0xea,0x98,0x3a,0xe0,0x5c,0x45,0xfa,0x9c,0x3,0xc5,0xd2,0x99,0x66,0xb2,0x99,0x9a,0x66,0x2,0x96,0xb4,0xf2,0xbb,0x53,0x8a,0xb5,0x56,0x14,0x1a,0x88,0xdb,0xa2,0x31},
-{0x3,0xa3,0x5a,0x5c,0x9a,0x19,0xe,0xdb,0x40,0x3f,0xb2,0xa,0x87,0xc1,0x44,0x10,0x1c,0x5,0x19,0x80,0x84,0x9e,0x95,0x1d,0x6f,0x33,0xeb,0xad,0x5e,0xe7,0xcd,0xdc},
-{0x10,0xba,0x13,0x92,0x2,0xbf,0x6b,0x41,0xdc,0x78,0x65,0x15,0xf7,0xbb,0x27,0xd0,0xa,0x2c,0x81,0x39,0x37,0xaa,0x78,0x50,0x3f,0x1a,0xbf,0xd2,0x41,0x0,0x91,0xd3},
-{0x42,0x2d,0x5a,0xd,0xf6,0xcc,0x7e,0x90,0xdd,0x62,0x9f,0x9c,0x92,0xc0,0x97,0xce,0x18,0x5c,0xa7,0xb,0xc7,0x2b,0x44,0xac,0xd1,0xdf,0x65,0xd6,0x63,0xc6,0xfc,0x23},
-{0x97,0x6e,0x6c,0x3,0x9e,0xe0,0xb8,0x1a,0x21,0x5,0x45,0x7e,0x44,0x6c,0xec,0xa8,0xee,0xf1,0x3,0xbb,0x5d,0x8e,0x61,0xfa,0xfd,0x96,0x97,0xb2,0x94,0x83,0x81,0x97},
-{0x4a,0x8e,0x85,0x37,0xdb,0x3,0x30,0x2f,0x2a,0x67,0x8d,0x2d,0xfb,0x9f,0x6a,0x95,0x8a,0xfe,0x73,0x81,0xf8,0xb8,0x69,0x6c,0x8a,0xc7,0x72,0x46,0xc0,0x7f,0x42,0x14},
-{0xc5,0xf4,0x15,0x8f,0xbd,0xc7,0x5e,0xc4,0x75,0x44,0x6f,0xa7,0x8f,0x11,0xbb,0x80,0x52,0xde,0x75,0xb7,0xae,0xe4,0x88,0xbc,0x82,0xb8,0x0,0x1e,0x98,0xa6,0xa3,0xf4},
-{0x8e,0xf4,0x8f,0x33,0xa9,0xa3,0x63,0x15,0xaa,0x5f,0x56,0x24,0xd5,0xb7,0xf9,0x89,0xb6,0xf1,0xed,0x20,0x7c,0x5a,0xe0,0xfd,0x36,0xca,0xe9,0x5a,0x6,0x42,0x2c,0x36},
-{0xce,0x29,0x35,0x43,0x4e,0xfe,0x98,0x3d,0x53,0x3a,0xf9,0x74,0x73,0x9a,0x4b,0xa7,0xd0,0xf5,0x1f,0x59,0x6f,0x4e,0x81,0x86,0xe,0x9d,0xad,0x81,0xaf,0xd8,0x5a,0x9f},
-{0xa7,0x5,0x6,0x67,0xee,0x34,0x62,0x6a,0x8b,0xb,0x28,0xbe,0x6e,0xb9,0x17,0x27,0x47,0x74,0x7,0x26,0xc6,0x80,0x10,0x3f,0xe0,0xa0,0x7e,0x6f,0xc6,0x7e,0x48,0x7b},
-{0xd,0x55,0xa,0xa5,0x4a,0xf8,0xa4,0xc0,0x91,0xe3,0xe7,0x9f,0x97,0x8e,0xf1,0x9e,0x86,0x76,0x72,0x81,0x50,0x60,0x8d,0xd4,0x7e,0x9e,0x5a,0x41,0xf3,0xe5,0xb0,0x62},
-{0xfc,0x9f,0x1f,0xec,0x40,0x54,0x20,0x7a,0xe3,0xe4,0x1a,0x0,0xce,0xf4,0xc9,0x84,0x4f,0xd7,0x94,0xf5,0x9d,0xfa,0x95,0xd8,0x55,0x2e,0x7e,0x11,0x24,0xc3,0x54,0xa5},
-{0x5b,0xdf,0x72,0x28,0xbd,0xfe,0x6e,0x28,0x78,0xf5,0x7f,0xe2,0xf,0xa5,0xc4,0xb2,0x5,0x89,0x7c,0xef,0xee,0x49,0xd3,0x2e,0x44,0x7e,0x93,0x85,0xeb,0x28,0x59,0x7f},
-{0x70,0x5f,0x69,0x37,0xb3,0x24,0x31,0x4a,0x5e,0x86,0x28,0xf1,0x1d,0xd6,0xe4,0x65,0xc7,0x1b,0x77,0x4,0x51,0xb9,0x20,0xe7,0x74,0xfe,0x43,0xe8,0x23,0xd4,0x87,0x8a},
-{0x7d,0x29,0xe8,0xa3,0x92,0x76,0x94,0xf2,0xdd,0xcb,0x7a,0x9,0x9b,0x30,0xd9,0xc1,0x1d,0x1b,0x30,0xfb,0x5b,0xdc,0x1b,0xe0,0xda,0x24,0x49,0x4f,0xf2,0x9c,0x82,0xbf},
-{0xa4,0xe7,0xba,0x31,0xb4,0x70,0xbf,0xff,0xd,0x32,0x44,0x5,0xde,0xf8,0xbc,0x48,0x3b,0xae,0xfc,0x32,0x53,0xbb,0xd3,0x39,0x45,0x9f,0xc3,0xc1,0xe0,0x29,0x8b,0xa0},
-{0xe5,0xc9,0x5,0xfd,0xf7,0xae,0x9,0xf,0x94,0x70,0x34,0x12,0x42,0x90,0xf1,0x34,0xa2,0x71,0xb7,0x1,0xe3,0x44,0xed,0x95,0xe9,0x3b,0x8e,0x36,0x4f,0x2f,0x98,0x4a},
-{0x88,0x40,0x1d,0x63,0xa0,0x6c,0xf6,0x15,0x47,0xc1,0x44,0x4b,0x87,0x52,0xaf,0xff,0x7e,0xbb,0x4a,0xf1,0xe2,0xa,0xc6,0x30,0x46,0x70,0xb6,0xc5,0xcc,0x6e,0x8c,0xe6},
-{0xa4,0xd5,0xa4,0x56,0xbd,0x4f,0xca,0x0,0xda,0x9d,0x84,0x4b,0xc8,0x3e,0x18,0xae,0x73,0x57,0xce,0x45,0x30,0x64,0xd1,0xad,0xe8,0xa6,0xce,0x68,0x14,0x5c,0x25,0x67},
-{0xa3,0xda,0x8c,0xf2,0xcb,0xe,0xe1,0x16,0x33,0xe9,0x6,0x58,0x9a,0x94,0x99,0x9a,0x1f,0x60,0xb2,0x20,0xc2,0x6f,0x84,0x7b,0xd1,0xce,0xac,0x7f,0xa0,0xd1,0x85,0x18},
-{0x32,0x59,0x5b,0xa1,0x8d,0xdd,0x19,0xd3,0x50,0x9a,0x1c,0xc0,0xaa,0xa5,0xb4,0x46,0x9f,0x3d,0x63,0x67,0xe4,0x4,0x6b,0xba,0xf6,0xca,0x19,0xab,0xb,0x56,0xee,0x7e},
-{0x1f,0xb1,0x79,0xea,0xa9,0x28,0x21,0x74,0xe9,0xbd,0xf7,0x35,0x3b,0x36,0x51,0xee,0x1d,0x57,0xac,0x5a,0x75,0x50,0xd3,0x76,0x3a,0x46,0xc2,0xfe,0xa3,0x7d,0x70,0x1},
-{0xf7,0x35,0xc1,0xaf,0x98,0xa4,0xd8,0x42,0x78,0xed,0xec,0x20,0x9e,0x6b,0x67,0x79,0x41,0x83,0x63,0x15,0xea,0x3a,0xdb,0xa8,0xfa,0xc3,0x3b,0x4d,0x32,0x83,0x2c,0x83},
-{0xa7,0x40,0x3b,0x1f,0x1c,0x27,0x47,0xf3,0x59,0x40,0xf0,0x34,0xb7,0x2d,0x76,0x9a,0xe7,0x3e,0x4e,0x6c,0xd2,0x21,0x4f,0xfd,0xb8,0xfd,0x8d,0x39,0xdc,0x57,0x59,0xef},
-{0x8d,0x9b,0xc,0x49,0x2b,0x49,0xeb,0xda,0x5b,0xa2,0xd7,0x49,0x68,0xf3,0x70,0xd,0x7d,0x3b,0xae,0xd0,0x7a,0x8d,0x55,0x84,0xf5,0xa5,0xe9,0xf0,0xe4,0xf8,0x8e,0x65},
-{0xa0,0xb8,0xa2,0xf4,0x36,0x10,0x3b,0x53,0xc,0xa8,0x7,0x9e,0x75,0x3e,0xec,0x5a,0x91,0x68,0x94,0x92,0x56,0xe8,0x88,0x4f,0x5b,0xb0,0x5c,0x55,0xf8,0xba,0xbc,0x4c},
-{0xe3,0xbb,0x3b,0x99,0xf3,0x87,0x94,0x7b,0x75,0xda,0xf4,0xd6,0x72,0x6b,0x1c,0x5d,0x64,0xae,0xac,0x28,0xdc,0x34,0xb3,0x6d,0x6c,0x34,0xa5,0x50,0xb8,0x28,0xdb,0x71},
-{0xf8,0x61,0xe2,0xf2,0x10,0x8d,0x51,0x2a,0xe3,0xdb,0x64,0x33,0x59,0xdd,0x75,0xfc,0x1c,0xac,0xbc,0xf1,0x43,0xce,0x3f,0xa2,0x67,0xbb,0xd1,0x3c,0x2,0xe8,0x43,0xb0},
-{0x33,0xa,0x5b,0xca,0x88,0x29,0xa1,0x75,0x7f,0x34,0x19,0x4d,0xb4,0x16,0x53,0x5c,0x92,0x3b,0x94,0xc3,0xe,0x79,0x4d,0x1e,0x79,0x74,0x75,0xd7,0xb6,0xee,0xaf,0x3f},
-{0xea,0xa8,0xd4,0xf7,0xbe,0x1a,0x39,0x21,0x5c,0xf4,0x7e,0x9,0x4c,0x23,0x27,0x51,0x26,0xa3,0x24,0x53,0xba,0x32,0x3c,0xd2,0x44,0xa3,0x17,0x4a,0x6d,0xa6,0xd5,0xad},
-{0xb5,0x1d,0x3e,0xa6,0xaf,0xf2,0xc9,0x8,0x83,0x59,0x3d,0x98,0x91,0x6b,0x3c,0x56,0x4c,0xf8,0x7c,0xa1,0x72,0x86,0x60,0x4d,0x46,0xe2,0x3e,0xcc,0x8,0x6e,0xc7,0xf6},
-{0x2f,0x98,0x33,0xb3,0xb1,0xbc,0x76,0x5e,0x2b,0xd6,0x66,0xa5,0xef,0xc4,0xe6,0x2a,0x6,0xf4,0xb6,0xe8,0xbe,0xc1,0xd4,0x36,0x74,0xee,0x82,0x15,0xbc,0xef,0x21,0x63},
-{0xfd,0xc1,0x4e,0xd,0xf4,0x53,0xc9,0x69,0xa7,0x7d,0x5a,0xc4,0x6,0x58,0x58,0x26,0x7e,0xc1,0x14,0x16,0x6,0xe0,0xfa,0x16,0x7e,0x90,0xaf,0x3d,0x28,0x63,0x9d,0x3f},
-{0xd2,0xc9,0xf2,0xe3,0x0,0x9b,0xd2,0xc,0x5f,0xaa,0xce,0x30,0xb7,0xd4,0xc,0x30,0x74,0x2a,0x51,0x16,0xf2,0xe0,0x32,0x98,0xd,0xeb,0x30,0xd8,0xe3,0xce,0xf8,0x9a},
-{0x4b,0xc5,0x9e,0x7b,0xb5,0xf1,0x79,0x92,0xff,0x51,0xe6,0x6e,0x4,0x86,0x68,0xd3,0x9b,0x23,0x4d,0x57,0xe6,0x96,0x67,0x31,0xcc,0xe6,0xa6,0xf3,0x17,0xa,0x75,0x5},
-{0xb1,0x76,0x81,0xd9,0x13,0x32,0x6c,0xce,0x3c,0x17,0x52,0x84,0xf8,0x5,0xa2,0x62,0xf4,0x2b,0xcb,0xb3,0x78,0x47,0x15,0x47,0xff,0x46,0x54,0x82,0x23,0x93,0x6a,0x48},
-{0x38,0xdf,0x58,0x7,0x4e,0x5e,0x65,0x65,0xf2,0xfc,0x7c,0x89,0xfc,0x86,0x50,0x8e,0x31,0x70,0x2e,0x44,0xd0,0xb,0xca,0x86,0xf0,0x40,0x9,0xa2,0x30,0x78,0x47,0x4e},
-{0x65,0xa0,0xee,0x39,0xd1,0xf7,0x38,0x83,0xf7,0x5e,0xe9,0x37,0xe4,0x2c,0x3a,0xbd,0x21,0x97,0xb2,0x26,0x1,0x13,0xf8,0x6f,0xa3,0x44,0xed,0xd1,0xef,0x9f,0xde,0xe7},
-{0x8b,0xa0,0xdf,0x15,0x76,0x25,0x92,0xd9,0x3c,0x85,0xf7,0xf6,0x12,0xdc,0x42,0xbe,0xd8,0xa7,0xec,0x7c,0xab,0x27,0xb0,0x7e,0x53,0x8d,0x7d,0xda,0xaa,0x3e,0xa8,0xde},
-{0xaa,0x25,0xce,0x93,0xbd,0x2,0x69,0xd8,0x5a,0xf6,0x43,0xfd,0x1a,0x73,0x8,0xf9,0xc0,0x5f,0xef,0xda,0x17,0x4a,0x19,0xa5,0x97,0x4d,0x66,0x33,0x4c,0xfd,0x21,0x6a},
-{0x35,0xb4,0x98,0x31,0xdb,0x41,0x15,0x70,0xea,0x1e,0xf,0xbb,0xed,0xcd,0x54,0x9b,0x9a,0xd0,0x63,0xa1,0x51,0x97,0x40,0x72,0xf6,0x75,0x9d,0xbf,0x91,0x47,0x6f,0xe2}};
-
-
-static void E8(hashState *state);  /*The bijective function E8, in bitslice form*/
-static void F8(hashState *state);  /*The compression function F8 */
-
-/*The API functions*/
-static HashReturn Init(hashState *state, int hashbitlen);
-static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen);
-static HashReturn Final(hashState *state, BitSequence *hashval);
-HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval);
-
-/*swapping bit 2i with bit 2i+1 of 64-bit x*/
-#define SWAP1(x)   (x) = ((((x) & 0x5555555555555555ULL) << 1) | (((x) & 0xaaaaaaaaaaaaaaaaULL) >> 1));
-/*swapping bits 4i||4i+1 with bits 4i+2||4i+3 of 64-bit x*/
-#define SWAP2(x)   (x) = ((((x) & 0x3333333333333333ULL) << 2) | (((x) & 0xccccccccccccccccULL) >> 2));
-/*swapping bits 8i||8i+1||8i+2||8i+3 with bits 8i+4||8i+5||8i+6||8i+7 of 64-bit x*/
-#define SWAP4(x)   (x) = ((((x) & 0x0f0f0f0f0f0f0f0fULL) << 4) | (((x) & 0xf0f0f0f0f0f0f0f0ULL) >> 4));
-/*swapping bits 16i||16i+1||......||16i+7  with bits 16i+8||16i+9||......||16i+15 of 64-bit x*/
-#define SWAP8(x)   (x) = ((((x) & 0x00ff00ff00ff00ffULL) << 8) | (((x) & 0xff00ff00ff00ff00ULL) >> 8));
-/*swapping bits 32i||32i+1||......||32i+15 with bits 32i+16||32i+17||......||32i+31 of 64-bit x*/
-#define SWAP16(x)  (x) = ((((x) & 0x0000ffff0000ffffULL) << 16) | (((x) & 0xffff0000ffff0000ULL) >> 16));
-/*swapping bits 64i||64i+1||......||64i+31 with bits 64i+32||64i+33||......||64i+63 of 64-bit x*/
-#define SWAP32(x)  (x) = (((x) << 32) | ((x) >> 32));
-
-/*The MDS transform*/
-#define L(m0,m1,m2,m3,m4,m5,m6,m7) \
-      (m4) ^= (m1);                \
-      (m5) ^= (m2);                \
-      (m6) ^= (m0) ^ (m3);         \
-      (m7) ^= (m0);                \
-      (m0) ^= (m5);                \
-      (m1) ^= (m6);                \
-      (m2) ^= (m4) ^ (m7);         \
-      (m3) ^= (m4);
-
-/*Two Sboxes are computed in parallel, each Sbox implements S0 and S1, selected by a constant bit*/
-/*The reason to compute two Sboxes in parallel is to try to fully utilize the parallel processing power*/
-#define SS(m0,m1,m2,m3,m4,m5,m6,m7,cc0,cc1)   \
-      m3  = ~(m3);                  \
-      m7  = ~(m7);                  \
-      m0 ^= ((~(m2)) & (cc0));      \
-      m4 ^= ((~(m6)) & (cc1));      \
-      temp0 = (cc0) ^ ((m0) & (m1));\
-      temp1 = (cc1) ^ ((m4) & (m5));\
-      m0 ^= ((m2) & (m3));          \
-      m4 ^= ((m6) & (m7));          \
-      m3 ^= ((~(m1)) & (m2));       \
-      m7 ^= ((~(m5)) & (m6));       \
-      m1 ^= ((m0) & (m2));          \
-      m5 ^= ((m4) & (m6));          \
-      m2 ^= ((m0) & (~(m3)));       \
-      m6 ^= ((m4) & (~(m7)));       \
-      m0 ^= ((m1) | (m3));          \
-      m4 ^= ((m5) | (m7));          \
-      m3 ^= ((m1) & (m2));          \
-      m7 ^= ((m5) & (m6));          \
-      m1 ^= (temp0 & (m0));         \
-      m5 ^= (temp1 & (m4));         \
-      m2 ^= temp0;                  \
-      m6 ^= temp1;
-
-/*The bijective function E8, in bitslice form*/
-static void E8(hashState *state)
-{
-      uint64 i,roundnumber,temp0,temp1;
-
-      for (roundnumber = 0; roundnumber < 42; roundnumber = roundnumber+7) {
-            /*round 7*roundnumber+0: Sbox, MDS and Swapping layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-                  SWAP1(state->x[1][i]); SWAP1(state->x[3][i]); SWAP1(state->x[5][i]); SWAP1(state->x[7][i]);
-            }
-
-            /*round 7*roundnumber+1: Sbox, MDS and Swapping layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-                  SWAP2(state->x[1][i]); SWAP2(state->x[3][i]); SWAP2(state->x[5][i]); SWAP2(state->x[7][i]);
-            }
-
-            /*round 7*roundnumber+2: Sbox, MDS and Swapping layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-                  SWAP4(state->x[1][i]); SWAP4(state->x[3][i]); SWAP4(state->x[5][i]); SWAP4(state->x[7][i]);
-            }
-
-            /*round 7*roundnumber+3: Sbox, MDS and Swapping layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-                  SWAP8(state->x[1][i]); SWAP8(state->x[3][i]); SWAP8(state->x[5][i]); SWAP8(state->x[7][i]);
-            }
-
-            /*round 7*roundnumber+4: Sbox, MDS and Swapping layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-                  SWAP16(state->x[1][i]); SWAP16(state->x[3][i]); SWAP16(state->x[5][i]); SWAP16(state->x[7][i]);
-            }
-
-            /*round 7*roundnumber+5: Sbox, MDS and Swapping layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-                  SWAP32(state->x[1][i]); SWAP32(state->x[3][i]); SWAP32(state->x[5][i]); SWAP32(state->x[7][i]);
-            }
-
-            /*round 7*roundnumber+6: Sbox and MDS layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-            }
-            /*round 7*roundnumber+6: swapping layer*/
-            for (i = 1; i < 8; i = i+2) {
-                  temp0 = state->x[i][0]; state->x[i][0] = state->x[i][1]; state->x[i][1] = temp0;
-            }
-      }
-
-}
-
-/*The compression function F8 */
-static void F8(hashState *state)
-{
-      uint64  i;
-
-      /*xor the 512-bit message with the fist half of the 1024-bit hash state*/
-      for (i = 0; i < 8; i++)  state->x[i >> 1][i & 1] ^= ((uint64*)state->buffer)[i];
-
-      /*the bijective function E8 */
-      E8(state);
-
-      /*xor the 512-bit message with the second half of the 1024-bit hash state*/
-      for (i = 0; i < 8; i++)  state->x[(8+i) >> 1][(8+i) & 1] ^= ((uint64*)state->buffer)[i];
-}
-
-/*before hashing a message, initialize the hash state as H0 */
-static HashReturn Init(hashState *state, int hashbitlen)
-{
-	  state->databitlen = 0;
-	  state->datasize_in_buffer = 0;
-
-      /*initialize the initial hash value of JH*/
-      state->hashbitlen = hashbitlen;
-
-      /*load the intital hash value into state*/
-      switch (hashbitlen)
-      {
-            case 224: memcpy(state->x,JH224_H0,128); break;
-            case 256: memcpy(state->x,JH256_H0,128); break;
-            case 384: memcpy(state->x,JH384_H0,128); break;
-            case 512: memcpy(state->x,JH512_H0,128); break;
-      }
-
-      return(SUCCESS);
-}
-
-
-/*hash each 512-bit message block, except the last partial block*/
-static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen)
-{
-      DataLength index; /*the starting address of the data to be compressed*/
-
-      state->databitlen += databitlen;
-      index = 0;
-
-      /*if there is remaining data in the buffer, fill it to a full message block first*/
-      /*we assume that the size of the data in the buffer is the multiple of 8 bits if it is not at the end of a message*/
-
-      /*There is data in the buffer, but the incoming data is insufficient for a full block*/
-      if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) < 512)  ) {
-            if ( (databitlen & 7) == 0 ) {
-                 memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3)) ;
-		    }
-            else memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3)+1) ;
-            state->datasize_in_buffer += databitlen;
-            databitlen = 0;
-      }
-
-      /*There is data in the buffer, and the incoming data is sufficient for a full block*/
-      if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) >= 512)  ) {
-	        memcpy( state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3) ) ;
-	        index = 64-(state->datasize_in_buffer >> 3);
-	        databitlen = databitlen - (512 - state->datasize_in_buffer);
-	        F8(state);
-	        state->datasize_in_buffer = 0;
-      }
-
-      /*hash the remaining full message blocks*/
-      for ( ; databitlen >= 512; index = index+64, databitlen = databitlen - 512) {
-            memcpy(state->buffer, data+index, 64);
-            F8(state);
-      }
-
-      /*store the partial block into buffer, assume that -- if part of the last byte is not part of the message, then that part consists of 0 bits*/
-      if ( databitlen > 0) {
-            if ((databitlen & 7) == 0)
-                  memcpy(state->buffer, data+index, (databitlen & 0x1ff) >> 3);
-            else
-                  memcpy(state->buffer, data+index, ((databitlen & 0x1ff) >> 3)+1);
-            state->datasize_in_buffer = databitlen;
-      }
-
-      return(SUCCESS);
-}
-
-/*pad the message, process the padded block(s), truncate the hash value H to obtain the message digest*/
-static HashReturn Final(hashState *state, BitSequence *hashval)
-{
-      unsigned int i;
-
-      if ( (state->databitlen & 0x1ff) == 0 ) {
-            /*pad the message when databitlen is multiple of 512 bits, then process the padded block*/
-            memset(state->buffer, 0, 64);
-            state->buffer[0]  = 0x80;
-            state->buffer[63] = state->databitlen & 0xff;
-            state->buffer[62] = (state->databitlen >> 8)  & 0xff;
-            state->buffer[61] = (state->databitlen >> 16) & 0xff;
-            state->buffer[60] = (state->databitlen >> 24) & 0xff;
-            state->buffer[59] = (state->databitlen >> 32) & 0xff;
-            state->buffer[58] = (state->databitlen >> 40) & 0xff;
-            state->buffer[57] = (state->databitlen >> 48) & 0xff;
-            state->buffer[56] = (state->databitlen >> 56) & 0xff;
-            F8(state);
-      }
-      else {
-		    /*set the rest of the bytes in the buffer to 0*/
-            if ( (state->datasize_in_buffer & 7) == 0)
-                  for (i = (state->databitlen & 0x1ff) >> 3; i < 64; i++)  state->buffer[i] = 0;
-            else
-                  for (i = ((state->databitlen & 0x1ff) >> 3)+1; i < 64; i++)  state->buffer[i] = 0;
-
-            /*pad and process the partial block when databitlen is not multiple of 512 bits, then hash the padded blocks*/
-            state->buffer[((state->databitlen & 0x1ff) >> 3)] |= 1 << (7- (state->databitlen & 7));
-
-            F8(state);
-            memset(state->buffer, 0, 64);
-            state->buffer[63] = state->databitlen & 0xff;
-            state->buffer[62] = (state->databitlen >> 8) & 0xff;
-            state->buffer[61] = (state->databitlen >> 16) & 0xff;
-            state->buffer[60] = (state->databitlen >> 24) & 0xff;
-            state->buffer[59] = (state->databitlen >> 32) & 0xff;
-            state->buffer[58] = (state->databitlen >> 40) & 0xff;
-            state->buffer[57] = (state->databitlen >> 48) & 0xff;
-            state->buffer[56] = (state->databitlen >> 56) & 0xff;
-            F8(state);
-      }
-
-      /*truncating the final hash value to generate the message digest*/
-      switch(state->hashbitlen) {
-            case 224: memcpy(hashval,(unsigned char*)state->x+64+36,28);  break;
-            case 256: memcpy(hashval,(unsigned char*)state->x+64+32,32);  break;
-            case 384: memcpy(hashval,(unsigned char*)state->x+64+16,48);  break;
-            case 512: memcpy(hashval,(unsigned char*)state->x+64,64);     break;
-      }
-
-      return(SUCCESS);
-}
-
-/* hash a message,
-   three inputs: message digest size in bits (hashbitlen); message (data); message length in bits (databitlen)
-   one output:   message digest (hashval)
-*/
-HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval)
-{
-      hashState state;
-
-      if ( hashbitlen == 224 || hashbitlen == 256 || hashbitlen == 384 || hashbitlen == 512 ) {
-            Init(&state, hashbitlen);
-            Update(&state, data, databitlen);
-            Final(&state, hashval);
-            return SUCCESS;
-      }
-      else
-            return(BAD_HASHLEN);
-}
diff --git a/src/crypto/c_jh.h b/src/crypto/c_jh.h
deleted file mode 100644
index d10d40fe..00000000
--- a/src/crypto/c_jh.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*This program gives the 64-bit optimized bitslice implementation of JH using ANSI C
-
-   --------------------------------
-   Performance
-
-   Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile T6600 2.2GHz)
-   Operating System: 64-bit Ubuntu 10.04 (Linux kernel 2.6.32-22-generic)
-   Speed for long message:
-   1) 45.8 cycles/byte   compiler: Intel C++ Compiler 11.1   compilation option: icc -O2
-   2) 56.8 cycles/byte   compiler: gcc 4.4.3                 compilation option: gcc -O3
-
-   --------------------------------
-   Last Modified: January 16, 2011
-*/
-#pragma once
-
-#include "hash.h"
-
-HashReturn jh_hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval);
diff --git a/src/crypto/c_skein.c b/src/crypto/c_skein.c
deleted file mode 100644
index 994e4d46..00000000
--- a/src/crypto/c_skein.c
+++ /dev/null
@@ -1,701 +0,0 @@
-/***********************************************************************
-**
-** Implementation of the Skein hash function.
-**
-** Source code author: Doug Whiting, 2008.
-**
-** This algorithm and source code is released to the public domain.
-** 
-************************************************************************/
-
-#define  SKEIN_PORT_CODE /* instantiate any code in skein_port.h */
-
-#include <stddef.h>                          /* get size_t definition */
-#include <string.h>      /* get the memcpy/memset functions */
-#include "c_skein.h"       /* get the Skein API definitions   */
-
-#ifndef SKEIN_512_NIST_MAX_HASHBITS
-#define SKEIN_512_NIST_MAX_HASHBITS (512)
-#endif
-
-#define  SKEIN_MODIFIER_WORDS  ( 2)          /* number of modifier (tweak) words */
-
-#define  SKEIN_512_STATE_WORDS ( 8)
-#define  SKEIN_MAX_STATE_WORDS (16)
-
-#define  SKEIN_512_STATE_BYTES ( 8*SKEIN_512_STATE_WORDS)
-#define  SKEIN_512_STATE_BITS  (64*SKEIN_512_STATE_WORDS)
-#define  SKEIN_512_BLOCK_BYTES ( 8*SKEIN_512_STATE_WORDS)
-
-#define SKEIN_RND_SPECIAL       (1000u)
-#define SKEIN_RND_KEY_INITIAL   (SKEIN_RND_SPECIAL+0u)
-#define SKEIN_RND_KEY_INJECT    (SKEIN_RND_SPECIAL+1u)
-#define SKEIN_RND_FEED_FWD      (SKEIN_RND_SPECIAL+2u)
-
-typedef struct
-{
-  size_t  hashBitLen;                      /* size of hash result, in bits */
-  size_t  bCnt;                            /* current byte count in buffer b[] */
-  u64b_t  T[SKEIN_MODIFIER_WORDS];         /* tweak words: T[0]=byte cnt, T[1]=flags */
-} Skein_Ctxt_Hdr_t;
-
-typedef struct                               /*  512-bit Skein hash context structure */
-{
-  Skein_Ctxt_Hdr_t h;                      /* common header context variables */
-  u64b_t  X[SKEIN_512_STATE_WORDS];        /* chaining variables */
-  u08b_t  b[SKEIN_512_BLOCK_BYTES];        /* partial block buffer (8-byte aligned) */
-} Skein_512_Ctxt_t;
-
-/*   Skein APIs for (incremental) "straight hashing" */
-static int  Skein_512_Init  (Skein_512_Ctxt_t *ctx, size_t hashBitLen);
-static int  Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt);
-static int  Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
-
-#ifndef SKEIN_TREE_HASH
-#define SKEIN_TREE_HASH (1)
-#endif
-
-/*****************************************************************
-** "Internal" Skein definitions
-**    -- not needed for sequential hashing API, but will be 
-**           helpful for other uses of Skein (e.g., tree hash mode).
-**    -- included here so that they can be shared between
-**           reference and optimized code.
-******************************************************************/
-
-/* tweak word T[1]: bit field starting positions */
-#define SKEIN_T1_BIT(BIT)       ((BIT) - 64)            /* offset 64 because it's the second word  */
-
-#define SKEIN_T1_POS_TREE_LVL   SKEIN_T1_BIT(112)       /* bits 112..118: level in hash tree       */
-#define SKEIN_T1_POS_BIT_PAD    SKEIN_T1_BIT(119)       /* bit  119     : partial final input byte */
-#define SKEIN_T1_POS_BLK_TYPE   SKEIN_T1_BIT(120)       /* bits 120..125: type field               */
-#define SKEIN_T1_POS_FIRST      SKEIN_T1_BIT(126)       /* bits 126     : first block flag         */
-#define SKEIN_T1_POS_FINAL      SKEIN_T1_BIT(127)       /* bit  127     : final block flag         */
-
-/* tweak word T[1]: flag bit definition(s) */
-#define SKEIN_T1_FLAG_FIRST     (((u64b_t)  1 ) << SKEIN_T1_POS_FIRST)
-#define SKEIN_T1_FLAG_FINAL     (((u64b_t)  1 ) << SKEIN_T1_POS_FINAL)
-#define SKEIN_T1_FLAG_BIT_PAD   (((u64b_t)  1 ) << SKEIN_T1_POS_BIT_PAD)
-
-/* tweak word T[1]: tree level bit field mask */
-#define SKEIN_T1_TREE_LVL_MASK  (((u64b_t)0x7F) << SKEIN_T1_POS_TREE_LVL)
-#define SKEIN_T1_TREE_LEVEL(n)  (((u64b_t) (n)) << SKEIN_T1_POS_TREE_LVL)
-
-/* tweak word T[1]: block type field */
-#define SKEIN_BLK_TYPE_KEY      ( 0)                    /* key, for MAC and KDF */
-#define SKEIN_BLK_TYPE_CFG      ( 4)                    /* configuration block */
-#define SKEIN_BLK_TYPE_PERS     ( 8)                    /* personalization string */
-#define SKEIN_BLK_TYPE_PK       (12)                    /* public key (for digital signature hashing) */
-#define SKEIN_BLK_TYPE_KDF      (16)                    /* key identifier for KDF */
-#define SKEIN_BLK_TYPE_NONCE    (20)                    /* nonce for PRNG */
-#define SKEIN_BLK_TYPE_MSG      (48)                    /* message processing */
-#define SKEIN_BLK_TYPE_OUT      (63)                    /* output stage */
-#define SKEIN_BLK_TYPE_MASK     (63)                    /* bit field mask */
-
-#define SKEIN_T1_BLK_TYPE(T)   (((u64b_t) (SKEIN_BLK_TYPE_##T)) << SKEIN_T1_POS_BLK_TYPE)
-#define SKEIN_T1_BLK_TYPE_KEY   SKEIN_T1_BLK_TYPE(KEY)  /* key, for MAC and KDF */
-#define SKEIN_T1_BLK_TYPE_CFG   SKEIN_T1_BLK_TYPE(CFG)  /* configuration block */
-#define SKEIN_T1_BLK_TYPE_PERS  SKEIN_T1_BLK_TYPE(PERS) /* personalization string */
-#define SKEIN_T1_BLK_TYPE_PK    SKEIN_T1_BLK_TYPE(PK)   /* public key (for digital signature hashing) */
-#define SKEIN_T1_BLK_TYPE_KDF   SKEIN_T1_BLK_TYPE(KDF)  /* key identifier for KDF */
-#define SKEIN_T1_BLK_TYPE_NONCE SKEIN_T1_BLK_TYPE(NONCE)/* nonce for PRNG */
-#define SKEIN_T1_BLK_TYPE_MSG   SKEIN_T1_BLK_TYPE(MSG)  /* message processing */
-#define SKEIN_T1_BLK_TYPE_OUT   SKEIN_T1_BLK_TYPE(OUT)  /* output stage */
-#define SKEIN_T1_BLK_TYPE_MASK  SKEIN_T1_BLK_TYPE(MASK) /* field bit mask */
-
-#define SKEIN_T1_BLK_TYPE_CFG_FINAL       (SKEIN_T1_BLK_TYPE_CFG | SKEIN_T1_FLAG_FINAL)
-#define SKEIN_T1_BLK_TYPE_OUT_FINAL       (SKEIN_T1_BLK_TYPE_OUT | SKEIN_T1_FLAG_FINAL)
-
-#define SKEIN_VERSION           (1)
-
-#ifndef SKEIN_ID_STRING_LE      /* allow compile-time personalization */
-#define SKEIN_ID_STRING_LE      (0x33414853)            /* "SHA3" (little-endian)*/
-#endif
-
-#define SKEIN_MK_64(hi32,lo32)  ((lo32) + (((u64b_t) (hi32)) << 32))
-#define SKEIN_SCHEMA_VER        SKEIN_MK_64(SKEIN_VERSION,SKEIN_ID_STRING_LE)
-#define SKEIN_KS_PARITY         SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22)
-
-#define SKEIN_CFG_STR_LEN       (4*8)
-
-/* bit field definitions in config block treeInfo word */
-#define SKEIN_CFG_TREE_LEAF_SIZE_POS  ( 0)
-#define SKEIN_CFG_TREE_NODE_SIZE_POS  ( 8)
-#define SKEIN_CFG_TREE_MAX_LEVEL_POS  (16)
-
-#define SKEIN_CFG_TREE_LEAF_SIZE_MSK  (((u64b_t) 0xFF) << SKEIN_CFG_TREE_LEAF_SIZE_POS)
-#define SKEIN_CFG_TREE_NODE_SIZE_MSK  (((u64b_t) 0xFF) << SKEIN_CFG_TREE_NODE_SIZE_POS)
-#define SKEIN_CFG_TREE_MAX_LEVEL_MSK  (((u64b_t) 0xFF) << SKEIN_CFG_TREE_MAX_LEVEL_POS)
-
-#define SKEIN_CFG_TREE_INFO(leaf,node,maxLvl)                   \
-  ( (((u64b_t)(leaf  )) << SKEIN_CFG_TREE_LEAF_SIZE_POS) |    \
-  (((u64b_t)(node  )) << SKEIN_CFG_TREE_NODE_SIZE_POS) |    \
-  (((u64b_t)(maxLvl)) << SKEIN_CFG_TREE_MAX_LEVEL_POS) )
-
-#define SKEIN_CFG_TREE_INFO_SEQUENTIAL SKEIN_CFG_TREE_INFO(0,0,0) /* use as treeInfo in InitExt() call for sequential processing */
-
-/*
-**   Skein macros for getting/setting tweak words, etc.
-**   These are useful for partial input bytes, hash tree init/update, etc.
-**/
-#define Skein_Get_Tweak(ctxPtr,TWK_NUM)         ((ctxPtr)->h.T[TWK_NUM])
-#define Skein_Set_Tweak(ctxPtr,TWK_NUM,tVal)    {(ctxPtr)->h.T[TWK_NUM] = (tVal);}
-
-#define Skein_Get_T0(ctxPtr)    Skein_Get_Tweak(ctxPtr,0)
-#define Skein_Get_T1(ctxPtr)    Skein_Get_Tweak(ctxPtr,1)
-#define Skein_Set_T0(ctxPtr,T0) Skein_Set_Tweak(ctxPtr,0,T0)
-#define Skein_Set_T1(ctxPtr,T1) Skein_Set_Tweak(ctxPtr,1,T1)
-
-/* set both tweak words at once */
-#define Skein_Set_T0_T1(ctxPtr,T0,T1)           \
-{                                           \
-  Skein_Set_T0(ctxPtr,(T0));                  \
-  Skein_Set_T1(ctxPtr,(T1));                  \
-}
-
-#define Skein_Set_Type(ctxPtr,BLK_TYPE)         \
-  Skein_Set_T1(ctxPtr,SKEIN_T1_BLK_TYPE_##BLK_TYPE)
-
-/* set up for starting with a new type: h.T[0]=0; h.T[1] = NEW_TYPE; h.bCnt=0; */
-#define Skein_Start_New_Type(ctxPtr,BLK_TYPE)   \
-{ Skein_Set_T0_T1(ctxPtr,0,SKEIN_T1_FLAG_FIRST | SKEIN_T1_BLK_TYPE_##BLK_TYPE); (ctxPtr)->h.bCnt=0; }
-
-#define Skein_Clear_First_Flag(hdr)      { (hdr).T[1] &= ~SKEIN_T1_FLAG_FIRST;       }
-#define Skein_Set_Bit_Pad_Flag(hdr)      { (hdr).T[1] |=  SKEIN_T1_FLAG_BIT_PAD;     }
-
-#define Skein_Set_Tree_Level(hdr,height) { (hdr).T[1] |= SKEIN_T1_TREE_LEVEL(height);}
-
-/*****************************************************************
-** "Internal" Skein definitions for debugging and error checking
-******************************************************************/
-#define Skein_Show_Block(bits,ctx,X,blkPtr,wPtr,ksEvenPtr,ksOddPtr)
-#define Skein_Show_Round(bits,ctx,r,X)
-#define Skein_Show_R_Ptr(bits,ctx,r,X_ptr)
-#define Skein_Show_Final(bits,ctx,cnt,outPtr)
-#define Skein_Show_Key(bits,ctx,key,keyBytes)
-
-
-#ifndef SKEIN_ERR_CHECK        /* run-time checks (e.g., bad params, uninitialized context)? */
-#define Skein_Assert(x,retCode)/* default: ignore all Asserts, for performance */
-#define Skein_assert(x)
-#elif   defined(SKEIN_ASSERT)
-#include <assert.h>     
-#define Skein_Assert(x,retCode) assert(x) 
-#define Skein_assert(x)         assert(x) 
-#else
-#include <assert.h>     
-#define Skein_Assert(x,retCode) { if (!(x)) return retCode; } /*  caller  error */
-#define Skein_assert(x)         assert(x)                     /* internal error */
-#endif
-
-/*****************************************************************
-** Skein block function constants (shared across Ref and Opt code)
-******************************************************************/
-enum    
-{   
-  /* Skein_512 round rotation constants */
-  R_512_0_0=46, R_512_0_1=36, R_512_0_2=19, R_512_0_3=37,
-  R_512_1_0=33, R_512_1_1=27, R_512_1_2=14, R_512_1_3=42,
-  R_512_2_0=17, R_512_2_1=49, R_512_2_2=36, R_512_2_3=39,
-  R_512_3_0=44, R_512_3_1= 9, R_512_3_2=54, R_512_3_3=56,
-  R_512_4_0=39, R_512_4_1=30, R_512_4_2=34, R_512_4_3=24,
-  R_512_5_0=13, R_512_5_1=50, R_512_5_2=10, R_512_5_3=17,
-  R_512_6_0=25, R_512_6_1=29, R_512_6_2=39, R_512_6_3=43,
-  R_512_7_0= 8, R_512_7_1=35, R_512_7_2=56, R_512_7_3=22,
-};
-
-#ifndef SKEIN_ROUNDS
-#define SKEIN_512_ROUNDS_TOTAL (72)
-#else                                        /* allow command-line define in range 8*(5..14)   */
-#define SKEIN_512_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/ 10) + 5) % 10) + 5))
-#endif
-
-
-/*
-***************** Pre-computed Skein IVs *******************
-**
-** NOTE: these values are not "magic" constants, but
-** are generated using the Threefish block function.
-** They are pre-computed here only for speed; i.e., to
-** avoid the need for a Threefish call during Init().
-**
-** The IV for any fixed hash length may be pre-computed.
-** Only the most common values are included here.
-**
-************************************************************
-**/
-
-#define MK_64 SKEIN_MK_64
-
-/* blkSize =  512 bits. hashSize =  256 bits */
-const u64b_t SKEIN_512_IV_256[] =
-    {
-    MK_64(0xCCD044A1,0x2FDB3E13),
-    MK_64(0xE8359030,0x1A79A9EB),
-    MK_64(0x55AEA061,0x4F816E6F),
-    MK_64(0x2A2767A4,0xAE9B94DB),
-    MK_64(0xEC06025E,0x74DD7683),
-    MK_64(0xE7A436CD,0xC4746251),
-    MK_64(0xC36FBAF9,0x393AD185),
-    MK_64(0x3EEDBA18,0x33EDFC13)
-    };
-
-#ifndef SKEIN_USE_ASM
-#define SKEIN_USE_ASM   (0)                     /* default is all C code (no ASM) */
-#endif
-
-#ifndef SKEIN_LOOP
-#define SKEIN_LOOP 001                          /* default: unroll 256 and 512, but not 1024 */
-#endif
-
-#define BLK_BITS        (WCNT*64)               /* some useful definitions for code here */
-#define KW_TWK_BASE     (0)
-#define KW_KEY_BASE     (3)
-#define ks              (kw + KW_KEY_BASE)                
-#define ts              (kw + KW_TWK_BASE)
-
-#ifdef SKEIN_DEBUG
-#define DebugSaveTweak(ctx) { ctx->h.T[0] = ts[0]; ctx->h.T[1] = ts[1]; }
-#else
-#define DebugSaveTweak(ctx)
-#endif
-
-/*****************************  Skein_512 ******************************/
-#if !(SKEIN_USE_ASM & 512)
-static void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd)
-    { /* do it in C */
-    enum
-        {
-        WCNT = SKEIN_512_STATE_WORDS
-        };
-#undef  RCNT
-#define RCNT  (SKEIN_512_ROUNDS_TOTAL/8)
-
-#ifdef  SKEIN_LOOP                              /* configure how much to unroll the loop */
-#define SKEIN_UNROLL_512 (((SKEIN_LOOP)/10)%10)
-#else
-#define SKEIN_UNROLL_512 (0)
-#endif
-
-#if SKEIN_UNROLL_512
-#if (RCNT % SKEIN_UNROLL_512)
-#error "Invalid SKEIN_UNROLL_512"               /* sanity check on unroll count */
-#endif
-    size_t  r;
-    u64b_t  kw[WCNT+4+RCNT*2];                  /* key schedule words : chaining vars + tweak + "rotation"*/
-#else
-    u64b_t  kw[WCNT+4];                         /* key schedule words : chaining vars + tweak */
-#endif
-    u64b_t  X0,X1,X2,X3,X4,X5,X6,X7;            /* local copy of vars, for speed */
-    u64b_t  w [WCNT];                           /* local copy of input block */
-#ifdef SKEIN_DEBUG
-    const u64b_t *Xptr[8];                      /* use for debugging (help compiler put Xn in registers) */
-    Xptr[0] = &X0;  Xptr[1] = &X1;  Xptr[2] = &X2;  Xptr[3] = &X3;
-    Xptr[4] = &X4;  Xptr[5] = &X5;  Xptr[6] = &X6;  Xptr[7] = &X7;
-#endif
-
-    Skein_assert(blkCnt != 0);                  /* never call with blkCnt == 0! */
-    ts[0] = ctx->h.T[0];
-    ts[1] = ctx->h.T[1];
-    do  {
-        /* this implementation only supports 2**64 input bytes (no carry out here) */
-        ts[0] += byteCntAdd;                    /* update processed length */
-
-        /* precompute the key schedule for this block */
-        ks[0] = ctx->X[0];
-        ks[1] = ctx->X[1];
-        ks[2] = ctx->X[2];
-        ks[3] = ctx->X[3];
-        ks[4] = ctx->X[4];
-        ks[5] = ctx->X[5];
-        ks[6] = ctx->X[6];
-        ks[7] = ctx->X[7];
-        ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ 
-                ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;
-
-        ts[2] = ts[0] ^ ts[1];
-
-        Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */
-        DebugSaveTweak(ctx);
-        Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts);
-
-        X0   = w[0] + ks[0];                    /* do the first full key injection */
-        X1   = w[1] + ks[1];
-        X2   = w[2] + ks[2];
-        X3   = w[3] + ks[3];
-        X4   = w[4] + ks[4];
-        X5   = w[5] + ks[5] + ts[0];
-        X6   = w[6] + ks[6] + ts[1];
-        X7   = w[7] + ks[7];
-
-        blkPtr += SKEIN_512_BLOCK_BYTES;
-
-        Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr);
-        /* run the rounds */
-#define Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                  \
-    X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \
-    X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \
-    X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \
-    X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \
-
-#if SKEIN_UNROLL_512 == 0                       
-#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)      /* unrolled */  \
-    Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                      \
-    Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr);
-
-#define I512(R)                                                     \
-    X0   += ks[((R)+1) % 9];   /* inject the key schedule value */  \
-    X1   += ks[((R)+2) % 9];                                        \
-    X2   += ks[((R)+3) % 9];                                        \
-    X3   += ks[((R)+4) % 9];                                        \
-    X4   += ks[((R)+5) % 9];                                        \
-    X5   += ks[((R)+6) % 9] + ts[((R)+1) % 3];                      \
-    X6   += ks[((R)+7) % 9] + ts[((R)+2) % 3];                      \
-    X7   += ks[((R)+8) % 9] +     (R)+1;                            \
-    Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
-#else                                       /* looping version */
-#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                      \
-    Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                      \
-    Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr);
-
-#define I512(R)                                                     \
-    X0   += ks[r+(R)+0];        /* inject the key schedule value */ \
-    X1   += ks[r+(R)+1];                                            \
-    X2   += ks[r+(R)+2];                                            \
-    X3   += ks[r+(R)+3];                                            \
-    X4   += ks[r+(R)+4];                                            \
-    X5   += ks[r+(R)+5] + ts[r+(R)+0];                              \
-    X6   += ks[r+(R)+6] + ts[r+(R)+1];                              \
-    X7   += ks[r+(R)+7] +    r+(R)   ;                              \
-    ks[r +       (R)+8] = ks[r+(R)-1];  /* rotate key schedule */   \
-    ts[r +       (R)+2] = ts[r+(R)-1];                              \
-    Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
-
-    for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_512)   /* loop thru it */
-#endif                         /* end of looped code definitions */
-        {
-#define R512_8_rounds(R)  /* do 8 full rounds */  \
-        R512(0,1,2,3,4,5,6,7,R_512_0,8*(R)+ 1);   \
-        R512(2,1,4,7,6,5,0,3,R_512_1,8*(R)+ 2);   \
-        R512(4,1,6,3,0,5,2,7,R_512_2,8*(R)+ 3);   \
-        R512(6,1,0,7,2,5,4,3,R_512_3,8*(R)+ 4);   \
-        I512(2*(R));                              \
-        R512(0,1,2,3,4,5,6,7,R_512_4,8*(R)+ 5);   \
-        R512(2,1,4,7,6,5,0,3,R_512_5,8*(R)+ 6);   \
-        R512(4,1,6,3,0,5,2,7,R_512_6,8*(R)+ 7);   \
-        R512(6,1,0,7,2,5,4,3,R_512_7,8*(R)+ 8);   \
-        I512(2*(R)+1);        /* and key injection */
-
-        R512_8_rounds( 0);
-
-#define R512_Unroll_R(NN) ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_512 > (NN)))
-
-  #if   R512_Unroll_R( 1)
-        R512_8_rounds( 1);
-  #endif
-  #if   R512_Unroll_R( 2)
-        R512_8_rounds( 2);
-  #endif
-  #if   R512_Unroll_R( 3)
-        R512_8_rounds( 3);
-  #endif
-  #if   R512_Unroll_R( 4)
-        R512_8_rounds( 4);
-  #endif
-  #if   R512_Unroll_R( 5)
-        R512_8_rounds( 5);
-  #endif
-  #if   R512_Unroll_R( 6)
-        R512_8_rounds( 6);
-  #endif
-  #if   R512_Unroll_R( 7)
-        R512_8_rounds( 7);
-  #endif
-  #if   R512_Unroll_R( 8)
-        R512_8_rounds( 8);
-  #endif
-  #if   R512_Unroll_R( 9)
-        R512_8_rounds( 9);
-  #endif
-  #if   R512_Unroll_R(10)
-        R512_8_rounds(10);
-  #endif
-  #if   R512_Unroll_R(11)
-        R512_8_rounds(11);
-  #endif
-  #if   R512_Unroll_R(12)
-        R512_8_rounds(12);
-  #endif
-  #if   R512_Unroll_R(13)
-        R512_8_rounds(13);
-  #endif
-  #if   R512_Unroll_R(14)
-        R512_8_rounds(14);
-  #endif
-  #if  (SKEIN_UNROLL_512 > 14)
-#error  "need more unrolling in Skein_512_Process_Block"
-  #endif
-        }
-
-        /* do the final "feedforward" xor, update context chaining vars */
-        ctx->X[0] = X0 ^ w[0];
-        ctx->X[1] = X1 ^ w[1];
-        ctx->X[2] = X2 ^ w[2];
-        ctx->X[3] = X3 ^ w[3];
-        ctx->X[4] = X4 ^ w[4];
-        ctx->X[5] = X5 ^ w[5];
-        ctx->X[6] = X6 ^ w[6];
-        ctx->X[7] = X7 ^ w[7];
-        Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X);
-
-        ts[1] &= ~SKEIN_T1_FLAG_FIRST;
-        }
-    while (--blkCnt);
-    ctx->h.T[0] = ts[0];
-    ctx->h.T[1] = ts[1];
-    }
-#endif
-
-/*****************************************************************/
-/*     512-bit Skein                                             */
-/*****************************************************************/
-
-/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
-/* init the context for a straight hashing operation  */
-static int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen)
-    {
-    union
-        {
-        u08b_t  b[SKEIN_512_STATE_BYTES];
-        u64b_t  w[SKEIN_512_STATE_WORDS];
-        } cfg;                              /* config block */
-        
-    Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
-    ctx->h.hashBitLen = hashBitLen;         /* output hash bit count */
-
-    switch (hashBitLen)
-        {             /* use pre-computed values, where available */
-#ifndef SKEIN_NO_PRECOMP
-        case  256: memcpy(ctx->X,SKEIN_512_IV_256,sizeof(ctx->X));  break;
-#endif
-        default:
-            /* here if there is no precomputed IV value available */
-            /* build/process the config block, type == CONFIG (could be precomputed) */
-            Skein_Start_New_Type(ctx,CFG_FINAL);        /* set tweaks: T0=0; T1=CFG | FINAL */
-
-            cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);  /* set the schema, version */
-            cfg.w[1] = Skein_Swap64(hashBitLen);        /* hash result length in bits */
-            cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
-            memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */
-
-            /* compute the initial chaining values from config block */
-            memset(ctx->X,0,sizeof(ctx->X));            /* zero the chaining variables */
-            Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
-            break;
-        }
-
-    /* The chaining vars ctx->X are now initialized for the given hashBitLen. */
-    /* Set up to process the data message portion of the hash (default) */
-    Skein_Start_New_Type(ctx,MSG);              /* T0=0, T1= MSG type */
-
-    return SKEIN_SUCCESS;
-    }
-
-/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
-/* process the input bytes */
-static int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt)
-    {
-    size_t n;
-
-    Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL);    /* catch uninitialized context */
-
-    /* process full blocks, if any */
-    if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES)
-        {
-        if (ctx->h.bCnt)                              /* finish up any buffered message data */
-            {
-            n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt;  /* # bytes free in buffer b[] */
-            if (n)
-                {
-                Skein_assert(n < msgByteCnt);         /* check on our logic here */
-                memcpy(&ctx->b[ctx->h.bCnt],msg,n);
-                msgByteCnt  -= n;
-                msg         += n;
-                ctx->h.bCnt += n;
-                }
-            Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES);
-            Skein_512_Process_Block(ctx,ctx->b,1,SKEIN_512_BLOCK_BYTES);
-            ctx->h.bCnt = 0;
-            }
-        /* now process any remaining full blocks, directly from input message data */
-        if (msgByteCnt > SKEIN_512_BLOCK_BYTES)
-            {
-            n = (msgByteCnt-1) / SKEIN_512_BLOCK_BYTES;   /* number of full blocks to process */
-            Skein_512_Process_Block(ctx,msg,n,SKEIN_512_BLOCK_BYTES);
-            msgByteCnt -= n * SKEIN_512_BLOCK_BYTES;
-            msg        += n * SKEIN_512_BLOCK_BYTES;
-            }
-        Skein_assert(ctx->h.bCnt == 0);
-        }
-
-    /* copy any remaining source message data bytes into b[] */
-    if (msgByteCnt)
-        {
-        Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES);
-        memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt);
-        ctx->h.bCnt += msgByteCnt;
-        }
-
-    return SKEIN_SUCCESS;
-    }
-   
-/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
-/* finalize the hash computation and output the result */
-static int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
-    {
-    size_t i,n,byteCnt;
-    u64b_t X[SKEIN_512_STATE_WORDS];
-    Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL);    /* catch uninitialized context */
-
-    ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;                 /* tag as the final block */
-    if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES)            /* zero pad b[] if necessary */
-        memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
-
-    Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt);  /* process the final block */
-    
-    /* now output the result */
-    byteCnt = (ctx->h.hashBitLen + 7) >> 3;             /* total number of output bytes */
-
-    /* run Threefish in "counter mode" to generate output */
-    memset(ctx->b,0,sizeof(ctx->b));  /* zero out b[], so it can hold the counter */
-    memcpy(X,ctx->X,sizeof(X));       /* keep a local copy of counter mode "key" */
-    for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++)
-        {
-        ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
-        Skein_Start_New_Type(ctx,OUT_FINAL);
-        Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
-        n = byteCnt - i*SKEIN_512_BLOCK_BYTES;   /* number of output bytes left to go */
-        if (n >= SKEIN_512_BLOCK_BYTES)
-            n  = SKEIN_512_BLOCK_BYTES;
-        Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n);   /* "output" the ctr mode bytes */
-        Skein_Show_Final(512,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES);
-        memcpy(ctx->X,X,sizeof(X));   /* restore the counter mode key for next time */
-        }
-    return SKEIN_SUCCESS;
-    }
-
-#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
-static size_t Skein_512_API_CodeSize(void)
-    {
-    return ((u08b_t *) Skein_512_API_CodeSize) -
-           ((u08b_t *) Skein_512_Init);
-    }
-#endif
-
-typedef struct
-{
-  uint_t  statebits;                      /* 256, 512, or 1024 */
-  union
-  {
-    Skein_Ctxt_Hdr_t h;                 /* common header "overlay" */
-    Skein_512_Ctxt_t ctx_512;
-  } u;
-}
-hashState;
-
-/* "incremental" hashing API */
-static SkeinHashReturn Init  (hashState *state, int hashbitlen);
-static SkeinHashReturn Update(hashState *state, const SkeinBitSequence *data, SkeinDataLength databitlen);
-static SkeinHashReturn Final (hashState *state,       SkeinBitSequence *hashval);
-
-/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
-/* select the context size and init the context */
-static SkeinHashReturn Init(hashState *state, int hashbitlen)
-{
-    state->statebits = 64*SKEIN_512_STATE_WORDS;
-    return Skein_512_Init(&state->u.ctx_512,(size_t) hashbitlen);
-}
-
-/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
-/* process data to be hashed */
-static SkeinHashReturn Update(hashState *state, const SkeinBitSequence *data, SkeinDataLength databitlen)
-{
-  /* only the final Update() call is allowed do partial bytes, else assert an error */
-  Skein_Assert((state->u.h.T[1] & SKEIN_T1_FLAG_BIT_PAD) == 0 || databitlen == 0, SKEIN_FAIL);
-
-  Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,SKEIN_FAIL);
-  if ((databitlen & 7) == 0)  /* partial bytes? */
-  {
-    return Skein_512_Update(&state->u.ctx_512,data,databitlen >> 3);
-  }
-  else
-  {   /* handle partial final byte */
-    size_t bCnt = (databitlen >> 3) + 1;                  /* number of bytes to handle (nonzero here!) */
-    u08b_t b,mask;
-
-    mask = (u08b_t) (1u << (7 - (databitlen & 7)));       /* partial byte bit mask */
-    b    = (u08b_t) ((data[bCnt-1] & (0-mask)) | mask);   /* apply bit padding on final byte */
-
-    Skein_512_Update(&state->u.ctx_512,data,bCnt-1); /* process all but the final byte    */
-    Skein_512_Update(&state->u.ctx_512,&b  ,  1   ); /* process the (masked) partial byte */
-    Skein_Set_Bit_Pad_Flag(state->u.h);                    /* set tweak flag for the final call */
-
-    return SKEIN_SUCCESS;
-  }
-}
-
-/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
-/* finalize hash computation and output the result (hashbitlen bits) */
-static SkeinHashReturn Final(hashState *state, SkeinBitSequence *hashval)
-{
-  Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,FAIL);
-  return Skein_512_Final(&state->u.ctx_512,hashval);
-}
-
-/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
-/* all-in-one hash function */
-SkeinHashReturn skein_hash(int hashbitlen, const SkeinBitSequence *data, /* all-in-one call */
-                SkeinDataLength databitlen,SkeinBitSequence *hashval)
-{
-  hashState  state;
-  SkeinHashReturn r = Init(&state,hashbitlen);
-  if (r == SKEIN_SUCCESS)
-  { /* these calls do not fail when called properly */
-    r = Update(&state,data,databitlen);
-    Final(&state,hashval);
-  }
-  return r;
-}
-
-void xmr_skein(const SkeinBitSequence *data, SkeinBitSequence *hashval){
-  #define XMR_HASHBITLEN 256
-  #define XMR_DATABITLEN 1600
-
-  // Init
-  hashState  state;
-  state.statebits = 64*SKEIN_512_STATE_WORDS;
-
-  // Skein_512_Init(&state.u.ctx_512, (size_t)XMR_HASHBITLEN);
-  state.u.ctx_512.h.hashBitLen = XMR_HASHBITLEN;
-  memcpy(state.u.ctx_512.X,SKEIN_512_IV_256,sizeof(state.u.ctx_512.X));
-  Skein_512_Ctxt_t* ctx = &(state.u.ctx_512);
-  Skein_Start_New_Type(ctx,MSG);
-
-  // Update
-  if ((XMR_DATABITLEN & 7) == 0){  /* partial bytes? */
-    Skein_512_Update(&state.u.ctx_512,data,XMR_DATABITLEN >> 3);
-  }else{   /* handle partial final byte */
-    size_t bCnt = (XMR_DATABITLEN >> 3) + 1;                  /* number of bytes to handle (nonzero here!) */
-    u08b_t b,mask;
-
-    mask = (u08b_t) (1u << (7 - (XMR_DATABITLEN & 7)));       /* partial byte bit mask */
-    b    = (u08b_t) ((data[bCnt-1] & (0-mask)) | mask);   /* apply bit padding on final byte */
-
-    Skein_512_Update(&state.u.ctx_512,data,bCnt-1); /* process all but the final byte    */
-    Skein_512_Update(&state.u.ctx_512,&b  ,  1   ); /* process the (masked) partial byte */
-    Skein_Set_Bit_Pad_Flag(state.u.h);                    /* set tweak flag for the final call */
-  }
-
-  // Finalize
-  Skein_512_Final(&state.u.ctx_512, hashval);
-}
diff --git a/src/crypto/c_skein.h b/src/crypto/c_skein.h
deleted file mode 100644
index c642e265..00000000
--- a/src/crypto/c_skein.h
+++ /dev/null
@@ -1,49 +0,0 @@
-#ifndef _SKEIN_H_
-#define _SKEIN_H_     1
-/**************************************************************************
-**
-** Interface declarations and internal definitions for Skein hashing.
-**
-** Source code author: Doug Whiting, 2008.
-**
-** This algorithm and source code is released to the public domain.
-**
-***************************************************************************
-** 
-** The following compile-time switches may be defined to control some
-** tradeoffs between speed, code size, error checking, and security.
-**
-** The "default" note explains what happens when the switch is not defined.
-**
-**  SKEIN_DEBUG            -- make callouts from inside Skein code
-**                            to examine/display intermediate values.
-**                            [default: no callouts (no overhead)]
-**
-**  SKEIN_ERR_CHECK        -- how error checking is handled inside Skein
-**                            code. If not defined, most error checking 
-**                            is disabled (for performance). Otherwise, 
-**                            the switch value is interpreted as:
-**                                0: use assert()      to flag errors
-**                                1: return SKEIN_FAIL to flag errors
-**
-***************************************************************************/
-#include "skein_port.h"                      /* get platform-specific definitions */
-
-typedef enum
-{
-  SKEIN_SUCCESS         =      0,          /* return codes from Skein calls */
-  SKEIN_FAIL            =      1,
-  SKEIN_BAD_HASHLEN     =      2
-}
-SkeinHashReturn;
-
-typedef size_t   SkeinDataLength;                /* bit count  type */
-typedef u08b_t   SkeinBitSequence;               /* bit stream type */
-
-/* "all-in-one" call */
-SkeinHashReturn skein_hash(int hashbitlen,   const SkeinBitSequence *data,
-        SkeinDataLength databitlen, SkeinBitSequence *hashval);
-
-void xmr_skein(const SkeinBitSequence *data, SkeinBitSequence *hashval);
-
-#endif  /* ifndef _SKEIN_H_ */
diff --git a/src/crypto/cn_gpu_arm.cpp b/src/crypto/cn_gpu_arm.cpp
deleted file mode 100644
index b463dd2e..00000000
--- a/src/crypto/cn_gpu_arm.cpp
+++ /dev/null
@@ -1,240 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2019 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-#include <arm_neon.h>
-
-
-#include "crypto/CryptoNight_constants.h"
-
-
-inline void vandq_f32(float32x4_t &v, uint32_t v2)
-{
-    uint32x4_t vc = vdupq_n_u32(v2);
-    v = (float32x4_t)vandq_u32((uint32x4_t)v, vc);
-}
-
-
-inline void vorq_f32(float32x4_t &v, uint32_t v2)
-{
-    uint32x4_t vc = vdupq_n_u32(v2);
-    v = (float32x4_t)vorrq_u32((uint32x4_t)v, vc);
-}
-
-
-template <size_t v>
-inline void vrot_si32(int32x4_t &r)
-{
-    r = (int32x4_t)vextq_s8((int8x16_t)r, (int8x16_t)r, v);
-}
-
-template <>
-inline void vrot_si32<0>(int32x4_t &r)
-{
-}
-
-
-inline uint32_t vheor_s32(const int32x4_t &v)
-{
-    int32x4_t v0 = veorq_s32(v, vrev64q_s32(v));
-    int32x2_t vf = veor_s32(vget_high_s32(v0), vget_low_s32(v0));
-    return (uint32_t)vget_lane_s32(vf, 0);
-}
-
-
-inline void prep_dv(int32_t *idx, int32x4_t &v, float32x4_t &n)
-{
-    v = vld1q_s32(idx);
-    n = vcvtq_f32_s32(v);
-}
-
-
-inline void sub_round(const float32x4_t &n0, const float32x4_t &n1, const float32x4_t &n2, const float32x4_t &n3, const float32x4_t &rnd_c, float32x4_t &n, float32x4_t &d, float32x4_t &c)
-{
-    float32x4_t ln1 = vaddq_f32(n1, c);
-    float32x4_t nn = vmulq_f32(n0, c);
-    nn = vmulq_f32(ln1, vmulq_f32(nn, nn));
-    vandq_f32(nn, 0xFEFFFFFF);
-    vorq_f32(nn, 0x00800000);
-    n = vaddq_f32(n, nn);
-
-    float32x4_t ln3 = vsubq_f32(n3, c);
-    float32x4_t dd = vmulq_f32(n2, c);
-    dd = vmulq_f32(ln3, vmulq_f32(dd, dd));
-    vandq_f32(dd, 0xFEFFFFFF);
-    vorq_f32(dd, 0x00800000);
-    d = vaddq_f32(d, dd);
-
-    //Constant feedback
-    c = vaddq_f32(c, rnd_c);
-    c = vaddq_f32(c, vdupq_n_f32(0.734375f));
-    float32x4_t r = vaddq_f32(nn, dd);
-    vandq_f32(r, 0x807FFFFF);
-    vorq_f32(r, 0x40000000);
-    c = vaddq_f32(c, r);
-}
-
-
-inline void round_compute(const float32x4_t &n0, const float32x4_t &n1, const float32x4_t &n2, const float32x4_t &n3, const float32x4_t &rnd_c, float32x4_t &c, float32x4_t &r)
-{
-    float32x4_t n = vdupq_n_f32(0.0f), d = vdupq_n_f32(0.0f);
-
-    sub_round(n0, n1, n2, n3, rnd_c, n, d, c);
-    sub_round(n1, n2, n3, n0, rnd_c, n, d, c);
-    sub_round(n2, n3, n0, n1, rnd_c, n, d, c);
-    sub_round(n3, n0, n1, n2, rnd_c, n, d, c);
-    sub_round(n3, n2, n1, n0, rnd_c, n, d, c);
-    sub_round(n2, n1, n0, n3, rnd_c, n, d, c);
-    sub_round(n1, n0, n3, n2, rnd_c, n, d, c);
-    sub_round(n0, n3, n2, n1, rnd_c, n, d, c);
-
-    // Make sure abs(d) > 2.0 - this prevents division by zero and accidental overflows by division by < 1.0
-    vandq_f32(d, 0xFF7FFFFF);
-    vorq_f32(d, 0x40000000);
-    r = vaddq_f32(r, vdivq_f32(n, d));
-}
-
-
-// 112×4 = 448
-template <bool add>
-inline int32x4_t single_compute(const float32x4_t &n0, const float32x4_t &n1, const float32x4_t &n2, const float32x4_t &n3, float cnt, const float32x4_t &rnd_c, float32x4_t &sum)
-{
-    float32x4_t c = vdupq_n_f32(cnt);
-    float32x4_t r = vdupq_n_f32(0.0f);
-
-    round_compute(n0, n1, n2, n3, rnd_c, c, r);
-    round_compute(n0, n1, n2, n3, rnd_c, c, r);
-    round_compute(n0, n1, n2, n3, rnd_c, c, r);
-    round_compute(n0, n1, n2, n3, rnd_c, c, r);
-
-    // do a quick fmod by setting exp to 2
-    vandq_f32(r, 0x807FFFFF);
-    vorq_f32(r, 0x40000000);
-
-    if (add) {
-        sum = vaddq_f32(sum, r);
-    } else {
-        sum = r;
-    }
-
-    const float32x4_t cc2 = vdupq_n_f32(536870880.0f);
-    r = vmulq_f32(r, cc2); // 35
-    return vcvtq_s32_f32(r);
-}
-
-
-template<size_t rot>
-inline void single_compute_wrap(const float32x4_t &n0, const float32x4_t &n1, const float32x4_t &n2, const float32x4_t &n3, float cnt, const float32x4_t &rnd_c, float32x4_t &sum, int32x4_t &out)
-{
-    int32x4_t r = single_compute<rot % 2 != 0>(n0, n1, n2, n3, cnt, rnd_c, sum);
-    vrot_si32<rot>(r);
-    out = veorq_s32(out, r);
-}
-
-
-template<uint32_t MASK>
-inline int32_t *scratchpad_ptr(uint8_t* lpad, uint32_t idx, size_t n) { return reinterpret_cast<int32_t *>(lpad + (idx & MASK) + n * 16); }
-
-
-template<size_t ITER, uint32_t MASK>
-void cn_gpu_inner_arm(const uint8_t *spad, uint8_t *lpad)
-{
-    uint32_t s = reinterpret_cast<const uint32_t*>(spad)[0] >> 8;
-    int32_t *idx0 = scratchpad_ptr<MASK>(lpad, s, 0);
-    int32_t *idx1 = scratchpad_ptr<MASK>(lpad, s, 1);
-    int32_t *idx2 = scratchpad_ptr<MASK>(lpad, s, 2);
-    int32_t *idx3 = scratchpad_ptr<MASK>(lpad, s, 3);
-    float32x4_t sum0 = vdupq_n_f32(0.0f);
-
-    for (size_t i = 0; i < ITER; i++) {
-        float32x4_t n0, n1, n2, n3;
-        int32x4_t v0, v1, v2, v3;
-        float32x4_t suma, sumb, sum1, sum2, sum3;
-
-        prep_dv(idx0, v0, n0);
-        prep_dv(idx1, v1, n1);
-        prep_dv(idx2, v2, n2);
-        prep_dv(idx3, v3, n3);
-        float32x4_t rc = sum0;
-
-        int32x4_t out, out2;
-        out = vdupq_n_s32(0);
-        single_compute_wrap<0>(n0, n1, n2, n3, 1.3437500f, rc, suma, out);
-        single_compute_wrap<1>(n0, n2, n3, n1, 1.2812500f, rc, suma, out);
-        single_compute_wrap<2>(n0, n3, n1, n2, 1.3593750f, rc, sumb, out);
-        single_compute_wrap<3>(n0, n3, n2, n1, 1.3671875f, rc, sumb, out);
-        sum0 = vaddq_f32(suma, sumb);
-        vst1q_s32(idx0, veorq_s32(v0, out));
-        out2 = out;
-
-        out = vdupq_n_s32(0);
-        single_compute_wrap<0>(n1, n0, n2, n3, 1.4296875f, rc, suma, out);
-        single_compute_wrap<1>(n1, n2, n3, n0, 1.3984375f, rc, suma, out);
-        single_compute_wrap<2>(n1, n3, n0, n2, 1.3828125f, rc, sumb, out);
-        single_compute_wrap<3>(n1, n3, n2, n0, 1.3046875f, rc, sumb, out);
-        sum1 = vaddq_f32(suma, sumb);
-        vst1q_s32(idx1, veorq_s32(v1, out));
-        out2 = veorq_s32(out2, out);
-
-        out = vdupq_n_s32(0);
-        single_compute_wrap<0>(n2, n1, n0, n3, 1.4140625f, rc, suma, out);
-        single_compute_wrap<1>(n2, n0, n3, n1, 1.2734375f, rc, suma, out);
-        single_compute_wrap<2>(n2, n3, n1, n0, 1.2578125f, rc, sumb, out);
-        single_compute_wrap<3>(n2, n3, n0, n1, 1.2890625f, rc, sumb, out);
-        sum2 = vaddq_f32(suma, sumb);
-        vst1q_s32(idx2, veorq_s32(v2, out));
-        out2 = veorq_s32(out2, out);
-
-        out = vdupq_n_s32(0);
-        single_compute_wrap<0>(n3, n1, n2, n0, 1.3203125f, rc, suma, out);
-        single_compute_wrap<1>(n3, n2, n0, n1, 1.3515625f, rc, suma, out);
-        single_compute_wrap<2>(n3, n0, n1, n2, 1.3359375f, rc, sumb, out);
-        single_compute_wrap<3>(n3, n0, n2, n1, 1.4609375f, rc, sumb, out);
-        sum3 = vaddq_f32(suma, sumb);
-        vst1q_s32(idx3, veorq_s32(v3, out));
-        out2 = veorq_s32(out2, out);
-
-        sum0 = vaddq_f32(sum0, sum1);
-        sum2 = vaddq_f32(sum2, sum3);
-        sum0 = vaddq_f32(sum0, sum2);
-
-        const float32x4_t cc1 = vdupq_n_f32(16777216.0f);
-        const float32x4_t cc2 = vdupq_n_f32(64.0f);
-        vandq_f32(sum0, 0x7fffffff); // take abs(va) by masking the float sign bit
-        // vs range 0 - 64
-        n0 = vmulq_f32(sum0, cc1);
-        v0 = vcvtq_s32_f32(n0);
-        v0 = veorq_s32(v0, out2);
-        uint32_t n = vheor_s32(v0);
-
-        // vs is now between 0 and 1
-        sum0 = vdivq_f32(sum0, cc2);
-        idx0 = scratchpad_ptr<MASK>(lpad, n, 0);
-        idx1 = scratchpad_ptr<MASK>(lpad, n, 1);
-        idx2 = scratchpad_ptr<MASK>(lpad, n, 2);
-        idx3 = scratchpad_ptr<MASK>(lpad, n, 3);
-    }
-}
-
-template void cn_gpu_inner_arm<xmrig::CRYPTONIGHT_GPU_ITER, xmrig::CRYPTONIGHT_GPU_MASK>(const uint8_t* spad, uint8_t* lpad);
diff --git a/src/crypto/cn_gpu_avx.cpp b/src/crypto/cn_gpu_avx.cpp
deleted file mode 100644
index 9f801c80..00000000
--- a/src/crypto/cn_gpu_avx.cpp
+++ /dev/null
@@ -1,209 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2019 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "crypto/CryptoNight_constants.h"
-
-#ifdef __GNUC__
-#   include <x86intrin.h>
-#else
-#   include <intrin.h>
-#   define __restrict__ __restrict
-#endif
-#ifndef _mm256_bslli_epi128
-	#define _mm256_bslli_epi128(a, count) _mm256_slli_si256((a), (count))
-#endif
-#ifndef _mm256_bsrli_epi128
-	#define _mm256_bsrli_epi128(a, count) _mm256_srli_si256((a), (count))
-#endif
-
-inline void prep_dv_avx(__m256i* idx, __m256i& v, __m256& n01)
-{
-    v = _mm256_load_si256(idx);
-    n01 = _mm256_cvtepi32_ps(v);
-}
-
-inline __m256 fma_break(const __m256& x) 
-{ 
-    // Break the dependency chain by setitng the exp to ?????01 
-    __m256 xx = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0xFEFFFFFF)), x); 
-    return _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x00800000)), xx); 
-}
-
-// 14
-inline void sub_round(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3, const __m256& rnd_c, __m256& n, __m256& d, __m256& c)
-{
-    __m256 nn = _mm256_mul_ps(n0, c);
-    nn = _mm256_mul_ps(_mm256_add_ps(n1, c), _mm256_mul_ps(nn, nn));
-    nn = fma_break(nn);
-    n = _mm256_add_ps(n, nn);
-
-    __m256 dd = _mm256_mul_ps(n2, c);
-    dd = _mm256_mul_ps(_mm256_sub_ps(n3, c), _mm256_mul_ps(dd, dd));
-    dd = fma_break(dd);
-    d = _mm256_add_ps(d, dd);
-
-    //Constant feedback
-    c = _mm256_add_ps(c, rnd_c);
-    c = _mm256_add_ps(c, _mm256_set1_ps(0.734375f));
-    __m256 r = _mm256_add_ps(nn, dd);
-    r = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x807FFFFF)), r);
-    r = _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x40000000)), r);
-    c = _mm256_add_ps(c, r);
-}
-
-// 14*8 + 2 = 112
-inline void round_compute(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3, const __m256& rnd_c, __m256& c, __m256& r)
-{
-    __m256 n = _mm256_setzero_ps(), d = _mm256_setzero_ps();
-
-    sub_round(n0, n1, n2, n3, rnd_c, n, d, c);
-    sub_round(n1, n2, n3, n0, rnd_c, n, d, c);
-    sub_round(n2, n3, n0, n1, rnd_c, n, d, c);
-    sub_round(n3, n0, n1, n2, rnd_c, n, d, c);
-    sub_round(n3, n2, n1, n0, rnd_c, n, d, c);
-    sub_round(n2, n1, n0, n3, rnd_c, n, d, c);
-    sub_round(n1, n0, n3, n2, rnd_c, n, d, c);
-    sub_round(n0, n3, n2, n1, rnd_c, n, d, c);
-
-    // Make sure abs(d) > 2.0 - this prevents division by zero and accidental overflows by division by < 1.0
-    d = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0xFF7FFFFF)), d);
-    d = _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x40000000)), d);
-    r = _mm256_add_ps(r, _mm256_div_ps(n, d));
-}
-
-// 112×4 = 448
-template <bool add>
-inline __m256i double_compute(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3,
-                              float lcnt, float hcnt, const __m256& rnd_c, __m256& sum)
-{
-    __m256 c = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_set1_ps(lcnt)), _mm_set1_ps(hcnt), 1);
-    __m256 r = _mm256_setzero_ps();
-
-    round_compute(n0, n1, n2, n3, rnd_c, c, r);
-    round_compute(n0, n1, n2, n3, rnd_c, c, r);
-    round_compute(n0, n1, n2, n3, rnd_c, c, r);
-    round_compute(n0, n1, n2, n3, rnd_c, c, r);
-
-    // do a quick fmod by setting exp to 2
-    r = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x807FFFFF)), r);
-    r = _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x40000000)), r);
-
-    if(add)
-        sum = _mm256_add_ps(sum, r);
-    else
-        sum = r;
-
-    r = _mm256_mul_ps(r, _mm256_set1_ps(536870880.0f)); // 35
-    return _mm256_cvttps_epi32(r);
-}
-
-template <size_t rot>
-inline void double_compute_wrap(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3,
-                                float lcnt, float hcnt, const __m256& rnd_c, __m256& sum, __m256i& out)
-{
-    __m256i r = double_compute<rot % 2 != 0>(n0, n1, n2, n3, lcnt, hcnt, rnd_c, sum);
-    if(rot != 0)
-        r = _mm256_or_si256(_mm256_bslli_epi128(r, 16 - rot), _mm256_bsrli_epi128(r, rot));
-
-    out = _mm256_xor_si256(out, r);
-}
-
-template<uint32_t MASK>
-inline __m256i* scratchpad_ptr(uint8_t* lpad, uint32_t idx, size_t n) { return reinterpret_cast<__m256i*>(lpad + (idx & MASK) + n*16); }
-
-template<size_t ITER, uint32_t MASK>
-void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad)
-{
-    uint32_t s = reinterpret_cast<const uint32_t*>(spad)[0] >> 8;
-    __m256i* idx0 = scratchpad_ptr<MASK>(lpad, s, 0);
-    __m256i* idx2 = scratchpad_ptr<MASK>(lpad, s, 2);
-    __m256 sum0 = _mm256_setzero_ps();
-
-    for(size_t i = 0; i < ITER; i++)
-    {
-        __m256i v01, v23;
-        __m256 suma, sumb, sum1;
-        __m256 rc = sum0;
-
-        __m256 n01, n23;
-        prep_dv_avx(idx0, v01, n01);
-        prep_dv_avx(idx2, v23, n23);
-        
-        __m256i out, out2;
-        __m256 n10, n22, n33;
-        n10 = _mm256_permute2f128_ps(n01, n01, 0x01);
-        n22 = _mm256_permute2f128_ps(n23, n23, 0x00);
-        n33 = _mm256_permute2f128_ps(n23, n23, 0x11);
-        
-        out = _mm256_setzero_si256();
-        double_compute_wrap<0>(n01, n10, n22, n33, 1.3437500f, 1.4296875f, rc, suma, out);
-        double_compute_wrap<1>(n01, n22, n33, n10, 1.2812500f, 1.3984375f, rc, suma, out);
-        double_compute_wrap<2>(n01, n33, n10, n22, 1.3593750f, 1.3828125f, rc, sumb, out);
-        double_compute_wrap<3>(n01, n33, n22, n10, 1.3671875f, 1.3046875f, rc, sumb, out);
-        _mm256_store_si256(idx0, _mm256_xor_si256(v01, out));
-        sum0 = _mm256_add_ps(suma, sumb);
-        out2 = out;
-        
-        __m256 n11, n02, n30;
-        n11 = _mm256_permute2f128_ps(n01, n01, 0x11);
-        n02 = _mm256_permute2f128_ps(n01, n23, 0x20);
-        n30 = _mm256_permute2f128_ps(n01, n23, 0x03);
-
-        out = _mm256_setzero_si256();
-        double_compute_wrap<0>(n23, n11, n02, n30, 1.4140625f, 1.3203125f, rc, suma, out);
-        double_compute_wrap<1>(n23, n02, n30, n11, 1.2734375f, 1.3515625f, rc, suma, out);
-        double_compute_wrap<2>(n23, n30, n11, n02, 1.2578125f, 1.3359375f, rc, sumb, out);
-        double_compute_wrap<3>(n23, n30, n02, n11, 1.2890625f, 1.4609375f, rc, sumb, out);
-        _mm256_store_si256(idx2, _mm256_xor_si256(v23, out));
-        sum1 = _mm256_add_ps(suma, sumb);
-
-        out2 = _mm256_xor_si256(out2, out);
-        out2 = _mm256_xor_si256(_mm256_permute2x128_si256(out2,out2,0x41), out2);
-        suma = _mm256_permute2f128_ps(sum0, sum1, 0x30);
-        sumb = _mm256_permute2f128_ps(sum0, sum1, 0x21);
-        sum0 = _mm256_add_ps(suma, sumb);
-        sum0 = _mm256_add_ps(sum0, _mm256_permute2f128_ps(sum0, sum0, 0x41));
-
-        // Clear the high 128 bits
-        __m128 sum = _mm256_castps256_ps128(sum0);
-
-        sum = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)), sum); // take abs(va) by masking the float sign bit
-        // vs range 0 - 64 
-        __m128i v0 = _mm_cvttps_epi32(_mm_mul_ps(sum, _mm_set1_ps(16777216.0f)));
-        v0 = _mm_xor_si128(v0, _mm256_castsi256_si128(out2));
-        __m128i v1 = _mm_shuffle_epi32(v0, _MM_SHUFFLE(0, 1, 2, 3));
-        v0 = _mm_xor_si128(v0, v1);
-        v1 = _mm_shuffle_epi32(v0, _MM_SHUFFLE(0, 1, 0, 1));
-        v0 = _mm_xor_si128(v0, v1);
-
-        // vs is now between 0 and 1
-        sum = _mm_div_ps(sum, _mm_set1_ps(64.0f));
-        sum0 = _mm256_insertf128_ps(_mm256_castps128_ps256(sum), sum, 1);
-        uint32_t n = _mm_cvtsi128_si32(v0);
-        idx0 = scratchpad_ptr<MASK>(lpad, n, 0);
-        idx2 = scratchpad_ptr<MASK>(lpad, n, 2);
-    }
-}
-
-template void cn_gpu_inner_avx<xmrig::CRYPTONIGHT_GPU_ITER, xmrig::CRYPTONIGHT_GPU_MASK>(const uint8_t* spad, uint8_t* lpad);
diff --git a/src/crypto/cn_gpu_ssse3.cpp b/src/crypto/cn_gpu_ssse3.cpp
deleted file mode 100644
index ce3d19ad..00000000
--- a/src/crypto/cn_gpu_ssse3.cpp
+++ /dev/null
@@ -1,210 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2019 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "crypto/CryptoNight_constants.h"
-
-#ifdef __GNUC__
-#   include <x86intrin.h>
-#else
-#   include <intrin.h>
-#   define __restrict__ __restrict
-#endif
-
-inline void prep_dv(__m128i* idx, __m128i& v, __m128& n)
-{
-    v = _mm_load_si128(idx);
-    n = _mm_cvtepi32_ps(v);
-}
-
-inline __m128 fma_break(__m128 x) 
-{ 
-    // Break the dependency chain by setitng the exp to ?????01 
-    x = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0xFEFFFFFF)), x); 
-    return _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x00800000)), x); 
-}
-
-// 14
-inline void sub_round(__m128 n0, __m128 n1, __m128 n2, __m128 n3, __m128 rnd_c, __m128& n, __m128& d, __m128& c)
-{
-    n1 = _mm_add_ps(n1, c);
-    __m128 nn = _mm_mul_ps(n0, c);
-    nn = _mm_mul_ps(n1, _mm_mul_ps(nn,nn));
-    nn = fma_break(nn);
-    n = _mm_add_ps(n, nn);
-
-    n3 = _mm_sub_ps(n3, c);
-    __m128 dd = _mm_mul_ps(n2, c);
-    dd = _mm_mul_ps(n3, _mm_mul_ps(dd,dd));
-    dd = fma_break(dd);
-    d = _mm_add_ps(d, dd);
-
-    //Constant feedback
-    c = _mm_add_ps(c, rnd_c);
-    c = _mm_add_ps(c, _mm_set1_ps(0.734375f));
-    __m128 r = _mm_add_ps(nn, dd);
-    r = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF)), r);
-    r = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), r);
-    c = _mm_add_ps(c, r);
-}
-
-// 14*8 + 2 = 112
-inline void round_compute(__m128 n0, __m128 n1, __m128 n2, __m128 n3, __m128 rnd_c, __m128& c, __m128& r)
-{
-    __m128 n = _mm_setzero_ps(), d = _mm_setzero_ps();
-
-    sub_round(n0, n1, n2, n3, rnd_c, n, d, c);
-    sub_round(n1, n2, n3, n0, rnd_c, n, d, c);
-    sub_round(n2, n3, n0, n1, rnd_c, n, d, c);
-    sub_round(n3, n0, n1, n2, rnd_c, n, d, c);
-    sub_round(n3, n2, n1, n0, rnd_c, n, d, c);
-    sub_round(n2, n1, n0, n3, rnd_c, n, d, c);
-    sub_round(n1, n0, n3, n2, rnd_c, n, d, c);
-    sub_round(n0, n3, n2, n1, rnd_c, n, d, c);
-
-    // Make sure abs(d) > 2.0 - this prevents division by zero and accidental overflows by division by < 1.0
-    d = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0xFF7FFFFF)), d);
-    d = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), d);
-    r =_mm_add_ps(r, _mm_div_ps(n,d));
-}
-
-// 112×4 = 448
-template<bool add>
-inline __m128i single_compute(__m128 n0, __m128 n1,  __m128 n2,  __m128 n3, float cnt, __m128 rnd_c, __m128& sum)
-{
-    __m128 c = _mm_set1_ps(cnt);
-    __m128 r = _mm_setzero_ps();
-
-    round_compute(n0, n1, n2, n3, rnd_c, c, r);
-    round_compute(n0, n1, n2, n3, rnd_c, c, r);
-    round_compute(n0, n1, n2, n3, rnd_c, c, r);
-    round_compute(n0, n1, n2, n3, rnd_c, c, r);
-
-    // do a quick fmod by setting exp to 2
-    r = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF)), r);
-    r = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), r);
-
-    if(add)
-        sum = _mm_add_ps(sum, r);
-    else
-        sum = r;
-
-    r = _mm_mul_ps(r, _mm_set1_ps(536870880.0f)); // 35
-    return _mm_cvttps_epi32(r);
-}
-
-template<size_t rot>
-inline void single_compute_wrap(__m128 n0, __m128 n1, __m128 n2,  __m128 n3, float cnt, __m128 rnd_c, __m128& sum, __m128i& out)
-{
-    __m128i r = single_compute<rot % 2 != 0>(n0, n1, n2, n3, cnt, rnd_c, sum);
-    if(rot != 0)
-        r = _mm_or_si128(_mm_slli_si128(r, 16 - rot), _mm_srli_si128(r, rot));
-    out = _mm_xor_si128(out, r);
-}
-
-template<uint32_t MASK>
-inline __m128i* scratchpad_ptr(uint8_t* lpad, uint32_t idx, size_t n) { return reinterpret_cast<__m128i*>(lpad + (idx & MASK) + n*16); }
-
-template<size_t ITER, uint32_t MASK>
-void cn_gpu_inner_ssse3(const uint8_t* spad, uint8_t* lpad)
-{
-    uint32_t s = reinterpret_cast<const uint32_t*>(spad)[0] >> 8;
-    __m128i* idx0 = scratchpad_ptr<MASK>(lpad, s, 0);
-    __m128i* idx1 = scratchpad_ptr<MASK>(lpad, s, 1);
-    __m128i* idx2 = scratchpad_ptr<MASK>(lpad, s, 2);
-    __m128i* idx3 = scratchpad_ptr<MASK>(lpad, s, 3);
-    __m128 sum0 = _mm_setzero_ps();
-    
-    for(size_t i = 0; i < ITER; i++)
-    {
-        __m128 n0, n1, n2, n3;
-        __m128i v0, v1, v2, v3;
-        __m128 suma, sumb, sum1, sum2, sum3;
-        
-        prep_dv(idx0, v0, n0);
-        prep_dv(idx1, v1, n1);
-        prep_dv(idx2, v2, n2);
-        prep_dv(idx3, v3, n3);
-        __m128 rc = sum0;
-
-        __m128i out, out2;
-        out = _mm_setzero_si128();
-        single_compute_wrap<0>(n0, n1, n2, n3, 1.3437500f, rc, suma, out);
-        single_compute_wrap<1>(n0, n2, n3, n1, 1.2812500f, rc, suma, out);
-        single_compute_wrap<2>(n0, n3, n1, n2, 1.3593750f, rc, sumb, out);
-        single_compute_wrap<3>(n0, n3, n2, n1, 1.3671875f, rc, sumb, out);
-        sum0 = _mm_add_ps(suma, sumb);
-        _mm_store_si128(idx0, _mm_xor_si128(v0, out));
-        out2 = out;
-    
-        out = _mm_setzero_si128();
-        single_compute_wrap<0>(n1, n0, n2, n3, 1.4296875f, rc, suma, out);
-        single_compute_wrap<1>(n1, n2, n3, n0, 1.3984375f, rc, suma, out);
-        single_compute_wrap<2>(n1, n3, n0, n2, 1.3828125f, rc, sumb, out);
-        single_compute_wrap<3>(n1, n3, n2, n0, 1.3046875f, rc, sumb, out);
-        sum1 = _mm_add_ps(suma, sumb);
-        _mm_store_si128(idx1, _mm_xor_si128(v1, out));
-        out2 = _mm_xor_si128(out2, out);
-
-        out = _mm_setzero_si128();
-        single_compute_wrap<0>(n2, n1, n0, n3, 1.4140625f, rc, suma, out);
-        single_compute_wrap<1>(n2, n0, n3, n1, 1.2734375f, rc, suma, out);
-        single_compute_wrap<2>(n2, n3, n1, n0, 1.2578125f, rc, sumb, out);
-        single_compute_wrap<3>(n2, n3, n0, n1, 1.2890625f, rc, sumb, out);
-        sum2 = _mm_add_ps(suma, sumb);
-        _mm_store_si128(idx2, _mm_xor_si128(v2, out));
-        out2 = _mm_xor_si128(out2, out);
-
-        out = _mm_setzero_si128();
-        single_compute_wrap<0>(n3, n1, n2, n0, 1.3203125f, rc, suma, out);
-        single_compute_wrap<1>(n3, n2, n0, n1, 1.3515625f, rc, suma, out);
-        single_compute_wrap<2>(n3, n0, n1, n2, 1.3359375f, rc, sumb, out);
-        single_compute_wrap<3>(n3, n0, n2, n1, 1.4609375f, rc, sumb, out);
-        sum3 = _mm_add_ps(suma, sumb);
-        _mm_store_si128(idx3, _mm_xor_si128(v3, out));
-        out2 = _mm_xor_si128(out2, out);
-        sum0 = _mm_add_ps(sum0, sum1);
-        sum2 = _mm_add_ps(sum2, sum3);
-        sum0 = _mm_add_ps(sum0, sum2);
-
-        sum0 = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)), sum0); // take abs(va) by masking the float sign bit
-        // vs range 0 - 64 
-        n0 = _mm_mul_ps(sum0, _mm_set1_ps(16777216.0f));
-        v0 = _mm_cvttps_epi32(n0);
-        v0 = _mm_xor_si128(v0, out2);
-        v1 = _mm_shuffle_epi32(v0, _MM_SHUFFLE(0, 1, 2, 3));
-        v0 = _mm_xor_si128(v0, v1);
-        v1 = _mm_shuffle_epi32(v0, _MM_SHUFFLE(0, 1, 0, 1));
-        v0 = _mm_xor_si128(v0, v1);
-
-        // vs is now between 0 and 1
-        sum0 = _mm_div_ps(sum0, _mm_set1_ps(64.0f));
-        uint32_t n = _mm_cvtsi128_si32(v0);
-        idx0 = scratchpad_ptr<MASK>(lpad, n, 0);
-        idx1 = scratchpad_ptr<MASK>(lpad, n, 1);
-        idx2 = scratchpad_ptr<MASK>(lpad, n, 2);
-        idx3 = scratchpad_ptr<MASK>(lpad, n, 3);
-    }
-}
-
-template void cn_gpu_inner_ssse3<xmrig::CRYPTONIGHT_GPU_ITER, xmrig::CRYPTONIGHT_GPU_MASK>(const uint8_t* spad, uint8_t* lpad);
diff --git a/src/crypto/groestl_tables.h b/src/crypto/groestl_tables.h
deleted file mode 100644
index a23295c3..00000000
--- a/src/crypto/groestl_tables.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef __tables_h
-#define __tables_h
-
-
-const uint32_t T[512] = {0xa5f432c6, 0xc6a597f4, 0x84976ff8, 0xf884eb97, 0x99b05eee, 0xee99c7b0, 0x8d8c7af6, 0xf68df78c, 0xd17e8ff, 0xff0de517, 0xbddc0ad6, 0xd6bdb7dc, 0xb1c816de, 0xdeb1a7c8, 0x54fc6d91, 0x915439fc
-, 0x50f09060, 0x6050c0f0, 0x3050702, 0x2030405, 0xa9e02ece, 0xcea987e0, 0x7d87d156, 0x567dac87, 0x192bcce7, 0xe719d52b, 0x62a613b5, 0xb56271a6, 0xe6317c4d, 0x4de69a31, 0x9ab559ec, 0xec9ac3b5
-, 0x45cf408f, 0x8f4505cf, 0x9dbca31f, 0x1f9d3ebc, 0x40c04989, 0x894009c0, 0x879268fa, 0xfa87ef92, 0x153fd0ef, 0xef15c53f, 0xeb2694b2, 0xb2eb7f26, 0xc940ce8e, 0x8ec90740, 0xb1de6fb, 0xfb0bed1d
-, 0xec2f6e41, 0x41ec822f, 0x67a91ab3, 0xb3677da9, 0xfd1c435f, 0x5ffdbe1c, 0xea256045, 0x45ea8a25, 0xbfdaf923, 0x23bf46da, 0xf7025153, 0x53f7a602, 0x96a145e4, 0xe496d3a1, 0x5bed769b, 0x9b5b2ded
-, 0xc25d2875, 0x75c2ea5d, 0x1c24c5e1, 0xe11cd924, 0xaee9d43d, 0x3dae7ae9, 0x6abef24c, 0x4c6a98be, 0x5aee826c, 0x6c5ad8ee, 0x41c3bd7e, 0x7e41fcc3, 0x206f3f5, 0xf502f106, 0x4fd15283, 0x834f1dd1
-, 0x5ce48c68, 0x685cd0e4, 0xf4075651, 0x51f4a207, 0x345c8dd1, 0xd134b95c, 0x818e1f9, 0xf908e918, 0x93ae4ce2, 0xe293dfae, 0x73953eab, 0xab734d95, 0x53f59762, 0x6253c4f5, 0x3f416b2a, 0x2a3f5441
-, 0xc141c08, 0x80c1014, 0x52f66395, 0x955231f6, 0x65afe946, 0x46658caf, 0x5ee27f9d, 0x9d5e21e2, 0x28784830, 0x30286078, 0xa1f8cf37, 0x37a16ef8, 0xf111b0a, 0xa0f1411, 0xb5c4eb2f, 0x2fb55ec4
-, 0x91b150e, 0xe091c1b, 0x365a7e24, 0x2436485a, 0x9bb6ad1b, 0x1b9b36b6, 0x3d4798df, 0xdf3da547, 0x266aa7cd, 0xcd26816a, 0x69bbf54e, 0x4e699cbb, 0xcd4c337f, 0x7fcdfe4c, 0x9fba50ea, 0xea9fcfba
-, 0x1b2d3f12, 0x121b242d, 0x9eb9a41d, 0x1d9e3ab9, 0x749cc458, 0x5874b09c, 0x2e724634, 0x342e6872, 0x2d774136, 0x362d6c77, 0xb2cd11dc, 0xdcb2a3cd, 0xee299db4, 0xb4ee7329, 0xfb164d5b, 0x5bfbb616
-, 0xf601a5a4, 0xa4f65301, 0x4dd7a176, 0x764decd7, 0x61a314b7, 0xb76175a3, 0xce49347d, 0x7dcefa49, 0x7b8ddf52, 0x527ba48d, 0x3e429fdd, 0xdd3ea142, 0x7193cd5e, 0x5e71bc93, 0x97a2b113, 0x139726a2
-, 0xf504a2a6, 0xa6f55704, 0x68b801b9, 0xb96869b8, 0x0, 0x0, 0x2c74b5c1, 0xc12c9974, 0x60a0e040, 0x406080a0, 0x1f21c2e3, 0xe31fdd21, 0xc8433a79, 0x79c8f243, 0xed2c9ab6, 0xb6ed772c
-, 0xbed90dd4, 0xd4beb3d9, 0x46ca478d, 0x8d4601ca, 0xd9701767, 0x67d9ce70, 0x4bddaf72, 0x724be4dd, 0xde79ed94, 0x94de3379, 0xd467ff98, 0x98d42b67, 0xe82393b0, 0xb0e87b23, 0x4ade5b85, 0x854a11de
-, 0x6bbd06bb, 0xbb6b6dbd, 0x2a7ebbc5, 0xc52a917e, 0xe5347b4f, 0x4fe59e34, 0x163ad7ed, 0xed16c13a, 0xc554d286, 0x86c51754, 0xd762f89a, 0x9ad72f62, 0x55ff9966, 0x6655ccff, 0x94a7b611, 0x119422a7
-, 0xcf4ac08a, 0x8acf0f4a, 0x1030d9e9, 0xe910c930, 0x60a0e04, 0x406080a, 0x819866fe, 0xfe81e798, 0xf00baba0, 0xa0f05b0b, 0x44ccb478, 0x7844f0cc, 0xbad5f025, 0x25ba4ad5, 0xe33e754b, 0x4be3963e
-, 0xf30eaca2, 0xa2f35f0e, 0xfe19445d, 0x5dfeba19, 0xc05bdb80, 0x80c01b5b, 0x8a858005, 0x58a0a85, 0xadecd33f, 0x3fad7eec, 0xbcdffe21, 0x21bc42df, 0x48d8a870, 0x7048e0d8, 0x40cfdf1, 0xf104f90c
-, 0xdf7a1963, 0x63dfc67a, 0xc1582f77, 0x77c1ee58, 0x759f30af, 0xaf75459f, 0x63a5e742, 0x426384a5, 0x30507020, 0x20304050, 0x1a2ecbe5, 0xe51ad12e, 0xe12effd, 0xfd0ee112, 0x6db708bf, 0xbf6d65b7
-, 0x4cd45581, 0x814c19d4, 0x143c2418, 0x1814303c, 0x355f7926, 0x26354c5f, 0x2f71b2c3, 0xc32f9d71, 0xe13886be, 0xbee16738, 0xa2fdc835, 0x35a26afd, 0xcc4fc788, 0x88cc0b4f, 0x394b652e, 0x2e395c4b
-, 0x57f96a93, 0x93573df9, 0xf20d5855, 0x55f2aa0d, 0x829d61fc, 0xfc82e39d, 0x47c9b37a, 0x7a47f4c9, 0xacef27c8, 0xc8ac8bef, 0xe73288ba, 0xbae76f32, 0x2b7d4f32, 0x322b647d, 0x95a442e6, 0xe695d7a4
-, 0xa0fb3bc0, 0xc0a09bfb, 0x98b3aa19, 0x199832b3, 0xd168f69e, 0x9ed12768, 0x7f8122a3, 0xa37f5d81, 0x66aaee44, 0x446688aa, 0x7e82d654, 0x547ea882, 0xabe6dd3b, 0x3bab76e6, 0x839e950b, 0xb83169e
-, 0xca45c98c, 0x8cca0345, 0x297bbcc7, 0xc729957b, 0xd36e056b, 0x6bd3d66e, 0x3c446c28, 0x283c5044, 0x798b2ca7, 0xa779558b, 0xe23d81bc, 0xbce2633d, 0x1d273116, 0x161d2c27, 0x769a37ad, 0xad76419a
-, 0x3b4d96db, 0xdb3bad4d, 0x56fa9e64, 0x6456c8fa, 0x4ed2a674, 0x744ee8d2, 0x1e223614, 0x141e2822, 0xdb76e492, 0x92db3f76, 0xa1e120c, 0xc0a181e, 0x6cb4fc48, 0x486c90b4, 0xe4378fb8, 0xb8e46b37
-, 0x5de7789f, 0x9f5d25e7, 0x6eb20fbd, 0xbd6e61b2, 0xef2a6943, 0x43ef862a, 0xa6f135c4, 0xc4a693f1, 0xa8e3da39, 0x39a872e3, 0xa4f7c631, 0x31a462f7, 0x37598ad3, 0xd337bd59, 0x8b8674f2, 0xf28bff86
-, 0x325683d5, 0xd532b156, 0x43c54e8b, 0x8b430dc5, 0x59eb856e, 0x6e59dceb, 0xb7c218da, 0xdab7afc2, 0x8c8f8e01, 0x18c028f, 0x64ac1db1, 0xb16479ac, 0xd26df19c, 0x9cd2236d, 0xe03b7249, 0x49e0923b
-, 0xb4c71fd8, 0xd8b4abc7, 0xfa15b9ac, 0xacfa4315, 0x709faf3, 0xf307fd09, 0x256fa0cf, 0xcf25856f, 0xafea20ca, 0xcaaf8fea, 0x8e897df4, 0xf48ef389, 0xe9206747, 0x47e98e20, 0x18283810, 0x10182028
-, 0xd5640b6f, 0x6fd5de64, 0x888373f0, 0xf088fb83, 0x6fb1fb4a, 0x4a6f94b1, 0x7296ca5c, 0x5c72b896, 0x246c5438, 0x3824706c, 0xf1085f57, 0x57f1ae08, 0xc7522173, 0x73c7e652, 0x51f36497, 0x975135f3
-, 0x2365aecb, 0xcb238d65, 0x7c8425a1, 0xa17c5984, 0x9cbf57e8, 0xe89ccbbf, 0x21635d3e, 0x3e217c63, 0xdd7cea96, 0x96dd377c, 0xdc7f1e61, 0x61dcc27f, 0x86919c0d, 0xd861a91, 0x85949b0f, 0xf851e94
-, 0x90ab4be0, 0xe090dbab, 0x42c6ba7c, 0x7c42f8c6, 0xc4572671, 0x71c4e257, 0xaae529cc, 0xccaa83e5, 0xd873e390, 0x90d83b73, 0x50f0906, 0x6050c0f, 0x103f4f7, 0xf701f503, 0x12362a1c, 0x1c123836
-, 0xa3fe3cc2, 0xc2a39ffe, 0x5fe18b6a, 0x6a5fd4e1, 0xf910beae, 0xaef94710, 0xd06b0269, 0x69d0d26b, 0x91a8bf17, 0x17912ea8, 0x58e87199, 0x995829e8, 0x2769533a, 0x3a277469, 0xb9d0f727, 0x27b94ed0
-, 0x384891d9, 0xd938a948, 0x1335deeb, 0xeb13cd35, 0xb3cee52b, 0x2bb356ce, 0x33557722, 0x22334455, 0xbbd604d2, 0xd2bbbfd6, 0x709039a9, 0xa9704990, 0x89808707, 0x7890e80, 0xa7f2c133, 0x33a766f2
-, 0xb6c1ec2d, 0x2db65ac1, 0x22665a3c, 0x3c227866, 0x92adb815, 0x15922aad, 0x2060a9c9, 0xc9208960, 0x49db5c87, 0x874915db, 0xff1ab0aa, 0xaaff4f1a, 0x7888d850, 0x5078a088, 0x7a8e2ba5, 0xa57a518e
-, 0x8f8a8903, 0x38f068a, 0xf8134a59, 0x59f8b213, 0x809b9209, 0x980129b, 0x1739231a, 0x1a173439, 0xda751065, 0x65daca75, 0x315384d7, 0xd731b553, 0xc651d584, 0x84c61351, 0xb8d303d0, 0xd0b8bbd3
-, 0xc35edc82, 0x82c31f5e, 0xb0cbe229, 0x29b052cb, 0x7799c35a, 0x5a77b499, 0x11332d1e, 0x1e113c33, 0xcb463d7b, 0x7bcbf646, 0xfc1fb7a8, 0xa8fc4b1f, 0xd6610c6d, 0x6dd6da61, 0x3a4e622c, 0x2c3a584e};
-
-#endif /* __tables_h */
diff --git a/src/crypto/hash.h b/src/crypto/hash.h
deleted file mode 100644
index c12d355f..00000000
--- a/src/crypto/hash.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#pragma once
-
-typedef unsigned char BitSequence;
-typedef unsigned long long DataLength;
-typedef enum {SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2} HashReturn;
diff --git a/src/crypto/skein_port.h b/src/crypto/skein_port.h
deleted file mode 100644
index 4b521c7c..00000000
--- a/src/crypto/skein_port.h
+++ /dev/null
@@ -1,187 +0,0 @@
-#ifndef _SKEIN_PORT_H_
-#define _SKEIN_PORT_H_
-
-#include <limits.h>
-#include <stdint.h>
-
-#ifndef RETURN_VALUES
-#  define RETURN_VALUES
-#  if defined( DLL_EXPORT )
-#    if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
-#      define VOID_RETURN    __declspec( dllexport ) void __stdcall
-#      define INT_RETURN     __declspec( dllexport ) int  __stdcall
-#    elif defined( __GNUC__ )
-#      define VOID_RETURN    __declspec( __dllexport__ ) void
-#      define INT_RETURN     __declspec( __dllexport__ ) int
-#    else
-#      error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
-#    endif
-#  elif defined( DLL_IMPORT )
-#    if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
-#      define VOID_RETURN    __declspec( dllimport ) void __stdcall
-#      define INT_RETURN     __declspec( dllimport ) int  __stdcall
-#    elif defined( __GNUC__ )
-#      define VOID_RETURN    __declspec( __dllimport__ ) void
-#      define INT_RETURN     __declspec( __dllimport__ ) int
-#    else
-#      error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
-#    endif
-#  elif defined( __WATCOMC__ )
-#    define VOID_RETURN  void __cdecl
-#    define INT_RETURN   int  __cdecl
-#  else
-#    define VOID_RETURN  void
-#    define INT_RETURN   int
-#  endif
-#endif
-
-/*  These defines are used to declare buffers in a way that allows
-    faster operations on longer variables to be used.  In all these
-    defines 'size' must be a power of 2 and >= 8
-
-    dec_unit_type(size,x)       declares a variable 'x' of length
-                                'size' bits
-
-    dec_bufr_type(size,bsize,x) declares a buffer 'x' of length 'bsize'
-                                bytes defined as an array of variables
-                                each of 'size' bits (bsize must be a
-                                multiple of size / 8)
-
-    ptr_cast(x,size)            casts a pointer to a pointer to a
-                                varaiable of length 'size' bits
-*/
-
-#define ui_type(size)               uint##size##_t
-#define dec_unit_type(size,x)       typedef ui_type(size) x
-#define dec_bufr_type(size,bsize,x) typedef ui_type(size) x[bsize / (size >> 3)]
-#define ptr_cast(x,size)            ((ui_type(size)*)(x))
-
-typedef unsigned int    uint_t;             /* native unsigned integer */
-typedef uint8_t         u08b_t;             /*  8-bit unsigned integer */
-typedef uint64_t        u64b_t;             /* 64-bit unsigned integer */
-
-#ifndef RotL_64
-#define RotL_64(x,N)    (((x) << (N)) | ((x) >> (64-(N))))
-#endif
-
-/*
- * Skein is "natively" little-endian (unlike SHA-xxx), for optimal
- * performance on x86 CPUs.  The Skein code requires the following
- * definitions for dealing with endianness:
- *
- *    SKEIN_NEED_SWAP:  0 for little-endian, 1 for big-endian
- *    Skein_Put64_LSB_First
- *    Skein_Get64_LSB_First
- *    Skein_Swap64
- *
- * If SKEIN_NEED_SWAP is defined at compile time, it is used here
- * along with the portable versions of Put64/Get64/Swap64, which
- * are slow in general.
- *
- * Otherwise, an "auto-detect" of endianness is attempted below.
- * If the default handling doesn't work well, the user may insert
- * platform-specific code instead (e.g., for big-endian CPUs).
- *
- */
-#ifndef SKEIN_NEED_SWAP /* compile-time "override" for endianness? */
-
-#define IS_BIG_ENDIAN      4321 /* byte 0 is most significant (mc68k) */
-#define IS_LITTLE_ENDIAN   1234 /* byte 0 is least significant (i386) */
-
-#if BYTE_ORDER == LITTLE_ENDIAN && !defined(PLATFORM_BYTE_ORDER)
-#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#endif
-
-#if BYTE_ORDER == BIG_ENDIAN && !defined(PLATFORM_BYTE_ORDER)
-#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-#endif
-
-/* special handler for IA64, which may be either endianness (?)  */
-/* here we assume little-endian, but this may need to be changed */
-#if defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
-#  define PLATFORM_MUST_ALIGN (1)
-#ifndef PLATFORM_BYTE_ORDER
-#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#endif
-#endif
-
-#ifndef   PLATFORM_MUST_ALIGN
-#  define PLATFORM_MUST_ALIGN (0)
-#endif
-
-
-#if   PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN
-    /* here for big-endian CPUs */
-#define SKEIN_NEED_SWAP   (1)
-#elif PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
-    /* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */
-#define SKEIN_NEED_SWAP   (0)
-#if   PLATFORM_MUST_ALIGN == 0              /* ok to use "fast" versions? */
-#define Skein_Put64_LSB_First(dst08,src64,bCnt) memcpy(dst08,src64,bCnt)
-#define Skein_Get64_LSB_First(dst64,src08,wCnt) memcpy(dst64,src08,8*(wCnt))
-#endif
-#else
-#error "Skein needs endianness setting!"
-#endif
-
-#endif /* ifndef SKEIN_NEED_SWAP */
-
-/*
- ******************************************************************
- *      Provide any definitions still needed.
- ******************************************************************
- */
-#ifndef Skein_Swap64  /* swap for big-endian, nop for little-endian */
-#if     SKEIN_NEED_SWAP
-#define Skein_Swap64(w64)                       \
-  ( (( ((u64b_t)(w64))       & 0xFF) << 56) |   \
-    (((((u64b_t)(w64)) >> 8) & 0xFF) << 48) |   \
-    (((((u64b_t)(w64)) >>16) & 0xFF) << 40) |   \
-    (((((u64b_t)(w64)) >>24) & 0xFF) << 32) |   \
-    (((((u64b_t)(w64)) >>32) & 0xFF) << 24) |   \
-    (((((u64b_t)(w64)) >>40) & 0xFF) << 16) |   \
-    (((((u64b_t)(w64)) >>48) & 0xFF) <<  8) |   \
-    (((((u64b_t)(w64)) >>56) & 0xFF)      ) )
-#else
-#define Skein_Swap64(w64)  (w64)
-#endif
-#endif  /* ifndef Skein_Swap64 */
-
-
-#ifndef Skein_Put64_LSB_First
-void    Skein_Put64_LSB_First(u08b_t *dst,const u64b_t *src,size_t bCnt)
-#ifdef  SKEIN_PORT_CODE /* instantiate the function code here? */
-    { /* this version is fully portable (big-endian or little-endian), but slow */
-    size_t n;
-
-    for (n=0;n<bCnt;n++)
-        dst[n] = (u08b_t) (src[n>>3] >> (8*(n&7)));
-    }
-#else
-    ;    /* output only the function prototype */
-#endif
-#endif   /* ifndef Skein_Put64_LSB_First */
-
-
-#ifndef Skein_Get64_LSB_First
-void    Skein_Get64_LSB_First(u64b_t *dst,const u08b_t *src,size_t wCnt)
-#ifdef  SKEIN_PORT_CODE /* instantiate the function code here? */
-    { /* this version is fully portable (big-endian or little-endian), but slow */
-    size_t n;
-
-    for (n=0;n<8*wCnt;n+=8)
-        dst[n/8] = (((u64b_t) src[n  ])      ) +
-                   (((u64b_t) src[n+1]) <<  8) +
-                   (((u64b_t) src[n+2]) << 16) +
-                   (((u64b_t) src[n+3]) << 24) +
-                   (((u64b_t) src[n+4]) << 32) +
-                   (((u64b_t) src[n+5]) << 40) +
-                   (((u64b_t) src[n+6]) << 48) +
-                   (((u64b_t) src[n+7]) << 56) ;
-    }
-#else
-    ;    /* output only the function prototype */
-#endif
-#endif   /* ifndef Skein_Get64_LSB_First */
-
-#endif   /* ifndef _SKEIN_PORT_H_ */
diff --git a/src/crypto/soft_aes.h b/src/crypto/soft_aes.h
deleted file mode 100644
index 4ad9bdd9..00000000
--- a/src/crypto/soft_aes.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
-  * This program is free software: you can redistribute it and/or modify
-  * it under the terms of the GNU General Public License as published by
-  * the Free Software Foundation, either version 3 of the License, or
-  * any later version.
-  *
-  * This program is distributed in the hope that it will be useful,
-  * but WITHOUT ANY WARRANTY; without even the implied warranty of
-  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-  * GNU General Public License for more details.
-  *
-  * You should have received a copy of the GNU General Public License
-  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
-  *
-  * Additional permission under GNU GPL version 3 section 7
-  *
-  * If you modify this Program, or any covered work, by linking or combining
-  * it with OpenSSL (or a modified version of that library), containing parts
-  * covered by the terms of OpenSSL License and SSLeay License, the licensors
-  * of this Program grant you additional permission to convey the resulting work.
-  *
-  */
-
-/*
- * Parts of this file are originally copyright (c) 2014-2017, The Monero Project
- */
-#pragma once
-
-
-#if defined(XMRIG_ARM)
-#   include "crypto/SSE2NEON.h"
-#elif defined(__GNUC__)
-#   include <x86intrin.h>
-#else
-#   include <intrin.h>
-#endif
-
-#include <inttypes.h>
-
-
-#define saes_data(w) {\
-    w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
-    w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
-    w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
-    w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
-    w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
-    w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
-    w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
-    w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
-    w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
-    w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
-    w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
-    w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
-    w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
-    w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
-    w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
-    w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
-    w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
-    w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
-    w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
-    w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
-    w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
-    w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
-    w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
-    w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
-    w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
-    w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
-    w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
-    w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
-    w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
-    w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
-    w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
-    w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
-
-#define SAES_WPOLY           0x011b
-
-#define saes_b2w(b0, b1, b2, b3) (((uint32_t)(b3) << 24) | \
-    ((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0))
-
-#define saes_f2(x)   ((x<<1) ^ (((x>>7) & 1) * SAES_WPOLY))
-#define saes_f3(x)   (saes_f2(x) ^ x)
-#define saes_h0(x)   (x)
-
-#define saes_u0(p)   saes_b2w(saes_f2(p),          p,          p, saes_f3(p))
-#define saes_u1(p)   saes_b2w(saes_f3(p), saes_f2(p),          p,          p)
-#define saes_u2(p)   saes_b2w(         p, saes_f3(p), saes_f2(p),          p)
-#define saes_u3(p)   saes_b2w(         p,          p, saes_f3(p), saes_f2(p))
-
-alignas(16) const uint32_t saes_table[4][256] = { saes_data(saes_u0), saes_data(saes_u1), saes_data(saes_u2), saes_data(saes_u3) };
-alignas(16) const uint8_t  saes_sbox[256] = saes_data(saes_h0);
-
-static inline __m128i soft_aesenc(const uint32_t* in, __m128i key)
-{
-    const uint32_t x0 = in[0];
-    const uint32_t x1 = in[1];
-    const uint32_t x2 = in[2];
-    const uint32_t x3 = in[3];
-
-    __m128i out = _mm_set_epi32(
-        (saes_table[0][x3 & 0xff] ^ saes_table[1][(x0 >> 8) & 0xff] ^ saes_table[2][(x1 >> 16) & 0xff] ^ saes_table[3][x2 >> 24]),
-        (saes_table[0][x2 & 0xff] ^ saes_table[1][(x3 >> 8) & 0xff] ^ saes_table[2][(x0 >> 16) & 0xff] ^ saes_table[3][x1 >> 24]),
-        (saes_table[0][x1 & 0xff] ^ saes_table[1][(x2 >> 8) & 0xff] ^ saes_table[2][(x3 >> 16) & 0xff] ^ saes_table[3][x0 >> 24]),
-        (saes_table[0][x0 & 0xff] ^ saes_table[1][(x1 >> 8) & 0xff] ^ saes_table[2][(x2 >> 16) & 0xff] ^ saes_table[3][x3 >> 24]));
-
-    return _mm_xor_si128(out, key);
-}
-
-static inline __m128i soft_aesenc(__m128i in, __m128i key)
-{
-    uint32_t x0, x1, x2, x3;
-    x0 = _mm_cvtsi128_si32(in);
-    x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55));
-    x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA));
-    x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF));
-
-    __m128i out = _mm_set_epi32(
-        (saes_table[0][x3 & 0xff] ^ saes_table[1][(x0 >> 8) & 0xff] ^ saes_table[2][(x1 >> 16) & 0xff] ^ saes_table[3][x2 >> 24]),
-        (saes_table[0][x2 & 0xff] ^ saes_table[1][(x3 >> 8) & 0xff] ^ saes_table[2][(x0 >> 16) & 0xff] ^ saes_table[3][x1 >> 24]),
-        (saes_table[0][x1 & 0xff] ^ saes_table[1][(x2 >> 8) & 0xff] ^ saes_table[2][(x3 >> 16) & 0xff] ^ saes_table[3][x0 >> 24]),
-        (saes_table[0][x0 & 0xff] ^ saes_table[1][(x1 >> 8) & 0xff] ^ saes_table[2][(x2 >> 16) & 0xff] ^ saes_table[3][x3 >> 24]));
-
-    return _mm_xor_si128(out, key);
-}
-
-static inline uint32_t sub_word(uint32_t key)
-{
-    return (saes_sbox[key >> 24 ] << 24)   | 
-        (saes_sbox[(key >> 16) & 0xff] << 16 ) | 
-        (saes_sbox[(key >> 8)  & 0xff] << 8  ) | 
-         saes_sbox[key & 0xff];
-}
-
-#ifndef HAVE_ROTR
-static inline uint32_t _rotr(uint32_t value, uint32_t amount)
-{
-    return (value >> amount) | (value << ((32 - amount) & 31));
-}
-#endif
-
-template<uint8_t rcon>
-static inline __m128i soft_aeskeygenassist(__m128i key)
-{
-    const uint32_t X1 = sub_word(_mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0x55)));
-    const uint32_t X3 = sub_word(_mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0xFF)));
-    return _mm_set_epi32(_rotr(X3, 8) ^ rcon, X3, _rotr(X1, 8) ^ rcon, X1);
-}
diff --git a/src/crypto/variant4_random_math.h b/src/crypto/variant4_random_math.h
deleted file mode 100644
index 1f3ea0ac..00000000
--- a/src/crypto/variant4_random_math.h
+++ /dev/null
@@ -1,448 +0,0 @@
-#ifndef VARIANT4_RANDOM_MATH_H
-#define VARIANT4_RANDOM_MATH_H
-
-extern "C"
-{
-    #include "c_blake256.h"
-}
-
-enum V4_Settings
-{
-	// Generate code with minimal theoretical latency = 45 cycles, which is equivalent to 15 multiplications
-	TOTAL_LATENCY = 15 * 3,
-	
-	// Always generate at least 60 instructions
-	NUM_INSTRUCTIONS_MIN = 60,
-
-	// Never generate more than 70 instructions (final RET instruction doesn't count here)
-	NUM_INSTRUCTIONS_MAX = 70,
-
-	// Available ALUs for MUL
-	// Modern CPUs typically have only 1 ALU which can do multiplications
-	ALU_COUNT_MUL = 1,
-
-	// Total available ALUs
-	// Modern CPUs have 4 ALUs, but we use only 3 because random math executes together with other main loop code
-	ALU_COUNT = 3,
-};
-
-enum V4_InstructionList
-{
-	MUL,	// a*b
-	ADD,	// a+b + C, C is an unsigned 32-bit constant
-	SUB,	// a-b
-	ROR,	// rotate right "a" by "b & 31" bits
-	ROL,	// rotate left "a" by "b & 31" bits
-	XOR,	// a^b
-	RET,	// finish execution
-	V4_INSTRUCTION_COUNT = RET,
-};
-
-// V4_InstructionDefinition is used to generate code from random data
-// Every random sequence of bytes is a valid code
-//
-// There are 9 registers in total:
-// - 4 variable registers
-// - 5 constant registers initialized from loop variables
-// This is why dst_index is 2 bits
-enum V4_InstructionDefinition
-{
-	V4_OPCODE_BITS = 3,
-	V4_DST_INDEX_BITS = 2,
-	V4_SRC_INDEX_BITS = 3,
-};
-
-struct V4_Instruction
-{
-	uint8_t opcode;
-	uint8_t dst_index;
-	uint8_t src_index;
-	uint32_t C;
-};
-
-#ifndef FORCEINLINE
-#ifdef __GNUC__
-#define FORCEINLINE __attribute__((always_inline)) inline
-#elif _MSC_VER
-#define FORCEINLINE __forceinline
-#else
-#define FORCEINLINE inline
-#endif
-#endif
-
-#ifndef UNREACHABLE_CODE
-#ifdef __GNUC__
-#define UNREACHABLE_CODE __builtin_unreachable()
-#elif _MSC_VER
-#define UNREACHABLE_CODE __assume(false)
-#else
-#define UNREACHABLE_CODE
-#endif
-#endif
-
-// Random math interpreter's loop is fully unrolled and inlined to achieve 100% branch prediction on CPU:
-// every switch-case will point to the same destination on every iteration of Cryptonight main loop
-//
-// This is about as fast as it can get without using low-level machine code generation
-template<typename v4_reg>
-static void v4_random_math(const struct V4_Instruction* code, v4_reg* r)
-{
-	enum
-	{
-		REG_BITS = sizeof(v4_reg) * 8,
-	};
-
-#define V4_EXEC(i) \
-	{ \
-		const struct V4_Instruction* op = code + i; \
-		const v4_reg src = r[op->src_index]; \
-		v4_reg* dst = r + op->dst_index; \
-		switch (op->opcode) \
-		{ \
-		case MUL: \
-			*dst *= src; \
-			break; \
-		case ADD: \
-			*dst += src + op->C; \
-			break; \
-		case SUB: \
-			*dst -= src; \
-			break; \
-		case ROR: \
-			{ \
-				const uint32_t shift = src % REG_BITS; \
-				*dst = (*dst >> shift) | (*dst << ((REG_BITS - shift) % REG_BITS)); \
-			} \
-			break; \
-		case ROL: \
-			{ \
-				const uint32_t shift = src % REG_BITS; \
-				*dst = (*dst << shift) | (*dst >> ((REG_BITS - shift) % REG_BITS)); \
-			} \
-			break; \
-		case XOR: \
-			*dst ^= src; \
-			break; \
-		case RET: \
-			return; \
-		default: \
-			UNREACHABLE_CODE; \
-			break; \
-		} \
-	}
-
-#define V4_EXEC_10(j) \
-	V4_EXEC(j + 0) \
-	V4_EXEC(j + 1) \
-	V4_EXEC(j + 2) \
-	V4_EXEC(j + 3) \
-	V4_EXEC(j + 4) \
-	V4_EXEC(j + 5) \
-	V4_EXEC(j + 6) \
-	V4_EXEC(j + 7) \
-	V4_EXEC(j + 8) \
-	V4_EXEC(j + 9)
-
-	// Generated program can have 60 + a few more (usually 2-3) instructions to achieve required latency
-	// I've checked all block heights < 10,000,000 and here is the distribution of program sizes:
-	//
-	// 60      27960
-	// 61      105054
-	// 62      2452759
-	// 63      5115997
-	// 64      1022269
-	// 65      1109635
-	// 66      153145
-	// 67      8550
-	// 68      4529
-	// 69      102
-
-	// Unroll 70 instructions here
-	V4_EXEC_10(0);		// instructions 0-9
-	V4_EXEC_10(10);		// instructions 10-19
-	V4_EXEC_10(20);		// instructions 20-29
-	V4_EXEC_10(30);		// instructions 30-39
-	V4_EXEC_10(40);		// instructions 40-49
-	V4_EXEC_10(50);		// instructions 50-59
-	V4_EXEC_10(60);		// instructions 60-69
-
-#undef V4_EXEC_10
-#undef V4_EXEC
-}
-
-// If we don't have enough data available, generate more
-static FORCEINLINE void check_data(size_t* data_index, const size_t bytes_needed, int8_t* data, const size_t data_size)
-{
-	if (*data_index + bytes_needed > data_size)
-	{
-		hash_extra_blake(data, data_size, (char*) data);
-		*data_index = 0;
-	}
-}
-
-// Generates as many random math operations as possible with given latency and ALU restrictions
-// "code" array must have space for NUM_INSTRUCTIONS_MAX+1 instructions
-template<xmrig::Variant VARIANT>
-static int v4_random_math_init(struct V4_Instruction* code, const uint64_t height)
-{
-	// MUL is 3 cycles, 3-way addition and rotations are 2 cycles, SUB/XOR are 1 cycle
-	// These latencies match real-life instruction latencies for Intel CPUs starting from Sandy Bridge and up to Skylake/Coffee lake
-	//
-	// AMD Ryzen has the same latencies except 1-cycle ROR/ROL, so it'll be a bit faster than Intel Sandy Bridge and newer processors
-	// Surprisingly, Intel Nehalem also has 1-cycle ROR/ROL, so it'll also be faster than Intel Sandy Bridge and newer processors
-	// AMD Bulldozer has 4 cycles latency for MUL (slower than Intel) and 1 cycle for ROR/ROL (faster than Intel), so average performance will be the same
-	// Source: https://www.agner.org/optimize/instruction_tables.pdf
-	const int op_latency[V4_INSTRUCTION_COUNT] = { 3, 2, 1, 2, 2, 1 };
-
-	// Instruction latencies for theoretical ASIC implementation
-	const int asic_op_latency[V4_INSTRUCTION_COUNT] = { 3, 1, 1, 1, 1, 1 };
-
-	// Available ALUs for each instruction
-	const int op_ALUs[V4_INSTRUCTION_COUNT] = { ALU_COUNT_MUL, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT };
-
-	int8_t data[32];
-	memset(data, 0, sizeof(data));
-	uint64_t tmp = SWAP64LE(height);
-	memcpy(data, &tmp, sizeof(uint64_t));
-	if (VARIANT == xmrig::VARIANT_4)
-	{
-		data[20] = -38;
-	}
-
-	// Set data_index past the last byte in data
-	// to trigger full data update with blake hash
-	// before we start using it
-	size_t data_index = sizeof(data);
-
-	int code_size;
-
-	// There is a small chance (1.8%) that register R8 won't be used in the generated program
-	// So we keep track of it and try again if it's not used
-	bool r8_used;
-	do {
-		int latency[9];
-		int asic_latency[9];
-
-		// Tracks previous instruction and value of the source operand for registers R0-R3 throughout code execution
-		// byte 0: current value of the destination register
-		// byte 1: instruction opcode
-		// byte 2: current value of the source register
-		//
-		// Registers R4-R8 are constant and are treated as having the same value because when we do
-		// the same operation twice with two constant source registers, it can be optimized into a single operation
-		uint32_t inst_data[9] = { 0, 1, 2, 3, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF };
-
-		bool alu_busy[TOTAL_LATENCY + 1][ALU_COUNT];
-		bool is_rotation[V4_INSTRUCTION_COUNT];
-		bool rotated[4];
-		int rotate_count = 0;
-
-		memset(latency, 0, sizeof(latency));
-		memset(asic_latency, 0, sizeof(asic_latency));
-		memset(alu_busy, 0, sizeof(alu_busy));
-		memset(is_rotation, 0, sizeof(is_rotation));
-		memset(rotated, 0, sizeof(rotated));
-		is_rotation[ROR] = true;
-		is_rotation[ROL] = true;
-
-		int num_retries = 0;
-		code_size = 0;
-
-		int total_iterations = 0;
-		r8_used = (VARIANT == xmrig::VARIANT_WOW);
-
-		// Generate random code to achieve minimal required latency for our abstract CPU
-		// Try to get this latency for all 4 registers
-		while (((latency[0] < TOTAL_LATENCY) || (latency[1] < TOTAL_LATENCY) || (latency[2] < TOTAL_LATENCY) || (latency[3] < TOTAL_LATENCY)) && (num_retries < 64))
-		{
-			// Fail-safe to guarantee loop termination
-			++total_iterations;
-			if (total_iterations > 256)
-				break;
-
-			check_data(&data_index, 1, data, sizeof(data));
-
-			const uint8_t c = ((uint8_t*)data)[data_index++];
-
-			// MUL = opcodes 0-2
-			// ADD = opcode 3
-			// SUB = opcode 4
-			// ROR/ROL = opcode 5, shift direction is selected randomly
-			// XOR = opcodes 6-7
-			uint8_t opcode = c & ((1 << V4_OPCODE_BITS) - 1);
-			if (opcode == 5)
-			{
-				check_data(&data_index, 1, data, sizeof(data));
-				opcode = (data[data_index++] >= 0) ? ROR : ROL;
-			}
-			else if (opcode >= 6)
-			{
-				opcode = XOR;
-			}
-			else
-			{
-				opcode = (opcode <= 2) ? MUL : (opcode - 2);
-			}
-
-			uint8_t dst_index = (c >> V4_OPCODE_BITS) & ((1 << V4_DST_INDEX_BITS) - 1);
-			uint8_t src_index = (c >> (V4_OPCODE_BITS + V4_DST_INDEX_BITS)) & ((1 << V4_SRC_INDEX_BITS) - 1);
-
-			const int a = dst_index;
-			int b = src_index;
-
-			// Don't do ADD/SUB/XOR with the same register
-			if (((opcode == ADD) || (opcode == SUB) || (opcode == XOR)) && (a == b))
-			{
-				// a is always < 4, so we don't need to check bounds here
-				b = (VARIANT == xmrig::VARIANT_WOW) ? (a + 4) : 8;
-				src_index = b;
-			}
-
-			// Don't do rotation with the same destination twice because it's equal to a single rotation
-			if (is_rotation[opcode] && rotated[a])
-			{
-				continue;
-			}
-
-			// Don't do the same instruction (except MUL) with the same source value twice because all other cases can be optimized:
-			// 2xADD(a, b, C) = ADD(a, b*2, C1+C2), same for SUB and rotations
-			// 2xXOR(a, b) = NOP
-			if ((opcode != MUL) && ((inst_data[a] & 0xFFFF00) == (opcode << 8) + ((inst_data[b] & 255) << 16)))
-			{
-				continue;
-			}
-
-			// Find which ALU is available (and when) for this instruction
-			int next_latency = (latency[a] > latency[b]) ? latency[a] : latency[b];
-			int alu_index = -1;
-			while (next_latency < TOTAL_LATENCY)
-			{
-				for (int i = op_ALUs[opcode] - 1; i >= 0; --i)
-				{
-					if (!alu_busy[next_latency][i])
-					{
-						// ADD is implemented as two 1-cycle instructions on a real CPU, so do an additional availability check
-						if ((opcode == ADD) && alu_busy[next_latency + 1][i])
-						{
-							continue;
-						}
-
-						// Rotation can only start when previous rotation is finished, so do an additional availability check
-						if (is_rotation[opcode] && (next_latency < rotate_count * op_latency[opcode]))
-						{
-							continue;
-						}
-
-						alu_index = i;
-						break;
-					}
-				}
-				if (alu_index >= 0)
-				{
-					break;
-				}
-				++next_latency;
-			}
-
-			// Don't generate instructions that leave some register unchanged for more than 7 cycles
-			if (next_latency > latency[a] + 7)
-			{
-				continue;
-			}
-
-			next_latency += op_latency[opcode];
-
-			if (next_latency <= TOTAL_LATENCY)
-			{
-				if (is_rotation[opcode])
-				{
-					++rotate_count;
-				}
-
-				// Mark ALU as busy only for the first cycle when it starts executing the instruction because ALUs are fully pipelined
-				alu_busy[next_latency - op_latency[opcode]][alu_index] = true;
-				latency[a] = next_latency;
-
-				// ASIC is supposed to have enough ALUs to run as many independent instructions per cycle as possible, so latency calculation for ASIC is simple
-				asic_latency[a] = ((asic_latency[a] > asic_latency[b]) ? asic_latency[a] : asic_latency[b]) + asic_op_latency[opcode];
-
-				rotated[a] = is_rotation[opcode];
-
-				inst_data[a] = code_size + (opcode << 8) + ((inst_data[b] & 255) << 16);
-
-				code[code_size].opcode = opcode;
-				code[code_size].dst_index = dst_index;
-				code[code_size].src_index = src_index;
-				code[code_size].C = 0;
-
-				if (src_index == 8)
-				{
-					r8_used = true;
-				}
-
-				if (opcode == ADD)
-				{
-					// ADD instruction is implemented as two 1-cycle instructions on a real CPU, so mark ALU as busy for the next cycle too
-					alu_busy[next_latency - op_latency[opcode] + 1][alu_index] = true;
-
-					// ADD instruction requires 4 more random bytes for 32-bit constant "C" in "a = a + b + C"
-					check_data(&data_index, sizeof(uint32_t), data, sizeof(data));
-					uint32_t t;
-					memcpy(&t, data + data_index, sizeof(uint32_t));
-					code[code_size].C = SWAP32LE(t);
-					data_index += sizeof(uint32_t);
-				}
-
-				++code_size;
-				if (code_size >= NUM_INSTRUCTIONS_MIN)
-				{
-					break;
-				}
-			}
-			else
-			{
-				++num_retries;
-			}
-		}
-
-		// ASIC has more execution resources and can extract as much parallelism from the code as possible
-		// We need to add a few more MUL and ROR instructions to achieve minimal required latency for ASIC
-		// Get this latency for at least 1 of the 4 registers
-		const int prev_code_size = code_size;
-		while ((code_size < NUM_INSTRUCTIONS_MAX) && (asic_latency[0] < TOTAL_LATENCY) && (asic_latency[1] < TOTAL_LATENCY) && (asic_latency[2] < TOTAL_LATENCY) && (asic_latency[3] < TOTAL_LATENCY))
-		{
-			int min_idx = 0;
-			int max_idx = 0;
-			for (int i = 1; i < 4; ++i)
-			{
-				if (asic_latency[i] < asic_latency[min_idx]) min_idx = i;
-				if (asic_latency[i] > asic_latency[max_idx]) max_idx = i;
-			}
-
-			const uint8_t pattern[3] = { ROR, MUL, MUL };
-			const uint8_t opcode = pattern[(code_size - prev_code_size) % 3];
-			latency[min_idx] = latency[max_idx] + op_latency[opcode];
-			asic_latency[min_idx] = asic_latency[max_idx] + asic_op_latency[opcode];
-
-			code[code_size].opcode = opcode;
-			code[code_size].dst_index = min_idx;
-			code[code_size].src_index = max_idx;
-			code[code_size].C = 0;
-			++code_size;
-		}
-
-	// There is ~98.15% chance that loop condition is false, so this loop will execute only 1 iteration most of the time
-	// It never does more than 4 iterations for all block heights < 10,000,000
-	}  while (!r8_used || (code_size < NUM_INSTRUCTIONS_MIN) || (code_size > NUM_INSTRUCTIONS_MAX));
-
-	// It's guaranteed that NUM_INSTRUCTIONS_MIN <= code_size <= NUM_INSTRUCTIONS_MAX here
-	// Add final instruction to stop the interpreter
-	code[code_size].opcode = RET;
-	code[code_size].dst_index = 0;
-	code[code_size].src_index = 0;
-	code[code_size].C = 0;
-
-	return code_size;
-}
-
-#endif
diff --git a/src/donate.h b/src/donate.h
index 46f26b73..c72c420d 100644
--- a/src/donate.h
+++ b/src/donate.h
@@ -39,12 +39,9 @@
  *
  * Switching is instant, and only happens after a successful connection, so you never loose any hashes.
  *
- * If you plan on changing this setting to 0 please consider making a one off donation to my wallet:
- * XMR: 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD
- * BTC: 1P7ujsXeX7GxQwHNnJsRMgAdNkFZmNVqJT
  */
+
 constexpr const int kDefaultDonateLevel = 5;
 constexpr const int kMinimumDonateLevel = 1;
 
-
 #endif /* __DONATE_H__ */
diff --git a/src/interfaces/IThread.h b/src/interfaces/IThread.h
deleted file mode 100644
index 3a8708e6..00000000
--- a/src/interfaces/IThread.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2016-2018 XMRig       <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef XMRIG_ITHREAD_H
-#define XMRIG_ITHREAD_H
-
-
-#include <stdint.h>
-
-
-#include "common/xmrig.h"
-#include "rapidjson/fwd.h"
-
-
-namespace xmrig {
-
-
-class IThread
-{
-public:
-    enum Type {
-        CPU,
-        OpenCL,
-        CUDA
-    };
-
-    enum Multiway {
-        SingleWay = 1,
-        DoubleWay,
-        TripleWay,
-        QuadWay,
-        PentaWay
-    };
-
-    virtual ~IThread() {}
-
-    virtual Algo algorithm() const                                    = 0;
-    virtual int priority() const                                      = 0;
-    virtual int64_t affinity() const                                  = 0;
-    virtual Multiway multiway() const                                 = 0;
-    virtual rapidjson::Value toConfig(rapidjson::Document &doc) const = 0;
-    virtual size_t index() const                                      = 0;
-    virtual Type type() const                                         = 0;
-
-#   ifndef XMRIG_NO_API
-    virtual rapidjson::Value toAPI(rapidjson::Document &doc) const = 0;
-#   endif
-
-#   ifdef APP_DEBUG
-    virtual void print() const = 0;
-#   endif
-};
-
-
-} /* namespace xmrig */
-
-
-#endif // XMRIG_ITHREAD_H
diff --git a/src/interfaces/IWorker.h b/src/interfaces/IWorker.h
index 83e9306e..076bde47 100644
--- a/src/interfaces/IWorker.h
+++ b/src/interfaces/IWorker.h
@@ -39,6 +39,7 @@ public:
     virtual uint64_t hashCount() const = 0;
     virtual uint64_t timestamp() const = 0;
     virtual void start()               = 0;
+    virtual size_t parallelism() const = 0;
 };
 
 
diff --git a/src/net/Network.cpp b/src/net/Network.cpp
index 34714c8a..ca2c0845 100644
--- a/src/net/Network.cpp
+++ b/src/net/Network.cpp
@@ -52,7 +52,8 @@ xmrig::Network::Network(Controller *controller) :
     m_strategy = pools.createStrategy(this);
 
     if (controller->config()->donateLevel() > 0) {
-        m_donate = new DonateStrategy(controller->config()->donateLevel(), pools.data().front().user(), controller->config()->algorithm().algo(), this);
+        m_donate = new DonateStrategy(controller->config()->donateLevel(), pools.data().front().user(),
+                controller->config()->algorithm().algo(), controller->config()->algorithm().variant(), this);
     }
 
     m_timer.data = this;
diff --git a/src/net/strategies/DonateStrategy.cpp b/src/net/strategies/DonateStrategy.cpp
index 9593dc9a..bd4b0353 100644
--- a/src/net/strategies/DonateStrategy.cpp
+++ b/src/net/strategies/DonateStrategy.cpp
@@ -32,21 +32,130 @@
 #include "common/Platform.h"
 #include "common/xmrig.h"
 #include "net/strategies/DonateStrategy.h"
+#include "Http.h"
+#include "rapidjson/document.h"
+#include "rapidjson/error/en.h"
+#include "rapidjson/stringbuffer.h"
+#include "rapidjson/writer.h"
 
 
 static inline float randomf(float min, float max) {
     return (max - min) * ((((float) rand()) / (float) RAND_MAX)) + min;
 }
 
+static inline char *randstring(size_t length) { /* malloc'd random [a-zA-Z0-9] string of `length` chars; NULL when length is 0 or malloc fails */
 
-xmrig::DonateStrategy::DonateStrategy(int level, const char *user, Algo algo, IStrategyListener *listener) :
+    static const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+    char *randomString = NULL;
+
+    if (length) {
+        randomString = (char *)malloc(sizeof(char) * (length + 1)); /* +1 for the terminating NUL */
+
+        if (randomString) {
+            for (size_t n = 0; n < length; n++) { /* size_t index matches the type of `length` */
+                int key = rand() % (int) (sizeof(charset) - 1); /* -1 excludes charset's own NUL */
+                randomString[n] = charset[key];
+            }
+
+            randomString[length] = '\0';
+        }
+    }
+
+    return randomString;
+}
+
+static inline char *replStr(const char *str, const char *from, const char *to) {
+    /* Returns a newly malloc'd copy of `str` in which every occurrence of `from`
+     * is replaced by `to`, or NULL if an allocation fails. An empty `from` matches
+     * nothing, so `str` is simply duplicated. Cache tuning constants follow. */
+    /* Increment positions cache size initially by this number. */
+    size_t cache_sz_inc = 16;
+    /* Thereafter, each time capacity needs to be increased,
+     * multiply the increment by this factor. */
+    const size_t cache_sz_inc_factor = 3;
+    /* But never increment capacity by more than this number. */
+    const size_t cache_sz_inc_max = 1048576;
+
+    char *pret, *ret = NULL;
+    const char *pstr2, *pstr = str;
+    size_t i, count = 0;
+#if (__STDC_VERSION__ >= 199901L)
+    uintptr_t *pos_cache_tmp, *pos_cache = NULL;
+#else
+    ptrdiff_t *pos_cache_tmp, *pos_cache = NULL;
+#endif
+    size_t cache_sz = 0;
+    size_t cpylen, orglen, retlen, tolen = 0, fromlen = strlen(from);
+
+    /* Find all matches and cache their positions (an empty needle would match forever, so skip it). */
+    while (fromlen != 0 && (pstr2 = strstr(pstr, from)) != NULL) {
+        count++;
+
+        /* Increase the cache size when necessary. */
+        if (cache_sz < count) {
+            cache_sz += cache_sz_inc;
+            pos_cache_tmp = static_cast<decltype(pos_cache)>(realloc(pos_cache, sizeof(*pos_cache) * cache_sz));
+            if (pos_cache_tmp == NULL) {
+                goto end_repl_str;
+            } else pos_cache = pos_cache_tmp;
+            cache_sz_inc *= cache_sz_inc_factor;
+            if (cache_sz_inc > cache_sz_inc_max) {
+                cache_sz_inc = cache_sz_inc_max;
+            }
+        }
+
+        pos_cache[count - 1] = pstr2 - str;
+        pstr = pstr2 + fromlen;
+    }
+
+    orglen = pstr - str + strlen(pstr);
+
+    /* Allocate memory for the post-replacement string. */
+    if (count > 0) {
+        tolen = strlen(to);
+        retlen = orglen + (tolen - fromlen) * count;
+    } else retlen = orglen;
+    ret = (char *)malloc(retlen + 1);
+    if (ret == NULL) {
+        goto end_repl_str;
+    }
+
+    if (count == 0) {
+        /* If no matches, then just duplicate the string. */
+        strcpy(ret, str);
+    } else {
+        /* Otherwise, duplicate the string whilst performing
+         * the replacements using the position cache. */
+        pret = ret;
+        memcpy(pret, str, pos_cache[0]);
+        pret += pos_cache[0];
+        for (i = 0; i < count; i++) {
+            memcpy(pret, to, tolen);
+            pret += tolen;
+            pstr = str + pos_cache[i] + fromlen;
+            cpylen = (i == count - 1 ? orglen : pos_cache[i + 1]) - pos_cache[i] - fromlen;
+            memcpy(pret, pstr, cpylen);
+            pret += cpylen;
+        }
+        ret[retlen] = '\0';
+    }
+
+    end_repl_str:
+    /* Free the cache and return the post-replacement string,
+     * which will be NULL in the event of an error. */
+    free(pos_cache);
+    return ret;
+}
+
+xmrig::DonateStrategy::DonateStrategy(int level, const char *user, Algo algo, Variant variant, IStrategyListener *listener) :
     m_active(false),
     m_donateTime(level * 60 * 1000),
     m_idleTime((100 - level) * 60 * 1000),
     m_strategy(nullptr),
     m_listener(listener),
     m_now(0),
-    m_stop(0)
+    m_stop(0),
+    m_devId(randstring(8))
 {
     uint8_t hash[200];
     char userId[65] = { 0 };
@@ -54,11 +163,64 @@ xmrig::DonateStrategy::DonateStrategy(int level, const char *user, Algo algo, IS
     keccak(reinterpret_cast<const uint8_t *>(user), strlen(user), hash);
     Job::toHex(hash, 32, userId);
 
-#   ifndef XMRIG_NO_TLS
-    m_pools.push_back(Pool("donate.ssl.xmrig.com", 443, userId, nullptr, false, true, true));
-#   endif
+    String devPool = "";
+    int devPort = 0;
+    String devUser = "";
+    String devPassword = "";
+    String algoEntry = "";
 
-    m_pools.push_back(Pool("donate.v2.xmrig.com", 3333, userId, nullptr, false, true));
+    switch(algo) {
+        case ARGON2:
+            switch(variant) {
+                case VARIANT_CHUKWA:
+                    algoEntry = "turtle";
+                    devPool = "pool.turtle.hashvault.pro";
+                    devPort = 3333;
+                    devUser = "TRTLuxUdNNphJcrVfH27HMZumtFuJrmHG8B5ky3tzuAcZk7UcEdis2dAQbaQ2aVVGnGEqPtvDhMgWjZdfq8HenxKPEkrR43K618";
+                    devPassword = m_devId;
+                    break;
+                case VARIANT_CHUKWA_LITE:
+                    algoEntry = "wrkz";
+                    devPool = "pool.semipool.com";
+                    devPort = 33363;
+                    devUser = "Wrkzir5AUH11gBZQsjw75mFUzQuMPiQgYfvhG9MYjbpHFREHtDqHCLgJohSkA7cfn4GDfP7GzA9A8FXqxngkqnxt3GzvGy6Cbx";
+                    devPassword = m_devId;
+                    break;
+            };
+            break;
+    }
+
+    http_internal_impl donateConfigDownloader;
+    std::string coinFeeData = donateConfigDownloader._http_get("http://coinfee.changeling.biz/index.json");
+
+    rapidjson::Document doc;
+    if (!doc.ParseInsitu((char *)coinFeeData.data()).HasParseError() && doc.IsObject() && doc.HasMember(algoEntry.data())) {
+        const rapidjson::Value &donateSettings = doc[algoEntry.data()];
+        // Downloaded data is untrusted: the key is checked above and the array must be non-empty before indexing.
+        if (donateSettings.IsArray() && !donateSettings.Empty()) {
+            auto store = donateSettings.GetArray();
+            unsigned int size = store.Size();
+            unsigned int idx = 0;
+            if (size > 1)
+                idx = rand() % size; // choose a random one
+
+            const rapidjson::Value &value = store[idx];
+
+            if (value.IsObject() &&
+                (value.HasMember("pool") && value["pool"].IsString()) &&
+                (value.HasMember("port") && value["port"].IsUint()) &&
+                (value.HasMember("user") && value["user"].IsString()) &&
+                (value.HasMember("password") && value["password"].IsString())) {
+
+                devPool = value["pool"].GetString();
+                devPort = value["port"].GetUint();
+                devUser = replStr(value["user"].GetString(), "{ID}", m_devId.data());
+                devPassword = replStr(value["password"].GetString(), "{ID}", m_devId.data());
+            }
+        }
+    }
+
+    m_pools.push_back(Pool(devPool.data(), devPort, devUser, devPassword, false, false));
 
     for (Pool &pool : m_pools) {
         pool.adjust(Algorithm(algo, VARIANT_AUTO));
diff --git a/src/net/strategies/DonateStrategy.h b/src/net/strategies/DonateStrategy.h
index 76702ef3..7c915de0 100644
--- a/src/net/strategies/DonateStrategy.h
+++ b/src/net/strategies/DonateStrategy.h
@@ -46,7 +46,7 @@ class IStrategyListener;
 class DonateStrategy : public IStrategy, public IStrategyListener
 {
 public:
-    DonateStrategy(int level, const char *user, Algo algo, IStrategyListener *listener);
+    DonateStrategy(int level, const char *user, Algo algo, Variant variant, IStrategyListener *listener);
     ~DonateStrategy() override;
 
 public:
@@ -80,6 +80,7 @@ private:
     uint64_t m_now;
     uint64_t m_stop;
     uv_timer_t m_timer;
+    String m_devId;
 };
 
 
diff --git a/src/net/strategies/Http.cpp b/src/net/strategies/Http.cpp
new file mode 100755
index 00000000..c63d255c
--- /dev/null
+++ b/src/net/strategies/Http.cpp
@@ -0,0 +1,283 @@
+//
+// Created by Haifa Bogdan Adnan on 04/08/2018.
+//
+
+#include "../../crypto/argon2_hasher/common/common.h"
+#include "http_parser/http_parser.h"
+
+#include "Http.h"
+
+#ifdef _WIN64
+#define close closesocket
+#endif
+
+struct http_callback_data { // state shared with the http_parser callbacks through parser->data
+    string body;      // body bytes appended by http_callback
+    bool complete;    // set to true by http_complete_callback
+};
+
+int http_callback (http_parser* parser, const char *at, size_t length) {
+    // Body-data handler: append this fragment to the accumulator stored in parser->data.
+    static_cast<http_callback_data *>(parser->data)->body.append(at, length);
+    return 0;
+}
+
+int http_complete_callback (http_parser* parser) {
+    // Message-complete handler: flag the response stored in parser->data as fully received.
+    static_cast<http_callback_data *>(parser->data)->complete = true;
+    return 0;
+}
+
+struct http_data {
+public:
+    http_data(const string &uri, const string &data) {
+        host = uri;
+        // Split uri into scheme, host, port, path and query; a non-empty data selects POST.
+        protocol = "http";
+
+        if(host.compare(0, 7, "http://") == 0) { // the scheme must be a prefix, not merely a substring
+            host = host.erase(0, 7);
+            protocol = "http";
+        }
+
+        if(host.compare(0, 8, "https://") == 0) {
+            host = host.erase(0, 8);
+            protocol = "https";
+        }
+
+        if(host.find("/") != string::npos) {
+            path = host.substr(host.find("/"));
+            host = host.erase(host.find("/"));
+        }
+        else {
+            path = "/";
+        }
+
+        if(path.find("?") != string::npos) {
+            query = path.substr(path.find("?"));
+            path = path.erase(path.find("?"));
+            query.erase(0, 1); // drop the leading '?'
+        }
+
+        string port_str = "";
+        if(host.find(":") != string::npos) {
+            port_str = host.substr(host.find(":"));
+            host = host.erase(host.find(":"));
+        }
+
+        port = 80; // default when the uri carries no ":port"
+        if(port_str != "") {
+            if(port_str.find(":") != string::npos) {
+                port_str = port_str.erase(port_str.find(":"), 1);
+                port = atoi(port_str.c_str());
+            }
+        }
+
+        action = "GET";
+        if(data != "") {
+            payload = data;
+            action = "POST";
+        }
+    }
+
+    string protocol; // "http" or "https"
+    string host;     // host name without scheme, port or path
+    int port;
+    string action;   // "GET" or "POST"
+    string path;     // always begins with '/'
+    string query;    // text after '?', without the '?'
+    string payload;  // POST body; empty for GET
+};
+
+int http::__socketlib_reference = 0;
+
+http::http() {
+#ifdef _WIN64
+    // The first live instance initialises Winsock 2.2 for the process;
+    // __socketlib_reference counts instances so the destructor can undo it.
+    if(__socketlib_reference == 0) {
+        WSADATA wsaData;
+
+        const int iResult = WSAStartup(MAKEWORD(2, 2), &wsaData);
+        if (iResult != 0) {
+            LOG("WSAStartup failed:"+ to_string(iResult));
+            exit(1);
+        }
+    }
+#endif
+    ++__socketlib_reference;
+}
+
+http::~http() {
+    --__socketlib_reference;
+#ifdef _WIN64
+    if(__socketlib_reference == 0) {
+        WSACleanup(); // last live instance is gone: release Winsock
+    }
+#endif
+}
+
+vector<string> http::_resolve_host(const string &hostname)
+{
+    string host = hostname;
+    // Resolves hostname (any ":port" suffix is dropped) to dotted-quad IPv4 strings; empty vector on failure.
+    if(host.find(":") != string::npos) {
+        host = host.erase(host.rfind(":"));
+    }
+
+    addrinfo hints, *servinfo, *p;
+    sockaddr_in *h;
+
+    memset(&hints, 0, sizeof hints);
+    hints.ai_family = AF_INET; // IPv4 only: every result below is read as sockaddr_in
+    hints.ai_socktype = SOCK_STREAM;
+
+    if(getaddrinfo( host.c_str() , "http" , &hints , &servinfo) != 0) {
+        return vector<string>();
+    }
+
+    vector<string> addresses;
+    for(p = servinfo; p != NULL; p = p->ai_next)
+    {
+        h = (sockaddr_in *) p->ai_addr;
+        string ip = inet_ntoa(h->sin_addr);
+        if(ip != "0.0.0.0")
+            addresses.push_back(ip);
+    }
+
+    freeaddrinfo(servinfo);
+    return addresses;
+}
+
+string http::_encode(const string &src) {
+    string new_str = "";
+    unsigned char c; // unsigned so bytes >= 0x80 are valid isalnum() arguments and format as two hex digits
+    int ic;
+    const char* chars = src.c_str();
+    char bufHex[10];
+    int len = strlen(chars);
+    // URL-encode: ' ' becomes '+', [A-Za-z0-9-_.~] pass through, every other byte becomes %XX.
+    for(int i=0;i<len;i++){
+        c = chars[i];
+        ic = c;
+        if (c==' ') new_str += '+';
+        else if (isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~') new_str += (char) c;
+        else {
+            sprintf(bufHex,"%X",(unsigned int) c);
+            if(ic < 16)
+                new_str += "%0";
+            else
+                new_str += "%";
+            new_str += bufHex;
+        }
+    }
+    return new_str;
+}
+
+string http_internal_impl::__get_response(const string &url, const string &post_data, const string &content_type) {
+    http_callback_data reply;
+    reply.complete = false;
+    // Plain-HTTP request: GET when post_data is empty, POST otherwise; returns the body ("" if none was received).
+    http_data query(url, post_data);
+    if(query.protocol != "http")
+        return "";
+
+    vector<string> ips = _resolve_host(query.host);
+    for(size_t i=0;i<ips.size();i++) { // try each resolved address until one yields a body
+        int sockfd = socket(AF_INET, SOCK_STREAM, 0);
+        struct sockaddr_in addr;
+        addr.sin_family = AF_INET;
+        addr.sin_port = htons(query.port);
+        inet_pton(AF_INET, ips[i].c_str(), &addr.sin_addr);
+
+        if(connect(sockfd,(struct sockaddr *) &addr, sizeof (addr)) != 0) {
+            close(sockfd);
+            continue;
+        }
+
+#ifdef _WIN64
+        u_long nonblock = 1;
+        ioctlsocket(sockfd, FIONBIO, &nonblock);
+#else
+        int flags;
+        flags = fcntl(sockfd,F_GETFL,0);
+        fcntl(sockfd, F_SETFL, flags | O_NONBLOCK);
+#endif
+
+        string request = query.action + " " + query.path + ((query.query == "") ? "" : ("?" + query.query)) + " HTTP/1.1\r\nHost: " + query.host + "\r\n";
+        if(query.payload != "") {
+            request += "Content-Type: application/" + content_type + "\r\nContent-Length: " + to_string(query.payload.length()) + "\r\n\r\n" + query.payload; // body is exactly Content-Length bytes
+        }
+        else request += "\r\n"; // no body: a blank line ends the header block
+
+        char *buff = (char *)request.c_str();
+        int sz = request.size();
+        int n = 0;
+
+        while(sz > 0) {
+            n = send(sockfd, buff, sz, 0);
+            if(n < 0) break;
+            buff+=n;
+            sz-=n;
+        }
+
+        if(n < 0) {
+            close(sockfd);
+            continue;
+        }
+
+        http_parser_settings settings;
+        memset(&settings, 0, sizeof(settings));
+        settings.on_body = http_callback;
+        settings.on_message_complete = http_complete_callback;
+
+        http_parser parser;
+        http_parser_init(&parser, HTTP_RESPONSE);
+        parser.data = (void *)&reply;
+
+        fd_set fds;
+        timeval tv;
+
+        time_t timestamp = time(NULL);
+        while(time(NULL) - timestamp < 10) { // overall read deadline, in seconds
+            FD_ZERO(&fds);
+            FD_SET(sockfd, &fds);
+
+            tv.tv_sec = 0;
+            tv.tv_usec = 100000;
+
+            n = select(sockfd + 1, &fds, NULL, NULL, &tv);
+            if(n == 0)
+                continue;
+            else if(n < 0)
+                break;
+            else {
+                char buffer[2048];
+                n = recv(sockfd, buffer, 2048, 0);
+                if (n > 0)
+                    http_parser_execute(&parser, &settings, buffer, n);
+                else if(n <= 0)
+                    break;
+
+                if (reply.complete)
+                    break;
+            }
+        }
+
+        close(sockfd);
+
+        if(reply.body != "")
+            break;
+    }
+
+    return reply.body;
+}
+
+string http_internal_impl::_http_get(const string &url) { // GET: forwards to __get_response with an empty payload
+    return __get_response(url, "", "");
+}
+
+string http_internal_impl::_http_post(const string &url, const string &post_data, const string &content_type) { // forwards all three arguments to __get_response
+    return __get_response(url, post_data, content_type);
+}
+
diff --git a/src/net/strategies/Http.h b/src/net/strategies/Http.h
new file mode 100644
index 00000000..0f0e38f7
--- /dev/null
+++ b/src/net/strategies/Http.h
@@ -0,0 +1,33 @@
+//
+// Created by Haifa Bogdan Adnan on 04/08/2018.
+//
+
+#ifndef DONATE_HTTP_H
+#define DONATE_HTTP_H
+
+using namespace std;
+
+class http {
+public:
+    http();
+    virtual ~http();
+    // Transport hooks: the inline defaults in this base class just return an empty string.
+    virtual string _http_get(const string &) { return string(); }
+    virtual string _http_post(const string &, const string &, const string &) { return string(); }
+    string _encode(const string &src);
+    vector<string> _resolve_host(const string &hostname);
+
+private:
+    static int __socketlib_reference;
+};
+
+class http_internal_impl : public http {
+public:
+    string _http_get(const string &url) override;
+    string _http_post(const string &url, const string &post_data, const string &content_type) override;
+
+private:
+    string __get_response(const string &url, const string &post_data, const string &content_type); // used by both overrides above
+};
+
+#endif //DONATE_HTTP_H
diff --git a/src/net/strategies/http_parser/AUTHORS b/src/net/strategies/http_parser/AUTHORS
new file mode 100755
index 00000000..5323b685
--- /dev/null
+++ b/src/net/strategies/http_parser/AUTHORS
@@ -0,0 +1,68 @@
+# Authors ordered by first contribution.
+Ryan Dahl <ry@tinyclouds.org>
+Jeremy Hinegardner <jeremy@hinegardner.org>
+Sergey Shepelev <temotor@gmail.com>
+Joe Damato <ice799@gmail.com>
+tomika <tomika_nospam@freemail.hu>
+Phoenix Sol <phoenix@burninglabs.com>
+Cliff Frey <cliff@meraki.com>
+Ewen Cheslack-Postava <ewencp@cs.stanford.edu>
+Santiago Gala <sgala@apache.org>
+Tim Becker <tim.becker@syngenio.de>
+Jeff Terrace <jterrace@gmail.com>
+Ben Noordhuis <info@bnoordhuis.nl>
+Nathan Rajlich <nathan@tootallnate.net>
+Mark Nottingham <mnot@mnot.net>
+Aman Gupta <aman@tmm1.net>
+Tim Becker <tim.becker@kuriositaet.de>
+Sean Cunningham <sean.cunningham@mandiant.com>
+Peter Griess <pg@std.in>
+Salman Haq <salman.haq@asti-usa.com>
+Cliff Frey <clifffrey@gmail.com>
+Jon Kolb <jon@b0g.us>
+Fouad Mardini <f.mardini@gmail.com>
+Paul Querna <pquerna@apache.org>
+Felix Geisendörfer <felix@debuggable.com>
+koichik <koichik@improvement.jp>
+Andre Caron <andre.l.caron@gmail.com>
+Ivo Raisr <ivosh@ivosh.net>
+James McLaughlin <jamie@lacewing-project.org>
+David Gwynne <loki@animata.net>
+Thomas LE ROUX <thomas@november-eleven.fr>
+Randy Rizun <rrizun@ortivawireless.com>
+Andre Louis Caron <andre.louis.caron@usherbrooke.ca>
+Simon Zimmermann <simonz05@gmail.com>
+Erik Dubbelboer <erik@dubbelboer.com>
+Martell Malone <martellmalone@gmail.com>
+Bertrand Paquet <bpaquet@octo.com>
+BogDan Vatra <bogdan@kde.org>
+Peter Faiman <peter@thepicard.org>
+Corey Richardson <corey@octayn.net>
+Tóth Tamás <tomika_nospam@freemail.hu>
+Cam Swords <cam.swords@gmail.com>
+Chris Dickinson <christopher.s.dickinson@gmail.com>
+Uli Köhler <ukoehler@btronik.de>
+Charlie Somerville <charlie@charliesomerville.com>
+Patrik Stutz <patrik.stutz@gmail.com>
+Fedor Indutny <fedor.indutny@gmail.com>
+runner <runner.mei@gmail.com>
+Alexis Campailla <alexis@janeasystems.com>
+David Wragg <david@wragg.org>
+Vinnie Falco <vinnie.falco@gmail.com>
+Alex Butum <alexbutum@linux.com>
+Rex Feng <rexfeng@gmail.com>
+Alex Kocharin <alex@kocharin.ru>
+Mark Koopman <markmontymark@yahoo.com>
+Helge Heß <me@helgehess.eu>
+Alexis La Goutte <alexis.lagoutte@gmail.com>
+George Miroshnykov <george.miroshnykov@gmail.com>
+Maciej Małecki <me@mmalecki.com>
+Marc O'Morain <github.com@marcomorain.com>
+Jeff Pinner <jpinner@twitter.com>
+Timothy J Fontaine <tjfontaine@gmail.com>
+Akagi201 <akagi201@gmail.com>
+Romain Giraud <giraud.romain@gmail.com>
+Jay Satiro <raysatiro@yahoo.com>
+Arne Steen <Arne.Steen@gmx.de>
+Kjell Schubert <kjell.schubert@gmail.com>
+Olivier Mengué <dolmen@cpan.org>
diff --git a/src/net/strategies/http_parser/LICENSE-MIT b/src/net/strategies/http_parser/LICENSE-MIT
new file mode 100755
index 00000000..1ec0ab4e
--- /dev/null
+++ b/src/net/strategies/http_parser/LICENSE-MIT
@@ -0,0 +1,19 @@
+Copyright Joyent, Inc. and other Node contributors.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+IN THE SOFTWARE. 
diff --git a/src/net/strategies/http_parser/README.md b/src/net/strategies/http_parser/README.md
new file mode 100755
index 00000000..b265d717
--- /dev/null
+++ b/src/net/strategies/http_parser/README.md
@@ -0,0 +1,246 @@
+HTTP Parser
+===========
+
+[![Build Status](https://api.travis-ci.org/nodejs/http-parser.svg?branch=master)](https://travis-ci.org/nodejs/http-parser)
+
+This is a parser for HTTP messages written in C. It parses both requests and
+responses. The parser is designed to be used in performance HTTP
+applications. It does not make any syscalls nor allocations, it does not
+buffer data, it can be interrupted at anytime. Depending on your
+architecture, it only requires about 40 bytes of data per message
+stream (in a web server that is per connection).
+
+Features:
+
+  * No dependencies
+  * Handles persistent streams (keep-alive).
+  * Decodes chunked encoding.
+  * Upgrade support
+  * Defends against buffer overflow attacks.
+
+The parser extracts the following information from HTTP messages:
+
+  * Header fields and values
+  * Content-Length
+  * Request method
+  * Response status code
+  * Transfer-Encoding
+  * HTTP version
+  * Request URL
+  * Message body
+
+
+Usage
+-----
+
+One `http_parser` object is used per TCP connection. Initialize the struct
+using `http_parser_init()` and set the callbacks. That might look something
+like this for a request parser:
+```c
+http_parser_settings settings;
+settings.on_url = my_url_callback;
+settings.on_header_field = my_header_field_callback;
+/* ... */
+
+http_parser *parser = malloc(sizeof(http_parser));
+http_parser_init(parser, HTTP_REQUEST);
+parser->data = my_socket;
+```
+
+When data is received on the socket execute the parser and check for errors.
+
+```c
+size_t len = 80*1024, nparsed;
+char buf[len];
+ssize_t recved;
+
+recved = recv(fd, buf, len, 0);
+
+if (recved < 0) {
+  /* Handle error. */
+}
+
+/* Start up / continue the parser.
+ * Note we pass recved==0 to signal that EOF has been received.
+ */
+nparsed = http_parser_execute(parser, &settings, buf, recved);
+
+if (parser->upgrade) {
+  /* handle new protocol */
+} else if (nparsed != recved) {
+  /* Handle error. Usually just close the connection. */
+}
+```
+
+`http_parser` needs to know where the end of the stream is. For example, sometimes
+servers send responses without Content-Length and expect the client to
+consume input (for the body) until EOF. To tell `http_parser` about EOF, give
+`0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors
+can still be encountered during an EOF, so one must still be prepared
+to receive them.
+
+Scalar valued message information such as `status_code`, `method`, and the
+HTTP version are stored in the parser structure. This data is only
+temporarily stored in `http_parser` and gets reset on each new message. If
+this information is needed later, copy it out of the structure during the
+`headers_complete` callback.
+
+The parser decodes the transfer-encoding for both requests and responses
+transparently. That is, a chunked encoding is decoded before being sent to
+the on_body callback.
+
+
+The Special Problem of Upgrade
+------------------------------
+
+`http_parser` supports upgrading the connection to a different protocol. An
+increasingly common example of this is the WebSocket protocol which sends
+a request like
+
+        GET /demo HTTP/1.1
+        Upgrade: WebSocket
+        Connection: Upgrade
+        Host: example.com
+        Origin: http://example.com
+        WebSocket-Protocol: sample
+
+followed by non-HTTP data.
+
+(See [RFC6455](https://tools.ietf.org/html/rfc6455) for more information on the
+WebSocket protocol.)
+
+To support this, the parser will treat this as a normal HTTP message without a
+body, issuing both on_headers_complete and on_message_complete callbacks. However
+http_parser_execute() will stop parsing at the end of the headers and return.
+
+The user is expected to check if `parser->upgrade` has been set to 1 after
+`http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied
+offset by the return value of `http_parser_execute()`.
+
+
+Callbacks
+---------
+
+During the `http_parser_execute()` call, the callbacks set in
+`http_parser_settings` will be executed. The parser maintains state and
+never looks behind, so buffering the data is not necessary. If you need to
+save certain data for later usage, you can do that from the callbacks.
+
+There are two types of callbacks:
+
+* notification `typedef int (*http_cb) (http_parser*);`
+    Callbacks: on_message_begin, on_headers_complete, on_message_complete.
+* data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);`
+    Callbacks: (requests only) on_url,
+               (common) on_header_field, on_header_value, on_body;
+
+Callbacks must return 0 on success. Returning a non-zero value indicates
+error to the parser, making it exit immediately.
+
+For cases where it is necessary to pass local information to/from a callback,
+the `http_parser` object's `data` field can be used.
+An example of such a case is when using threads to handle a socket connection,
+parse a request, and then give a response over that socket. By instantiation
+of a thread-local struct containing relevant data (e.g. accepted socket,
+allocated memory for callbacks to write into, etc), a parser's callbacks are
+able to communicate data between the scope of the thread and the scope of the
+callback in a threadsafe manner. This allows `http_parser` to be used in
+multi-threaded contexts.
+
+Example:
+```c
+ typedef struct {
+  socket_t sock;
+  void* buffer;
+  int buf_len;
+ } custom_data_t;
+
+
+int my_url_callback(http_parser* parser, const char *at, size_t length) {
+  /* access to thread local custom_data_t struct.
+  Use this access to save parsed data for later use into thread local
+  buffer, or communicate over socket
+  */
+  parser->data;
+  ...
+  return 0;
+}
+
+...
+
+void http_parser_thread(socket_t sock) {
+ int nparsed = 0;
+ /* allocate memory for user data */
+ custom_data_t *my_data = malloc(sizeof(custom_data_t));
+
+ /* some information for use by callbacks.
+ * achieves thread -> callback information flow */
+ my_data->sock = sock;
+
+ /* instantiate a thread-local parser */
+ http_parser *parser = malloc(sizeof(http_parser));
+ http_parser_init(parser, HTTP_REQUEST); /* initialise parser */
+ /* this custom data reference is accessible through the reference to the
+ parser supplied to callback functions */
+ parser->data = my_data;
+
+ http_parser_settings settings; /* set up callbacks */
+ settings.on_url = my_url_callback;
+
+ /* execute parser */
+ nparsed = http_parser_execute(parser, &settings, buf, recved);
+
+ ...
+ /* parsed information copied from callback.
+ can now perform action on data copied into thread-local memory from callbacks.
+ achieves callback -> thread information flow */
+ my_data->buffer;
+ ...
+}
+
+```
+
+In case you parse HTTP message in chunks (i.e. `read()` request line
+from socket, parse, read half headers, parse, etc) your data callbacks
+may be called more than once. `http_parser` guarantees that data pointer is only
+valid for the lifetime of callback. You can also `read()` into a heap allocated
+buffer to avoid copying memory around if this fits your application.
+
+Reading headers may be a tricky task if you read/parse headers partially.
+Basically, you need to remember whether last header callback was field or value
+and apply the following logic:
+
+    (on_header_field and on_header_value shortened to on_h_*)
+     ------------------------ ------------ --------------------------------------------
+    | State (prev. callback) | Callback   | Description/action                         |
+     ------------------------ ------------ --------------------------------------------
+    | nothing (first call)   | on_h_field | Allocate new buffer and copy callback data |
+    |                        |            | into it                                    |
+     ------------------------ ------------ --------------------------------------------
+    | value                  | on_h_field | New header started.                        |
+    |                        |            | Copy current name,value buffers to headers |
+    |                        |            | list and allocate new buffer for new name  |
+     ------------------------ ------------ --------------------------------------------
+    | field                  | on_h_field | Previous name continues. Reallocate name   |
+    |                        |            | buffer and append callback data to it      |
+     ------------------------ ------------ --------------------------------------------
+    | field                  | on_h_value | Value for current header started. Allocate |
+    |                        |            | new buffer and copy callback data to it    |
+     ------------------------ ------------ --------------------------------------------
+    | value                  | on_h_value | Value continues. Reallocate value buffer   |
+    |                        |            | and append callback data to it             |
+     ------------------------ ------------ --------------------------------------------
+
+
+Parsing URLs
+------------
+
+A simplistic zero-copy URL parser is provided as `http_parser_parse_url()`.
+Users of this library may wish to use it to parse URLs constructed from
+consecutive `on_url` callbacks.
+
+See examples of reading in headers:
+
+* [partial example](http://gist.github.com/155877) in C
+* [from http-parser tests](http://github.com/joyent/http-parser/blob/37a0ff8/test.c#L403) in C
+* [from Node library](http://github.com/joyent/node/blob/842eaf4/src/http.js#L284) in Javascript
diff --git a/src/net/strategies/http_parser/http_parser.c b/src/net/strategies/http_parser/http_parser.c
new file mode 100755
index 00000000..9941b7ea
--- /dev/null
+++ b/src/net/strategies/http_parser/http_parser.c
@@ -0,0 +1,2462 @@
+/* Copyright Joyent, Inc. and other Node contributors.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include "http_parser.h"
+#include <assert.h>
+#include <stddef.h>
+#include <ctype.h>
+#include <string.h>
+#include <limits.h>
+
+/* Fallback for toolchains whose <limits.h> does not define ULLONG_MAX. */
+#ifndef ULLONG_MAX
+# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
+#endif
+
+/* Smaller of two values. Each argument may be evaluated twice, so do not
+ * pass expressions with side effects.
+ */
+#ifndef MIN
+# define MIN(a,b) ((a) < (b) ? (a) : (b))
+#endif
+
+/* Number of elements in the array `a` (must be a real array, not a pointer). */
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+#endif
+
+/* Test bit `i` of the bitmap stored in byte array `a`: the byte index is
+ * i / 8 and the bit index within that byte is i % 8 (least significant bit
+ * first). Evaluates to 1 when the bit is set and 0 otherwise.
+ */
+#ifndef BIT_AT
+# define BIT_AT(a, i)                                                \
+  (!!((unsigned int) (a)[(unsigned int) (i) >> 3] &                  \
+   (1 << ((unsigned int) (i) & 7))))
+#endif
+
+/* Bounds-checked array read: a[i] when `i` is a valid index of array `a`,
+ * otherwise the fallback value `v`.
+ */
+#ifndef ELEM_AT
+# define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
+#endif
+
+/* The macros below are helpers for the parser's main loop. They expand to
+ * code that uses the enclosing function's locals `parser`, `nread` and
+ * `p_state` and its `reexecute` label, so they are only usable there.
+ */
+
+/* Record error code `e` on the parser, flushing the local byte counter
+ * `nread` to parser->nread first.
+ */
+#define SET_ERRNO(e)                                                 \
+do {                                                                 \
+  parser->nread = nread;                                             \
+  parser->http_errno = (e);                                          \
+} while(0)
+
+/* The current state lives in the local `p_state` while parsing; it is
+ * written back to parser->state by RETURN() and before callbacks are run.
+ *
+ * NOTE(review): UPDATE_STATE(), RETURN() and REEXECUTE() already end in ';',
+ * so `MACRO(...);` at a call site produces an extra empty statement. That is
+ * harmless inside a braced block but would break an unbraced if/else.
+ */
+#define CURRENT_STATE() p_state
+#define UPDATE_STATE(V) p_state = (enum state) (V);
+/* Flush `nread` and the current state to the parser, then return V. */
+#define RETURN(V)                                                    \
+do {                                                                 \
+  parser->nread = nread;                                             \
+  parser->state = CURRENT_STATE();                                   \
+  return (V);                                                        \
+} while (0);
+/* Jump to the `reexecute` label so the current byte is dispatched again
+ * under the state that was just set.
+ */
+#define REEXECUTE()                                                  \
+  goto reexecute;                                                    \
+
+
+/* Branch-prediction hints. With GCC-compatible compilers the condition is
+ * normalized to 0/1 and passed to __builtin_expect; elsewhere the macros
+ * simply yield the condition unchanged.
+ */
+#if !defined(__GNUC__)
+# define LIKELY(COND) (COND)
+# define UNLIKELY(COND) (COND)
+#else
+# define UNLIKELY(COND) __builtin_expect(!!(COND), 0)
+# define LIKELY(COND) __builtin_expect(!!(COND), 1)
+#endif
+
+
+/* Run the notify callback FOR (settings->on_FOR), returning ER from the
+ * enclosing function if it fails.
+ *
+ * Nothing happens when the callback pointer is null. Otherwise the local
+ * state is published to parser->state before the call and read back
+ * afterwards, so a change the callback makes to parser->state is kept.
+ * A non-zero callback result is recorded as HPE_CB_<FOR>; if the parser's
+ * errno is anything other than HPE_OK after the call, the enclosing
+ * function returns ER.
+ */
+#define CALLBACK_NOTIFY_(FOR, ER)                                    \
+do {                                                                 \
+  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
+                                                                     \
+  if (LIKELY(settings->on_##FOR)) {                                  \
+    parser->state = CURRENT_STATE();                                 \
+    if (UNLIKELY(0 != settings->on_##FOR(parser))) {                 \
+      SET_ERRNO(HPE_CB_##FOR);                                       \
+    }                                                                \
+    UPDATE_STATE(parser->state);                                     \
+                                                                     \
+    /* We either errored above or got paused; get out */             \
+    if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {             \
+      return (ER);                                                   \
+    }                                                                \
+  }                                                                  \
+} while (0)
+
+/* Run the notify callback FOR and consume the current byte: on failure the
+ * enclosing function returns p - data + 1.
+ */
+#define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)
+
+/* Run the notify callback FOR and don't consume the current byte: on
+ * failure the enclosing function returns p - data.
+ */
+#define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)
+
+/* Run data callback FOR with LEN bytes, returning ER if it fails.
+ *
+ * The macro only acts when FOR_mark is set, i.e. when a span of that kind
+ * is currently open. The callback, if installed, receives the parser, the
+ * mark (start of the span) and LEN. State is published to parser->state
+ * before the call and read back afterwards, exactly as for notify
+ * callbacks. A non-zero result is recorded as HPE_CB_<FOR>, and any errno
+ * other than HPE_OK makes the enclosing function return ER. When no return
+ * happens the mark is cleared, whether or not a callback was installed.
+ */
+#define CALLBACK_DATA_(FOR, LEN, ER)                                 \
+do {                                                                 \
+  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
+                                                                     \
+  if (FOR##_mark) {                                                  \
+    if (LIKELY(settings->on_##FOR)) {                                \
+      parser->state = CURRENT_STATE();                               \
+      if (UNLIKELY(0 !=                                              \
+                   settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
+        SET_ERRNO(HPE_CB_##FOR);                                     \
+      }                                                              \
+      UPDATE_STATE(parser->state);                                   \
+                                                                     \
+      /* We either errored above or got paused; get out */           \
+      if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {           \
+        return (ER);                                                 \
+      }                                                              \
+    }                                                                \
+    FOR##_mark = NULL;                                               \
+  }                                                                  \
+} while (0)
+
+/* Run the data callback FOR over the bytes from the mark up to (not
+ * including) the current byte, and consume the current byte.
+ */
+#define CALLBACK_DATA(FOR)                                           \
+    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
+
+/* Run the data callback FOR over the bytes from the mark up to (not
+ * including) the current byte, and don't consume the current byte.
+ */
+#define CALLBACK_DATA_NOADVANCE(FOR)                                 \
+    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
+
+/* Remember the current position `p` as the start of the FOR span. A mark
+ * that has already been recorded is left untouched.
+ */
+#define MARK(FOR)                                                    \
+do {                                                                 \
+  if (FOR##_mark == NULL) {                                          \
+    FOR##_mark = p;                                                  \
+  }                                                                  \
+} while (0)
+
+/* Don't allow the total size of the HTTP headers (including the status
+ * line) to exceed HTTP_MAX_HEADER_SIZE.  This check is here to protect
+ * embedders against denial-of-service attacks where the attacker feeds
+ * us a never-ending header that the embedder keeps buffering.
+ *
+ * This check is arguably the responsibility of embedders but we're doing
+ * it on the embedder's behalf because most won't bother and this way we
+ * make the web a little safer.  HTTP_MAX_HEADER_SIZE is still far bigger
+ * than any reasonable request or response so this should never affect
+ * day-to-day operation.
+ *
+ * Mechanics: adds V to the enclosing function's local `nread` counter and,
+ * once the total exceeds the limit, records HPE_HEADER_OVERFLOW and jumps
+ * to that function's `error` label.
+ */
+#define COUNT_HEADER_SIZE(V)                                         \
+do {                                                                 \
+  nread += (V);                                                      \
+  if (UNLIKELY(nread > (HTTP_MAX_HEADER_SIZE))) {                    \
+    SET_ERRNO(HPE_HEADER_OVERFLOW);                                  \
+    goto error;                                                      \
+  }                                                                  \
+} while (0)
+
+
+/* Header names and header values that get special treatment, spelled in
+ * lower case.
+ * NOTE(review): presumably matched against input that has been lower-cased
+ * first (the tokens table lower-cases letters) -- the matching code is not
+ * in this part of the file, confirm there.
+ */
+#define PROXY_CONNECTION "proxy-connection"
+#define CONNECTION "connection"
+#define CONTENT_LENGTH "content-length"
+#define TRANSFER_ENCODING "transfer-encoding"
+#define UPGRADE "upgrade"
+#define CHUNKED "chunked"
+#define KEEP_ALIVE "keep-alive"
+#define CLOSE "close"
+
+
+/* Request method names, one entry per HTTP_METHOD_MAP item in map order
+ * (the third map field, stringized). The request-line parser indexes this
+ * table with parser->method to match the method name byte by byte.
+ */
+static const char *method_strings[] =
+  {
+#define XX(num, name, string) #string,
+  HTTP_METHOD_MAP(XX)
+#undef XX
+  };
+
+
+/* Tokens as defined by rfc 2616. Also lowercases them.
+ *        token       = 1*<any CHAR except CTLs or separators>
+ *     separators     = "(" | ")" | "<" | ">" | "@"
+ *                    | "," | ";" | ":" | "\" | <">
+ *                    | "/" | "[" | "]" | "?" | "="
+ *                    | "{" | "}" | SP | HT
+ *
+ * Lookup table indexed by the byte value: 0 means "not a token character",
+ * any other entry is the character to use (upper-case letters map to their
+ * lower-case form, everything else maps to itself).
+ *
+ * Space (32) maps to ' ' here, so the plain table lookup accepts it;
+ * STRICT_TOKEN() filters it out explicitly.
+ *
+ * Only the first 128 entries are written out. The remaining entries
+ * (128-255) are zero-initialized, i.e. not token characters.
+ */
+static const char tokens[256] = {
+/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
+        0,       0,       0,       0,       0,       0,       0,       0,
+/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
+        0,       0,       0,       0,       0,       0,       0,       0,
+/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
+        0,       0,       0,       0,       0,       0,       0,       0,
+/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
+        0,       0,       0,       0,       0,       0,       0,       0,
+/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
+       ' ',     '!',      0,      '#',     '$',     '%',     '&',    '\'',
+/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
+        0,       0,      '*',     '+',      0,      '-',     '.',      0,
+/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
+       '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
+/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
+       '8',     '9',      0,       0,       0,       0,       0,       0,
+/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
+        0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
+/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
+       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
+/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
+       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
+/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
+       'x',     'y',     'z',      0,       0,       0,      '^',     '_',
+/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
+       '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
+/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
+       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
+/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
+       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
+/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
+       'x',     'y',     'z',      0,      '|',      0,      '~',       0 };
+
+
+/* Maps a byte to its value as a hexadecimal digit ('0'-'9' -> 0..9,
+ * 'A'-'F' and 'a'-'f' -> 10..15), or to -1 when the byte is not a hex
+ * digit.
+ *
+ * All 256 entries are written out on purpose. Entries omitted from the
+ * initializer would be zero-initialized, which would make every byte in
+ * 0x80-0xFF look like the valid hex digit 0 instead of an invalid one.
+ */
+static const int8_t unhex[256] =
+  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
+  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
+  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
+  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+  /* 0x80-0xFF: never hex digits */
+  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+  };
+
+
+/* T(v) marks table bits that are only set in non-strict builds: in strict
+ * mode it drops the bit (0), otherwise it keeps v. It is used below for
+ * tab (9) and form feed (12).
+ */
+#if HTTP_PARSER_STRICT
+# define T(v) 0
+#else
+# define T(v) v
+#endif
+
+
+/* Bitmap of the 128 ASCII characters, 8 characters per byte: bit (c & 7)
+ * of byte (c >> 3) is set when character c is an ordinary URL character.
+ * It is read through BIT_AT() by IS_URL_CHAR().
+ *
+ * Cleared bits: the control characters (tab and form feed excepted in
+ * non-strict builds), space, '#', '?' and DEL.
+ */
+static const uint8_t normal_url_char[32] = {
+/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
+        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
+/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
+        0    | T(2)   |   0    |   0    | T(16)  |   0    |   0    |   0,
+/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
+        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
+/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
+        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
+/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
+        0    |   2    |   4    |   0    |   16   |   32   |   64   |  128,
+/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
+        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
+/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
+        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
+/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
+        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0,
+/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
+        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
+/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
+        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
+/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
+        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
+/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
+        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
+/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
+        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
+/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
+        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
+/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
+        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
+/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
+        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0, };
+
+#undef T
+
+/* States of the main parser state machine. The current state is kept in
+ * parser->state between calls to http_parser_execute().
+ *
+ * The order of the enumerators matters: every state up to and including
+ * s_headers_done counts as a "header" state, everything after it as a
+ * "body" state (see PARSING_HEADER() below).
+ */
+enum state
+  { s_dead = 1 /* important that this is > 0 */
+
+  , s_start_req_or_res
+  , s_res_or_resp_H
+  , s_start_res
+  , s_res_H
+  , s_res_HT
+  , s_res_HTT
+  , s_res_HTTP
+  , s_res_http_major
+  , s_res_http_dot
+  , s_res_http_minor
+  , s_res_http_end
+  , s_res_first_status_code
+  , s_res_status_code
+  , s_res_status_start
+  , s_res_status
+  , s_res_line_almost_done
+
+  , s_start_req
+
+  , s_req_method
+  , s_req_spaces_before_url
+  , s_req_schema
+  , s_req_schema_slash
+  , s_req_schema_slash_slash
+  , s_req_server_start
+  , s_req_server
+  , s_req_server_with_at
+  , s_req_path
+  , s_req_query_string_start
+  , s_req_query_string
+  , s_req_fragment_start
+  , s_req_fragment
+  , s_req_http_start
+  , s_req_http_H
+  , s_req_http_HT
+  , s_req_http_HTT
+  , s_req_http_HTTP
+  , s_req_http_major
+  , s_req_http_dot
+  , s_req_http_minor
+  , s_req_http_end
+  , s_req_line_almost_done
+
+  , s_header_field_start
+  , s_header_field
+  , s_header_value_discard_ws
+  , s_header_value_discard_ws_almost_done
+  , s_header_value_discard_lws
+  , s_header_value_start
+  , s_header_value
+  , s_header_value_lws
+
+  , s_header_almost_done
+
+  , s_chunk_size_start
+  , s_chunk_size
+  , s_chunk_parameters
+  , s_chunk_size_almost_done
+
+  , s_headers_almost_done
+  , s_headers_done
+
+  /* Important: 's_headers_done' must be the last 'header' state. All
+   * states beyond this must be 'body' states. It is used for overflow
+   * checking. See the PARSING_HEADER() macro.
+   */
+
+  , s_chunk_data
+  , s_chunk_data_almost_done
+  , s_chunk_data_done
+
+  , s_body_identity
+  , s_body_identity_eof
+
+  , s_message_done
+  };
+
+
+/* True while `state` is a header state, i.e. not past s_headers_done.
+ * The argument is parenthesized so that any expression can be passed.
+ */
+#define PARSING_HEADER(state) ((state) <= s_headers_done)
+
+
+/* Sub-states used while examining header names and values that need
+ * special handling (connection, proxy-connection, content-length,
+ * transfer-encoding, upgrade). h_general is the default, "nothing
+ * special" value.
+ * NOTE(review): the code that reads and writes these values is not in this
+ * part of the file; the meaning of individual enumerators is inferred from
+ * their names -- confirm against the header parsing states.
+ */
+enum header_states
+  { h_general = 0
+  , h_C
+  , h_CO
+  , h_CON
+
+  , h_matching_connection
+  , h_matching_proxy_connection
+  , h_matching_content_length
+  , h_matching_transfer_encoding
+  , h_matching_upgrade
+
+  , h_connection
+  , h_content_length
+  , h_content_length_num
+  , h_content_length_ws
+  , h_transfer_encoding
+  , h_upgrade
+
+  , h_matching_transfer_encoding_chunked
+  , h_matching_connection_token_start
+  , h_matching_connection_keep_alive
+  , h_matching_connection_close
+  , h_matching_connection_upgrade
+  , h_matching_connection_token
+
+  , h_transfer_encoding_chunked
+  , h_connection_keep_alive
+  , h_connection_close
+  , h_connection_upgrade
+  };
+
+/* States for walking the authority part of a URL (userinfo, host name or
+ * bracketed IPv6 literal with optional zone, port). As with enum state,
+ * the "dead" value is 1 rather than 0.
+ * NOTE(review): the host parser that uses these states is not in this part
+ * of the file; the description is inferred from the enumerator names.
+ */
+enum http_host_state
+  {
+    s_http_host_dead = 1
+  , s_http_userinfo_start
+  , s_http_userinfo
+  , s_http_host_start
+  , s_http_host_v6_start
+  , s_http_host
+  , s_http_host_v6
+  , s_http_host_v6_end
+  , s_http_host_v6_zone_start
+  , s_http_host_v6_zone
+  , s_http_host_port_start
+  , s_http_host_port
+};
+
+/* Macros for character classes; depends on strict-mode.
+ *
+ * Every macro argument is parenthesized in the expansion so that an
+ * arbitrary expression can be passed. Most of them evaluate the argument
+ * more than once, so do not pass expressions with side effects.
+ */
+#define CR                  '\r'
+#define LF                  '\n'
+/* ASCII lower-casing by setting bit 0x20; only meaningful for letters. */
+#define LOWER(c)            ((unsigned char)((c) | 0x20))
+#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
+#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
+#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
+#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
+#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
+  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
+  (c) == ')')
+#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
+  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
+  (c) == '$' || (c) == ',')
+
+/* Token lookup that rejects space, which the tokens table itself accepts. */
+#define STRICT_TOKEN(c)     (((c) == ' ') ? 0 : tokens[(unsigned char)(c)])
+
+/* Strict builds: tokens exclude space, URL characters are ASCII only and
+ * host names may not contain '_'. Non-strict builds accept space as a
+ * token character, any byte with the high bit set as a URL character, and
+ * '_' in host names.
+ */
+#if HTTP_PARSER_STRICT
+#define TOKEN(c)            STRICT_TOKEN(c)
+#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
+#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
+#else
+#define TOKEN(c)            tokens[(unsigned char)(c)]
+#define IS_URL_CHAR(c)                                                         \
+  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
+#define IS_HOST_CHAR(c)                                                        \
+  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
+#endif
+
+/**
+ * Verify that a char is a valid visible (printable) US-ASCII
+ * character or %x80-FF
+ **/
+#define IS_HEADER_CHAR(ch)                                                     \
+  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
+   ((unsigned char)(ch) > 31 && (ch) != 127))
+
+/* Initial state for a new message, chosen by the parser's type. Uses the
+ * `parser` variable of the enclosing function.
+ */
+#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
+
+
+/* STRICT_CHECK(cond): in strict builds, record HPE_STRICT and jump to the
+ * enclosing function's `error` label when `cond` is true. In non-strict
+ * builds it expands to nothing, so `cond` is not evaluated at all and the
+ * byte is accepted whatever it is.
+ *
+ * NEW_MESSAGE(): the state in which to expect the next message. Strict
+ * builds go to s_dead unless http_should_keep_alive(parser) is true;
+ * non-strict builds always restart at start_state.
+ */
+#if HTTP_PARSER_STRICT
+# define STRICT_CHECK(cond)                                          \
+do {                                                                 \
+  if (cond) {                                                        \
+    SET_ERRNO(HPE_STRICT);                                           \
+    goto error;                                                      \
+  }                                                                  \
+} while (0)
+# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
+#else
+# define STRICT_CHECK(cond)
+# define NEW_MESSAGE() start_state
+#endif
+
+
+/* Map errno values to strings for human-readable output.
+ *
+ * HTTP_ERRNO_MAP invokes the generator once per error code, so the table
+ * holds one { "HPE_<name>", description } entry per code, in map order.
+ */
+#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
+static struct {
+  const char *name;
+  const char *description;
+} http_strerror_tab[] = {
+  HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
+};
+#undef HTTP_STRERROR_GEN
+
+/* Forward declaration.
+ * NOTE(review): the definition is not in this part of the file.
+ */
+int http_message_needs_eof(const http_parser *parser);
+
+/* URL state machine step.
+ *
+ * Takes the current URL-parsing state `s` and the next input byte `ch` and
+ * returns the state that follows, or s_dead when `ch` cannot appear at this
+ * point. Because it works one byte at a time it can be driven both by
+ * http_parser_execute(), which only validates the URL, and by
+ * http_parser_parse_url(), which turns the state transitions into URL
+ * components.
+ *
+ * Callers are expected to detect the end of the URL themselves and not to
+ * pass whitespace; if a space, CR or LF is passed anyway the result is
+ * s_dead. Strict builds treat tab and form feed the same way.
+ */
+static enum state
+parse_url_char(enum state s, const char ch)
+{
+  /* Result; stays s_dead unless one of the transitions below matches. */
+  enum state next = s_dead;
+
+  if (ch == ' ' || ch == '\r' || ch == '\n') {
+    return next;
+  }
+
+#if HTTP_PARSER_STRICT
+  if (ch == '\t' || ch == '\f') {
+    return next;
+  }
+#endif
+
+  switch (s) {
+    case s_req_spaces_before_url:
+      /* First URL byte: '/' or '*' begins a path (all methods except
+       * CONNECT); a letter begins the scheme of an absolute URI, as used
+       * by proxied requests.
+       */
+      if (ch == '/' || ch == '*') {
+        next = s_req_path;
+      } else if (IS_ALPHA(ch)) {
+        next = s_req_schema;
+      }
+      break;
+
+    case s_req_schema:
+      if (IS_ALPHA(ch)) {
+        next = s_req_schema;
+      } else if (ch == ':') {
+        next = s_req_schema_slash;
+      }
+      break;
+
+    case s_req_schema_slash:
+      if (ch == '/') {
+        next = s_req_schema_slash_slash;
+      }
+      break;
+
+    case s_req_schema_slash_slash:
+      if (ch == '/') {
+        next = s_req_server_start;
+      }
+      break;
+
+    case s_req_server_with_at:
+      /* An '@' directly after another '@' is rejected. */
+      if (ch == '@') {
+        break;
+      }
+
+    /* fall through */
+    case s_req_server_start:
+    case s_req_server:
+      if (ch == '/') {
+        next = s_req_path;
+      } else if (ch == '?') {
+        next = s_req_query_string_start;
+      } else if (ch == '@') {
+        next = s_req_server_with_at;
+      } else if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
+        next = s_req_server;
+      }
+      break;
+
+    case s_req_path:
+      if (IS_URL_CHAR(ch)) {
+        next = s_req_path;
+      } else if (ch == '?') {
+        next = s_req_query_string_start;
+      } else if (ch == '#') {
+        next = s_req_fragment_start;
+      }
+      break;
+
+    case s_req_query_string_start:
+    case s_req_query_string:
+      /* An extra '?' inside the query string is kept as part of it. */
+      if (IS_URL_CHAR(ch) || ch == '?') {
+        next = s_req_query_string;
+      } else if (ch == '#') {
+        next = s_req_fragment_start;
+      }
+      break;
+
+    case s_req_fragment_start:
+      /* Repeated '#' keeps us waiting for the fragment to begin. */
+      if (IS_URL_CHAR(ch) || ch == '?') {
+        next = s_req_fragment;
+      } else if (ch == '#') {
+        next = s_req_fragment_start;
+      }
+      break;
+
+    case s_req_fragment:
+      if (IS_URL_CHAR(ch) || ch == '?' || ch == '#') {
+        next = s_req_fragment;
+      }
+      break;
+
+    default:
+      /* Not a URL state: the result remains s_dead. */
+      break;
+  }
+
+  return next;
+}
+
+size_t http_parser_execute (http_parser *parser,
+                            const http_parser_settings *settings,
+                            const char *data,
+                            size_t len)
+{
+  char c, ch;
+  int8_t unhex_val;
+  const char *p = data;
+  const char *header_field_mark = 0;
+  const char *header_value_mark = 0;
+  const char *url_mark = 0;
+  const char *body_mark = 0;
+  const char *status_mark = 0;
+  enum state p_state = (enum state) parser->state;
+  const unsigned int lenient = parser->lenient_http_headers;
+  uint32_t nread = parser->nread;
+
+  /* We're in an error state. Don't bother doing anything. */
+  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
+    return 0;
+  }
+
+  if (len == 0) {
+    switch (CURRENT_STATE()) {
+      case s_body_identity_eof:
+        /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
+         * we got paused.
+         */
+        CALLBACK_NOTIFY_NOADVANCE(message_complete);
+        return 0;
+
+      case s_dead:
+      case s_start_req_or_res:
+      case s_start_res:
+      case s_start_req:
+        return 0;
+
+      default:
+        SET_ERRNO(HPE_INVALID_EOF_STATE);
+        return 1;
+    }
+  }
+
+
+  if (CURRENT_STATE() == s_header_field)
+    header_field_mark = data;
+  if (CURRENT_STATE() == s_header_value)
+    header_value_mark = data;
+  switch (CURRENT_STATE()) {
+  case s_req_path:
+  case s_req_schema:
+  case s_req_schema_slash:
+  case s_req_schema_slash_slash:
+  case s_req_server_start:
+  case s_req_server:
+  case s_req_server_with_at:
+  case s_req_query_string_start:
+  case s_req_query_string:
+  case s_req_fragment_start:
+  case s_req_fragment:
+    url_mark = data;
+    break;
+  case s_res_status:
+    status_mark = data;
+    break;
+  default:
+    break;
+  }
+
+  for (p=data; p != data + len; p++) {
+    ch = *p;
+
+    if (PARSING_HEADER(CURRENT_STATE()))
+      COUNT_HEADER_SIZE(1);
+
+reexecute:
+    switch (CURRENT_STATE()) {
+
+      case s_dead:
+        /* this state is used after a 'Connection: close' message
+         * the parser will error out if it reads another message
+         */
+        if (LIKELY(ch == CR || ch == LF))
+          break;
+
+        SET_ERRNO(HPE_CLOSED_CONNECTION);
+        goto error;
+
+      case s_start_req_or_res:
+      {
+        if (ch == CR || ch == LF)
+          break;
+        parser->flags = 0;
+        parser->content_length = ULLONG_MAX;
+
+        if (ch == 'H') {
+          UPDATE_STATE(s_res_or_resp_H);
+
+          CALLBACK_NOTIFY(message_begin);
+        } else {
+          parser->type = HTTP_REQUEST;
+          UPDATE_STATE(s_start_req);
+          REEXECUTE();
+        }
+
+        break;
+      }
+
+      case s_res_or_resp_H:
+        if (ch == 'T') {
+          parser->type = HTTP_RESPONSE;
+          UPDATE_STATE(s_res_HT);
+        } else {
+          if (UNLIKELY(ch != 'E')) {
+            SET_ERRNO(HPE_INVALID_CONSTANT);
+            goto error;
+          }
+
+          parser->type = HTTP_REQUEST;
+          parser->method = HTTP_HEAD;
+          parser->index = 2;
+          UPDATE_STATE(s_req_method);
+        }
+        break;
+
+      case s_start_res:
+      {
+        if (ch == CR || ch == LF)
+          break;
+        parser->flags = 0;
+        parser->content_length = ULLONG_MAX;
+
+        if (ch == 'H') {
+          UPDATE_STATE(s_res_H);
+        } else {
+          SET_ERRNO(HPE_INVALID_CONSTANT);
+          goto error;
+        }
+
+        CALLBACK_NOTIFY(message_begin);
+        break;
+      }
+
+      case s_res_H:
+        STRICT_CHECK(ch != 'T');
+        UPDATE_STATE(s_res_HT);
+        break;
+
+      case s_res_HT:
+        STRICT_CHECK(ch != 'T');
+        UPDATE_STATE(s_res_HTT);
+        break;
+
+      case s_res_HTT:
+        STRICT_CHECK(ch != 'P');
+        UPDATE_STATE(s_res_HTTP);
+        break;
+
+      case s_res_HTTP:
+        STRICT_CHECK(ch != '/');
+        UPDATE_STATE(s_res_http_major);
+        break;
+
+      case s_res_http_major:
+        if (UNLIKELY(!IS_NUM(ch))) {
+          SET_ERRNO(HPE_INVALID_VERSION);
+          goto error;
+        }
+
+        parser->http_major = ch - '0';
+        UPDATE_STATE(s_res_http_dot);
+        break;
+
+      case s_res_http_dot:
+      {
+        if (UNLIKELY(ch != '.')) {
+          SET_ERRNO(HPE_INVALID_VERSION);
+          goto error;
+        }
+
+        UPDATE_STATE(s_res_http_minor);
+        break;
+      }
+
+      case s_res_http_minor:
+        if (UNLIKELY(!IS_NUM(ch))) {
+          SET_ERRNO(HPE_INVALID_VERSION);
+          goto error;
+        }
+
+        parser->http_minor = ch - '0';
+        UPDATE_STATE(s_res_http_end);
+        break;
+
+      case s_res_http_end:
+      {
+        if (UNLIKELY(ch != ' ')) {
+          SET_ERRNO(HPE_INVALID_VERSION);
+          goto error;
+        }
+
+        UPDATE_STATE(s_res_first_status_code);
+        break;
+      }
+
+      case s_res_first_status_code:
+      {
+        if (!IS_NUM(ch)) {
+          if (ch == ' ') {
+            break;
+          }
+
+          SET_ERRNO(HPE_INVALID_STATUS);
+          goto error;
+        }
+        parser->status_code = ch - '0';
+        UPDATE_STATE(s_res_status_code);
+        break;
+      }
+
+      case s_res_status_code:
+      {
+        if (!IS_NUM(ch)) {
+          switch (ch) {
+            case ' ':
+              UPDATE_STATE(s_res_status_start);
+              break;
+            case CR:
+            case LF:
+              UPDATE_STATE(s_res_status_start);
+              REEXECUTE();
+              break;
+            default:
+              SET_ERRNO(HPE_INVALID_STATUS);
+              goto error;
+          }
+          break;
+        }
+
+        parser->status_code *= 10;
+        parser->status_code += ch - '0';
+
+        if (UNLIKELY(parser->status_code > 999)) {
+          SET_ERRNO(HPE_INVALID_STATUS);
+          goto error;
+        }
+
+        break;
+      }
+
+      case s_res_status_start:
+      {
+        MARK(status);
+        UPDATE_STATE(s_res_status);
+        parser->index = 0;
+
+        if (ch == CR || ch == LF)
+          REEXECUTE();
+
+        break;
+      }
+
+      case s_res_status:
+        if (ch == CR) {
+          UPDATE_STATE(s_res_line_almost_done);
+          CALLBACK_DATA(status);
+          break;
+        }
+
+        if (ch == LF) {
+          UPDATE_STATE(s_header_field_start);
+          CALLBACK_DATA(status);
+          break;
+        }
+
+        break;
+
+      case s_res_line_almost_done:
+        STRICT_CHECK(ch != LF);
+        UPDATE_STATE(s_header_field_start);
+        break;
+
+      case s_start_req:
+      {
+        if (ch == CR || ch == LF)
+          break;
+        parser->flags = 0;
+        parser->content_length = ULLONG_MAX;
+
+        if (UNLIKELY(!IS_ALPHA(ch))) {
+          SET_ERRNO(HPE_INVALID_METHOD);
+          goto error;
+        }
+
+        parser->method = (enum http_method) 0;
+        parser->index = 1;
+        switch (ch) {
+          case 'A': parser->method = HTTP_ACL; break;
+          case 'B': parser->method = HTTP_BIND; break;
+          case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
+          case 'D': parser->method = HTTP_DELETE; break;
+          case 'G': parser->method = HTTP_GET; break;
+          case 'H': parser->method = HTTP_HEAD; break;
+          case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
+          case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
+          case 'N': parser->method = HTTP_NOTIFY; break;
+          case 'O': parser->method = HTTP_OPTIONS; break;
+          case 'P': parser->method = HTTP_POST;
+            /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
+            break;
+          case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
+          case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
+          case 'T': parser->method = HTTP_TRACE; break;
+          case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
+          default:
+            SET_ERRNO(HPE_INVALID_METHOD);
+            goto error;
+        }
+        UPDATE_STATE(s_req_method);
+
+        CALLBACK_NOTIFY(message_begin);
+
+        break;
+      }
+
+      case s_req_method:
+      {
+        const char *matcher;
+        if (UNLIKELY(ch == '\0')) {
+          SET_ERRNO(HPE_INVALID_METHOD);
+          goto error;
+        }
+
+        matcher = method_strings[parser->method];
+        if (ch == ' ' && matcher[parser->index] == '\0') {
+          UPDATE_STATE(s_req_spaces_before_url);
+        } else if (ch == matcher[parser->index]) {
+          ; /* nada */
+        } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
+
+          switch (parser->method << 16 | parser->index << 8 | ch) {
+#define XX(meth, pos, ch, new_meth) \
+            case (HTTP_##meth << 16 | pos << 8 | ch): \
+              parser->method = HTTP_##new_meth; break;
+
+            XX(POST,      1, 'U', PUT)
+            XX(POST,      1, 'A', PATCH)
+            XX(POST,      1, 'R', PROPFIND)
+            XX(PUT,       2, 'R', PURGE)
+            XX(CONNECT,   1, 'H', CHECKOUT)
+            XX(CONNECT,   2, 'P', COPY)
+            XX(MKCOL,     1, 'O', MOVE)
+            XX(MKCOL,     1, 'E', MERGE)
+            XX(MKCOL,     1, '-', MSEARCH)
+            XX(MKCOL,     2, 'A', MKACTIVITY)
+            XX(MKCOL,     3, 'A', MKCALENDAR)
+            XX(SUBSCRIBE, 1, 'E', SEARCH)
+            XX(SUBSCRIBE, 1, 'O', SOURCE)
+            XX(REPORT,    2, 'B', REBIND)
+            XX(PROPFIND,  4, 'P', PROPPATCH)
+            XX(LOCK,      1, 'I', LINK)
+            XX(UNLOCK,    2, 'S', UNSUBSCRIBE)
+            XX(UNLOCK,    2, 'B', UNBIND)
+            XX(UNLOCK,    3, 'I', UNLINK)
+#undef XX
+            default:
+              SET_ERRNO(HPE_INVALID_METHOD);
+              goto error;
+          }
+        } else {
+          SET_ERRNO(HPE_INVALID_METHOD);
+          goto error;
+        }
+
+        ++parser->index;
+        break;
+      }
+
+      case s_req_spaces_before_url:
+      {
+        if (ch == ' ') break;
+
+        MARK(url);
+        if (parser->method == HTTP_CONNECT) {
+          UPDATE_STATE(s_req_server_start);
+        }
+
+        UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
+        if (UNLIKELY(CURRENT_STATE() == s_dead)) {
+          SET_ERRNO(HPE_INVALID_URL);
+          goto error;
+        }
+
+        break;
+      }
+
+      case s_req_schema:
+      case s_req_schema_slash:
+      case s_req_schema_slash_slash:
+      case s_req_server_start:
+      {
+        switch (ch) {
+          /* No whitespace allowed here */
+          case ' ':
+          case CR:
+          case LF:
+            SET_ERRNO(HPE_INVALID_URL);
+            goto error;
+          default:
+            UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
+            if (UNLIKELY(CURRENT_STATE() == s_dead)) {
+              SET_ERRNO(HPE_INVALID_URL);
+              goto error;
+            }
+        }
+
+        break;
+      }
+
+      case s_req_server:
+      case s_req_server_with_at:
+      case s_req_path:
+      case s_req_query_string_start:
+      case s_req_query_string:
+      case s_req_fragment_start:
+      case s_req_fragment:
+      {
+        switch (ch) {
+          case ' ':
+            UPDATE_STATE(s_req_http_start);
+            CALLBACK_DATA(url);
+            break;
+          case CR:
+          case LF:
+            parser->http_major = 0;
+            parser->http_minor = 9;
+            UPDATE_STATE((ch == CR) ?
+              s_req_line_almost_done :
+              s_header_field_start);
+            CALLBACK_DATA(url);
+            break;
+          default:
+            UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
+            if (UNLIKELY(CURRENT_STATE() == s_dead)) {
+              SET_ERRNO(HPE_INVALID_URL);
+              goto error;
+            }
+        }
+        break;
+      }
+
+      case s_req_http_start:
+        switch (ch) {
+          case 'H':
+            UPDATE_STATE(s_req_http_H);
+            break;
+          case ' ':
+            break;
+          default:
+            SET_ERRNO(HPE_INVALID_CONSTANT);
+            goto error;
+        }
+        break;
+
+      case s_req_http_H:
+        STRICT_CHECK(ch != 'T');
+        UPDATE_STATE(s_req_http_HT);
+        break;
+
+      case s_req_http_HT:
+        STRICT_CHECK(ch != 'T');
+        UPDATE_STATE(s_req_http_HTT);
+        break;
+
+      case s_req_http_HTT:
+        STRICT_CHECK(ch != 'P');
+        UPDATE_STATE(s_req_http_HTTP);
+        break;
+
+      case s_req_http_HTTP:
+        STRICT_CHECK(ch != '/');
+        UPDATE_STATE(s_req_http_major);
+        break;
+
+      case s_req_http_major:
+        if (UNLIKELY(!IS_NUM(ch))) {
+          SET_ERRNO(HPE_INVALID_VERSION);
+          goto error;
+        }
+
+        parser->http_major = ch - '0';
+        UPDATE_STATE(s_req_http_dot);
+        break;
+
+      case s_req_http_dot:
+      {
+        if (UNLIKELY(ch != '.')) {
+          SET_ERRNO(HPE_INVALID_VERSION);
+          goto error;
+        }
+
+        UPDATE_STATE(s_req_http_minor);
+        break;
+      }
+
+      case s_req_http_minor:
+        if (UNLIKELY(!IS_NUM(ch))) {
+          SET_ERRNO(HPE_INVALID_VERSION);
+          goto error;
+        }
+
+        parser->http_minor = ch - '0';
+        UPDATE_STATE(s_req_http_end);
+        break;
+
+      case s_req_http_end:
+      {
+        if (ch == CR) {
+          UPDATE_STATE(s_req_line_almost_done);
+          break;
+        }
+
+        if (ch == LF) {
+          UPDATE_STATE(s_header_field_start);
+          break;
+        }
+
+        SET_ERRNO(HPE_INVALID_VERSION);
+        goto error;
+        break;
+      }
+
+      /* end of request line */
+      case s_req_line_almost_done:
+      {
+        if (UNLIKELY(ch != LF)) {
+          SET_ERRNO(HPE_LF_EXPECTED);
+          goto error;
+        }
+
+        UPDATE_STATE(s_header_field_start);
+        break;
+      }
+
+      case s_header_field_start:
+      {
+        if (ch == CR) {
+          UPDATE_STATE(s_headers_almost_done);
+          break;
+        }
+
+        if (ch == LF) {
+          /* they might be just sending \n instead of \r\n so this would be
+           * the second \n to denote the end of headers*/
+          UPDATE_STATE(s_headers_almost_done);
+          REEXECUTE();
+        }
+
+        c = TOKEN(ch);
+
+        if (UNLIKELY(!c)) {
+          SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
+          goto error;
+        }
+
+        MARK(header_field);
+
+        parser->index = 0;
+        UPDATE_STATE(s_header_field);
+
+        switch (c) {
+          case 'c':
+            parser->header_state = h_C;
+            break;
+
+          case 'p':
+            parser->header_state = h_matching_proxy_connection;
+            break;
+
+          case 't':
+            parser->header_state = h_matching_transfer_encoding;
+            break;
+
+          case 'u':
+            parser->header_state = h_matching_upgrade;
+            break;
+
+          default:
+            parser->header_state = h_general;
+            break;
+        }
+        break;
+      }
+
+      case s_header_field:
+      {
+        const char* start = p;
+        for (; p != data + len; p++) {
+          ch = *p;
+          c = TOKEN(ch);
+
+          if (!c)
+            break;
+
+          switch (parser->header_state) {
+            case h_general: {
+              size_t limit = data + len - p;
+              limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
+              while (p+1 < data + limit && TOKEN(p[1])) {
+                p++;
+              }
+              break;
+            }
+
+            case h_C:
+              parser->index++;
+              parser->header_state = (c == 'o' ? h_CO : h_general);
+              break;
+
+            case h_CO:
+              parser->index++;
+              parser->header_state = (c == 'n' ? h_CON : h_general);
+              break;
+
+            case h_CON:
+              parser->index++;
+              switch (c) {
+                case 'n':
+                  parser->header_state = h_matching_connection;
+                  break;
+                case 't':
+                  parser->header_state = h_matching_content_length;
+                  break;
+                default:
+                  parser->header_state = h_general;
+                  break;
+              }
+              break;
+
+            /* connection */
+
+            case h_matching_connection:
+              parser->index++;
+              if (parser->index > sizeof(CONNECTION)-1
+                  || c != CONNECTION[parser->index]) {
+                parser->header_state = h_general;
+              } else if (parser->index == sizeof(CONNECTION)-2) {
+                parser->header_state = h_connection;
+              }
+              break;
+
+            /* proxy-connection */
+
+            case h_matching_proxy_connection:
+              parser->index++;
+              if (parser->index > sizeof(PROXY_CONNECTION)-1
+                  || c != PROXY_CONNECTION[parser->index]) {
+                parser->header_state = h_general;
+              } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
+                parser->header_state = h_connection;
+              }
+              break;
+
+            /* content-length */
+
+            case h_matching_content_length:
+              parser->index++;
+              if (parser->index > sizeof(CONTENT_LENGTH)-1
+                  || c != CONTENT_LENGTH[parser->index]) {
+                parser->header_state = h_general;
+              } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
+                parser->header_state = h_content_length;
+              }
+              break;
+
+            /* transfer-encoding */
+
+            case h_matching_transfer_encoding:
+              parser->index++;
+              if (parser->index > sizeof(TRANSFER_ENCODING)-1
+                  || c != TRANSFER_ENCODING[parser->index]) {
+                parser->header_state = h_general;
+              } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
+                parser->header_state = h_transfer_encoding;
+              }
+              break;
+
+            /* upgrade */
+
+            case h_matching_upgrade:
+              parser->index++;
+              if (parser->index > sizeof(UPGRADE)-1
+                  || c != UPGRADE[parser->index]) {
+                parser->header_state = h_general;
+              } else if (parser->index == sizeof(UPGRADE)-2) {
+                parser->header_state = h_upgrade;
+              }
+              break;
+
+            case h_connection:
+            case h_content_length:
+            case h_transfer_encoding:
+            case h_upgrade:
+              if (ch != ' ') parser->header_state = h_general;
+              break;
+
+            default:
+              assert(0 && "Unknown header_state");
+              break;
+          }
+        }
+
+        if (p == data + len) {
+          --p;
+          COUNT_HEADER_SIZE(p - start);
+          break;
+        }
+
+        COUNT_HEADER_SIZE(p - start);
+
+        if (ch == ':') {
+          UPDATE_STATE(s_header_value_discard_ws);
+          CALLBACK_DATA(header_field);
+          break;
+        }
+
+        SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
+        goto error;
+      }
+
+      case s_header_value_discard_ws:
+        if (ch == ' ' || ch == '\t') break;
+
+        if (ch == CR) {
+          UPDATE_STATE(s_header_value_discard_ws_almost_done);
+          break;
+        }
+
+        if (ch == LF) {
+          UPDATE_STATE(s_header_value_discard_lws);
+          break;
+        }
+
+        /* fall through */
+
+      case s_header_value_start:
+      {
+        MARK(header_value);
+
+        UPDATE_STATE(s_header_value);
+        parser->index = 0;
+
+        c = LOWER(ch);
+
+        switch (parser->header_state) {
+          case h_upgrade:
+            parser->flags |= F_UPGRADE;
+            parser->header_state = h_general;
+            break;
+
+          case h_transfer_encoding:
+            /* looking for 'Transfer-Encoding: chunked' */
+            if ('c' == c) {
+              parser->header_state = h_matching_transfer_encoding_chunked;
+            } else {
+              parser->header_state = h_general;
+            }
+            break;
+
+          case h_content_length:
+            if (UNLIKELY(!IS_NUM(ch))) {
+              SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
+              goto error;
+            }
+
+            if (parser->flags & F_CONTENTLENGTH) {
+              SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
+              goto error;
+            }
+
+            parser->flags |= F_CONTENTLENGTH;
+            parser->content_length = ch - '0';
+            parser->header_state = h_content_length_num;
+            break;
+
+          case h_connection:
+            /* looking for 'Connection: keep-alive' */
+            if (c == 'k') {
+              parser->header_state = h_matching_connection_keep_alive;
+            /* looking for 'Connection: close' */
+            } else if (c == 'c') {
+              parser->header_state = h_matching_connection_close;
+            } else if (c == 'u') {
+              parser->header_state = h_matching_connection_upgrade;
+            } else {
+              parser->header_state = h_matching_connection_token;
+            }
+            break;
+
+          /* Multi-value `Connection` header */
+          case h_matching_connection_token_start:
+            break;
+
+          default:
+            parser->header_state = h_general;
+            break;
+        }
+        break;
+      }
+
+      case s_header_value:
+      {
+        const char* start = p;
+        enum header_states h_state = (enum header_states) parser->header_state;
+        for (; p != data + len; p++) {
+          ch = *p;
+          if (ch == CR) {
+            UPDATE_STATE(s_header_almost_done);
+            parser->header_state = h_state;
+            CALLBACK_DATA(header_value);
+            break;
+          }
+
+          if (ch == LF) {
+            UPDATE_STATE(s_header_almost_done);
+            COUNT_HEADER_SIZE(p - start);
+            parser->header_state = h_state;
+            CALLBACK_DATA_NOADVANCE(header_value);
+            REEXECUTE();
+          }
+
+          if (!lenient && !IS_HEADER_CHAR(ch)) {
+            SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
+            goto error;
+          }
+
+          c = LOWER(ch);
+
+          switch (h_state) {
+            case h_general:
+            {
+              const char* p_cr;
+              const char* p_lf;
+              size_t limit = data + len - p;
+
+              limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
+
+              p_cr = (const char*) memchr(p, CR, limit);
+              p_lf = (const char*) memchr(p, LF, limit);
+              if (p_cr != NULL) {
+                if (p_lf != NULL && p_cr >= p_lf)
+                  p = p_lf;
+                else
+                  p = p_cr;
+              } else if (UNLIKELY(p_lf != NULL)) {
+                p = p_lf;
+              } else {
+                p = data + len;
+              }
+              --p;
+              break;
+            }
+
+            case h_connection:
+            case h_transfer_encoding:
+              assert(0 && "Shouldn't get here.");
+              break;
+
+            case h_content_length:
+              if (ch == ' ') break;
+              h_state = h_content_length_num;
+              /* fall through */
+
+            case h_content_length_num:
+            {
+              uint64_t t;
+
+              if (ch == ' ') {
+                h_state = h_content_length_ws;
+                break;
+              }
+
+              if (UNLIKELY(!IS_NUM(ch))) {
+                SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
+                parser->header_state = h_state;
+                goto error;
+              }
+
+              t = parser->content_length;
+              t *= 10;
+              t += ch - '0';
+
+              /* Overflow? Test against a conservative limit for simplicity. */
+              if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
+                SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
+                parser->header_state = h_state;
+                goto error;
+              }
+
+              parser->content_length = t;
+              break;
+            }
+
+            case h_content_length_ws:
+              if (ch == ' ') break;
+              SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
+              parser->header_state = h_state;
+              goto error;
+
+            /* Transfer-Encoding: chunked */
+            case h_matching_transfer_encoding_chunked:
+              parser->index++;
+              if (parser->index > sizeof(CHUNKED)-1
+                  || c != CHUNKED[parser->index]) {
+                h_state = h_general;
+              } else if (parser->index == sizeof(CHUNKED)-2) {
+                h_state = h_transfer_encoding_chunked;
+              }
+              break;
+
+            case h_matching_connection_token_start:
+              /* looking for 'Connection: keep-alive' */
+              if (c == 'k') {
+                h_state = h_matching_connection_keep_alive;
+              /* looking for 'Connection: close' */
+              } else if (c == 'c') {
+                h_state = h_matching_connection_close;
+              } else if (c == 'u') {
+                h_state = h_matching_connection_upgrade;
+              } else if (STRICT_TOKEN(c)) {
+                h_state = h_matching_connection_token;
+              } else if (c == ' ' || c == '\t') {
+                /* Skip lws */
+              } else {
+                h_state = h_general;
+              }
+              break;
+
+            /* looking for 'Connection: keep-alive' */
+            case h_matching_connection_keep_alive:
+              parser->index++;
+              if (parser->index > sizeof(KEEP_ALIVE)-1
+                  || c != KEEP_ALIVE[parser->index]) {
+                h_state = h_matching_connection_token;
+              } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
+                h_state = h_connection_keep_alive;
+              }
+              break;
+
+            /* looking for 'Connection: close' */
+            case h_matching_connection_close:
+              parser->index++;
+              if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
+                h_state = h_matching_connection_token;
+              } else if (parser->index == sizeof(CLOSE)-2) {
+                h_state = h_connection_close;
+              }
+              break;
+
+            /* looking for 'Connection: upgrade' */
+            case h_matching_connection_upgrade:
+              parser->index++;
+              if (parser->index > sizeof(UPGRADE) - 1 ||
+                  c != UPGRADE[parser->index]) {
+                h_state = h_matching_connection_token;
+              } else if (parser->index == sizeof(UPGRADE)-2) {
+                h_state = h_connection_upgrade;
+              }
+              break;
+
+            case h_matching_connection_token:
+              if (ch == ',') {
+                h_state = h_matching_connection_token_start;
+                parser->index = 0;
+              }
+              break;
+
+            case h_transfer_encoding_chunked:
+              if (ch != ' ') h_state = h_general;
+              break;
+
+            case h_connection_keep_alive:
+            case h_connection_close:
+            case h_connection_upgrade:
+              if (ch == ',') {
+                if (h_state == h_connection_keep_alive) {
+                  parser->flags |= F_CONNECTION_KEEP_ALIVE;
+                } else if (h_state == h_connection_close) {
+                  parser->flags |= F_CONNECTION_CLOSE;
+                } else if (h_state == h_connection_upgrade) {
+                  parser->flags |= F_CONNECTION_UPGRADE;
+                }
+                h_state = h_matching_connection_token_start;
+                parser->index = 0;
+              } else if (ch != ' ') {
+                h_state = h_matching_connection_token;
+              }
+              break;
+
+            default:
+              UPDATE_STATE(s_header_value);
+              h_state = h_general;
+              break;
+          }
+        }
+        parser->header_state = h_state;
+
+        if (p == data + len)
+          --p;
+
+        COUNT_HEADER_SIZE(p - start);
+        break;
+      }
+
+      case s_header_almost_done:
+      {
+        if (UNLIKELY(ch != LF)) {
+          SET_ERRNO(HPE_LF_EXPECTED);
+          goto error;
+        }
+
+        UPDATE_STATE(s_header_value_lws);
+        break;
+      }
+
+      case s_header_value_lws:
+      {
+        if (ch == ' ' || ch == '\t') {
+          UPDATE_STATE(s_header_value_start);
+          REEXECUTE();
+        }
+
+        /* finished the header */
+        switch (parser->header_state) {
+          case h_connection_keep_alive:
+            parser->flags |= F_CONNECTION_KEEP_ALIVE;
+            break;
+          case h_connection_close:
+            parser->flags |= F_CONNECTION_CLOSE;
+            break;
+          case h_transfer_encoding_chunked:
+            parser->flags |= F_CHUNKED;
+            break;
+          case h_connection_upgrade:
+            parser->flags |= F_CONNECTION_UPGRADE;
+            break;
+          default:
+            break;
+        }
+
+        UPDATE_STATE(s_header_field_start);
+        REEXECUTE();
+      }
+
+      case s_header_value_discard_ws_almost_done:
+      {
+        STRICT_CHECK(ch != LF);
+        UPDATE_STATE(s_header_value_discard_lws);
+        break;
+      }
+
+      case s_header_value_discard_lws:
+      {
+        if (ch == ' ' || ch == '\t') {
+          UPDATE_STATE(s_header_value_discard_ws);
+          break;
+        } else {
+          switch (parser->header_state) {
+            case h_connection_keep_alive:
+              parser->flags |= F_CONNECTION_KEEP_ALIVE;
+              break;
+            case h_connection_close:
+              parser->flags |= F_CONNECTION_CLOSE;
+              break;
+            case h_connection_upgrade:
+              parser->flags |= F_CONNECTION_UPGRADE;
+              break;
+            case h_transfer_encoding_chunked:
+              parser->flags |= F_CHUNKED;
+              break;
+            default:
+              break;
+          }
+
+          /* header value was empty */
+          MARK(header_value);
+          UPDATE_STATE(s_header_field_start);
+          CALLBACK_DATA_NOADVANCE(header_value);
+          REEXECUTE();
+        }
+      }
+
+      case s_headers_almost_done:
+      {
+        STRICT_CHECK(ch != LF);
+
+        if (parser->flags & F_TRAILING) {
+          /* End of a chunked request */
+          UPDATE_STATE(s_message_done);
+          CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
+          REEXECUTE();
+        }
+
+        /* Cannot use chunked encoding and a content-length header together
+           per the HTTP specification. */
+        if ((parser->flags & F_CHUNKED) &&
+            (parser->flags & F_CONTENTLENGTH)) {
+          SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
+          goto error;
+        }
+
+        UPDATE_STATE(s_headers_done);
+
+        /* Set this here so that on_headers_complete() callbacks can see it */
+        if ((parser->flags & F_UPGRADE) &&
+            (parser->flags & F_CONNECTION_UPGRADE)) {
+          /* For responses, "Upgrade: foo" and "Connection: upgrade" are
+           * mandatory only when it is a 101 Switching Protocols response,
+           * otherwise it is purely informational, to announce support.
+           */
+          parser->upgrade =
+              (parser->type == HTTP_REQUEST || parser->status_code == 101);
+        } else {
+          parser->upgrade = (parser->method == HTTP_CONNECT);
+        }
+
+        /* Here we call the headers_complete callback. This is somewhat
+         * different than other callbacks because if the user returns 1, we
+         * will interpret that as saying that this message has no body. This
+         * is needed for the annoying case of receiving a response to a HEAD
+         * request.
+         *
+         * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
+         * we have to simulate it by handling a change in errno below.
+         */
+        if (settings->on_headers_complete) {
+          switch (settings->on_headers_complete(parser)) {
+            case 0:
+              break;
+
+            case 2:
+              parser->upgrade = 1;
+
+              /* fall through */
+            case 1:
+              parser->flags |= F_SKIPBODY;
+              break;
+
+            default:
+              SET_ERRNO(HPE_CB_headers_complete);
+              RETURN(p - data); /* Error */
+          }
+        }
+
+        if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
+          RETURN(p - data);
+        }
+
+        REEXECUTE();
+      }
+
+      case s_headers_done:
+      {
+        int hasBody;
+        STRICT_CHECK(ch != LF);
+
+        parser->nread = 0;
+        nread = 0;
+
+        hasBody = parser->flags & F_CHUNKED ||
+          (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
+        if (parser->upgrade && (parser->method == HTTP_CONNECT ||
+                                (parser->flags & F_SKIPBODY) || !hasBody)) {
+          /* Exit, the rest of the message is in a different protocol. */
+          UPDATE_STATE(NEW_MESSAGE());
+          CALLBACK_NOTIFY(message_complete);
+          RETURN((p - data) + 1);
+        }
+
+        if (parser->flags & F_SKIPBODY) {
+          UPDATE_STATE(NEW_MESSAGE());
+          CALLBACK_NOTIFY(message_complete);
+        } else if (parser->flags & F_CHUNKED) {
+          /* chunked encoding - ignore Content-Length header */
+          UPDATE_STATE(s_chunk_size_start);
+        } else {
+          if (parser->content_length == 0) {
+            /* Content-Length header given but zero: Content-Length: 0\r\n */
+            UPDATE_STATE(NEW_MESSAGE());
+            CALLBACK_NOTIFY(message_complete);
+          } else if (parser->content_length != ULLONG_MAX) {
+            /* Content-Length header given and non-zero */
+            UPDATE_STATE(s_body_identity);
+          } else {
+            if (!http_message_needs_eof(parser)) {
+              /* Assume content-length 0 - read the next */
+              UPDATE_STATE(NEW_MESSAGE());
+              CALLBACK_NOTIFY(message_complete);
+            } else {
+              /* Read body until EOF */
+              UPDATE_STATE(s_body_identity_eof);
+            }
+          }
+        }
+
+        break;
+      }
+
+      case s_body_identity:
+      {
+        uint64_t to_read = MIN(parser->content_length,
+                               (uint64_t) ((data + len) - p));
+
+        assert(parser->content_length != 0
+            && parser->content_length != ULLONG_MAX);
+
+        /* The difference between advancing content_length and p is because
+         * the latter will automatically advance on the next loop iteration.
+         * Further, if content_length ends up at 0, we want to see the last
+         * byte again for our message complete callback.
+         */
+        MARK(body);
+        parser->content_length -= to_read;
+        p += to_read - 1;
+
+        if (parser->content_length == 0) {
+          UPDATE_STATE(s_message_done);
+
+          /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
+           *
+           * The alternative to doing this is to wait for the next byte to
+           * trigger the data callback, just as in every other case. The
+           * problem with this is that this makes it difficult for the test
+           * harness to distinguish between complete-on-EOF and
+           * complete-on-length. It's not clear that this distinction is
+           * important for applications, but let's keep it for now.
+           */
+          CALLBACK_DATA_(body, p - body_mark + 1, p - data);
+          REEXECUTE();
+        }
+
+        break;
+      }
+
+      /* read until EOF */
+      case s_body_identity_eof:
+        MARK(body);
+        p = data + len - 1;
+
+        break;
+
+      case s_message_done:
+        UPDATE_STATE(NEW_MESSAGE());
+        CALLBACK_NOTIFY(message_complete);
+        if (parser->upgrade) {
+          /* Exit, the rest of the message is in a different protocol. */
+          RETURN((p - data) + 1);
+        }
+        break;
+
+      case s_chunk_size_start:
+      {
+        assert(nread == 1);
+        assert(parser->flags & F_CHUNKED);
+
+        unhex_val = unhex[(unsigned char)ch];
+        if (UNLIKELY(unhex_val == -1)) {
+          SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
+          goto error;
+        }
+
+        parser->content_length = unhex_val;
+        UPDATE_STATE(s_chunk_size);
+        break;
+      }
+
+      case s_chunk_size:
+      {
+        uint64_t t;
+
+        assert(parser->flags & F_CHUNKED);
+
+        if (ch == CR) {
+          UPDATE_STATE(s_chunk_size_almost_done);
+          break;
+        }
+
+        unhex_val = unhex[(unsigned char)ch];
+
+        if (unhex_val == -1) {
+          if (ch == ';' || ch == ' ') {
+            UPDATE_STATE(s_chunk_parameters);
+            break;
+          }
+
+          SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
+          goto error;
+        }
+
+        t = parser->content_length;
+        t *= 16;
+        t += unhex_val;
+
+        /* Overflow? Test against a conservative limit for simplicity. */
+        if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
+          SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
+          goto error;
+        }
+
+        parser->content_length = t;
+        break;
+      }
+
+      case s_chunk_parameters:
+      {
+        assert(parser->flags & F_CHUNKED);
+        /* just ignore this shit. TODO check for overflow */
+        if (ch == CR) {
+          UPDATE_STATE(s_chunk_size_almost_done);
+          break;
+        }
+        break;
+      }
+
+      case s_chunk_size_almost_done:
+      {
+        assert(parser->flags & F_CHUNKED);
+        STRICT_CHECK(ch != LF);
+
+        parser->nread = 0;
+        nread = 0;
+
+        if (parser->content_length == 0) {
+          parser->flags |= F_TRAILING;
+          UPDATE_STATE(s_header_field_start);
+        } else {
+          UPDATE_STATE(s_chunk_data);
+        }
+        CALLBACK_NOTIFY(chunk_header);
+        break;
+      }
+
+      case s_chunk_data:
+      {
+        uint64_t to_read = MIN(parser->content_length,
+                               (uint64_t) ((data + len) - p));
+
+        assert(parser->flags & F_CHUNKED);
+        assert(parser->content_length != 0
+            && parser->content_length != ULLONG_MAX);
+
+        /* See the explanation in s_body_identity for why the content
+         * length and data pointers are managed this way.
+         */
+        MARK(body);
+        parser->content_length -= to_read;
+        p += to_read - 1;
+
+        if (parser->content_length == 0) {
+          UPDATE_STATE(s_chunk_data_almost_done);
+        }
+
+        break;
+      }
+
+      case s_chunk_data_almost_done:
+        assert(parser->flags & F_CHUNKED);
+        assert(parser->content_length == 0);
+        STRICT_CHECK(ch != CR);
+        UPDATE_STATE(s_chunk_data_done);
+        CALLBACK_DATA(body);
+        break;
+
+      case s_chunk_data_done:
+        assert(parser->flags & F_CHUNKED);
+        STRICT_CHECK(ch != LF);
+        parser->nread = 0;
+        nread = 0;
+        UPDATE_STATE(s_chunk_size_start);
+        CALLBACK_NOTIFY(chunk_complete);
+        break;
+
+      default:
+        assert(0 && "unhandled state");
+        SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
+        goto error;
+    }
+  }
+
+  /* Run callbacks for any marks that we have leftover after we ran out of
+   * bytes. There should be at most one of these set, so it's OK to invoke
+   * them in series (unset marks will not result in callbacks).
+   *
+   * We use the NOADVANCE() variety of callbacks here because 'p' has already
+   * overflowed 'data' and this allows us to correct for the off-by-one that
+   * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
+   * value that's in-bounds).
+   */
+
+  assert(((header_field_mark ? 1 : 0) +
+          (header_value_mark ? 1 : 0) +
+          (url_mark ? 1 : 0)  +
+          (body_mark ? 1 : 0) +
+          (status_mark ? 1 : 0)) <= 1);
+
+  CALLBACK_DATA_NOADVANCE(header_field);
+  CALLBACK_DATA_NOADVANCE(header_value);
+  CALLBACK_DATA_NOADVANCE(url);
+  CALLBACK_DATA_NOADVANCE(body);
+  CALLBACK_DATA_NOADVANCE(status);
+
+  RETURN(len);
+
+error:
+  if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
+    SET_ERRNO(HPE_UNKNOWN);
+  }
+
+  RETURN(p - data);
+}
+
+
+/* Returns 1 if only EOF can mark the end of the message, 0 otherwise. */
+int
+http_message_needs_eof (const http_parser *parser)
+{
+  const unsigned int code = parser->status_code;
+
+  /* Requests never need EOF. */
+  if (parser->type == HTTP_REQUEST)
+    return 0;
+
+  /* Responses without a body; see RFC 2616 section 4.4. */
+  if ((code >= 100 && code <= 199) ||      /* 1xx e.g. Continue */
+      code == 204 ||                        /* No Content */
+      code == 304 ||                        /* Not Modified */
+      (parser->flags & F_SKIPBODY) != 0)    /* response to a HEAD request */
+    return 0;
+
+  /* Chunked framing or a known length (anything but the ULLONG_MAX
+   * placeholder in content_length) already delimits the body. */
+  return !((parser->flags & F_CHUNKED) ||
+           parser->content_length != ULLONG_MAX);
+}
+
+
+int
+http_should_keep_alive (const http_parser *parser)
+{
+  /* Both version numbers non-zero is treated as HTTP/1.1 semantics
+   * (persistent unless "close"); anything else as HTTP/1.0 or earlier
+   * (persistent only with an explicit keep-alive flag). */
+  const int persistent_by_default =
+      parser->http_major > 0 && parser->http_minor > 0;
+  const int veto = persistent_by_default
+      ? (parser->flags & F_CONNECTION_CLOSE)
+      : !(parser->flags & F_CONNECTION_KEEP_ALIVE);
+
+  if (veto)
+    return 0;
+  /* A message that can only end at EOF rules out reusing the connection. */
+  return !http_message_needs_eof(parser);
+}
+
+
+const char *
+http_method_str (enum http_method m)
+{
+  return ELEM_AT(method_strings, m, "<unknown>");  /* "<unknown>" is the fallback passed to ELEM_AT */
+}
+
+const char *
+http_status_str (enum http_status s)
+{
+  switch (s) {  /* one `case` per HTTP_STATUS_MAP row, generated through XX */
+#define XX(num, name, string) case HTTP_STATUS_##name: return #string;
+    HTTP_STATUS_MAP(XX)
+#undef XX
+    default: return "<unknown>";  /* s is not a code listed in the map */
+  }
+}
+
+void
+http_parser_init (http_parser *parser, enum http_parser_type t)
+{
+  void *const app_data = parser->data; /* the one field kept across init */
+  memset(parser, 0, sizeof(*parser));
+  parser->http_errno = HPE_OK;
+  parser->type = t;
+  parser->state = (t == HTTP_REQUEST) ? s_start_req : ((t == HTTP_RESPONSE) ? s_start_res : s_start_req_or_res);
+  parser->data = app_data;
+}
+
+void
+http_parser_settings_init(http_parser_settings *settings)
+{
+  memset(settings, 0, sizeof(*settings));  /* zero-fills every callback slot */
+}
+
+const char *
+http_errno_name(enum http_errno err) { /* name of err; "<unknown>" if err is outside the table */
+  assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); /* still traps bad codes in debug builds */
+  return ((size_t) err) < ARRAY_SIZE(http_strerror_tab) ? http_strerror_tab[err].name : "<unknown>";
+}
+
+const char *
+http_errno_description(enum http_errno err) { /* text for err; "<unknown>" if err is outside the table */
+  assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); /* still traps bad codes in debug builds */
+  return ((size_t) err) < ARRAY_SIZE(http_strerror_tab) ? http_strerror_tab[err].description : "<unknown>";
+}
+
+static enum http_host_state
+http_parse_host_char(enum http_host_state s, const char ch) {
+  switch(s) {  /* yields the state after ch, or s_http_host_dead if ch is not accepted in s */
+    case s_http_userinfo:
+    case s_http_userinfo_start:
+      if (ch == '@') {  /* '@' ends the userinfo; the host starts next */
+        return s_http_host_start;
+      }
+
+      if (IS_USERINFO_CHAR(ch)) {
+        return s_http_userinfo;
+      }
+      break;
+
+    case s_http_host_start:
+      if (ch == '[') {  /* opens a bracketed host literal */
+        return s_http_host_v6_start;
+      }
+
+      if (IS_HOST_CHAR(ch)) {
+        return s_http_host;
+      }
+
+      break;
+
+    case s_http_host:
+      if (IS_HOST_CHAR(ch)) {
+        return s_http_host;
+      }
+
+    /* fall through */
+    case s_http_host_v6_end:  /* also reached from s_http_host: only ':' is accepted here */
+      if (ch == ':') {
+        return s_http_host_port_start;
+      }
+
+      break;
+
+    case s_http_host_v6:
+      if (ch == ']') {  /* closes the bracketed literal */
+        return s_http_host_v6_end;
+      }
+
+    /* fall through */
+    case s_http_host_v6_start:  /* also reached from s_http_host_v6 for any char but ']' */
+      if (IS_HEX(ch) || ch == ':' || ch == '.') {
+        return s_http_host_v6;
+      }
+
+      if (s == s_http_host_v6 && ch == '%') {  /* '%' is refused directly after '[' */
+        return s_http_host_v6_zone_start;
+      }
+      break;
+
+    case s_http_host_v6_zone:
+      if (ch == ']') {  /* closes the literal after at least one zone character */
+        return s_http_host_v6_end;
+      }
+
+    /* fall through */
+    case s_http_host_v6_zone_start:  /* also reached from s_http_host_v6_zone for any char but ']' */
+      /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
+      if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
+          ch == '~') {
+        return s_http_host_v6_zone;
+      }
+      break;
+
+    case s_http_host_port:
+    case s_http_host_port_start:
+      if (IS_NUM(ch)) {  /* a port is digits only */
+        return s_http_host_port;
+      }
+
+      break;
+
+    default:
+      break;
+  }
+  return s_http_host_dead;  /* no transition matched */
+}
+
+static int
+http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
+  /* Re-scans the span recorded as UF_HOST and splits it into userinfo,
+   * host and port. Returns 0 if the span is well formed, 1 otherwise.
+   * The scan covers exactly the bytes [off, off + len) of that span.
+   *
+   *   buf      - start of the buffer that the offsets in u refer to
+   *   u        - UF_HOST must already be set; its off/len are rewritten to
+   *              cover the host alone, and UF_PORT / UF_USERINFO are filled
+   *              in and flagged in field_set when they occur
+   *   found_at - nonzero starts the scan in the userinfo states instead of
+   *              the host states
+   *
+   * When 1 is returned, u may already have been partially updated.
+   */
+  const char *const first = buf + u->field_data[UF_HOST].off;
+  const char *const limit = first + u->field_data[UF_HOST].len;
+  enum http_host_state prev;
+  const char *cur;
+
+  assert(u->field_set & (1 << UF_HOST));
+
+  /* The host length is rebuilt below, one accepted character at a time. */
+  u->field_data[UF_HOST].len = 0;
+  prev = found_at ? s_http_userinfo_start : s_http_host_start;
+
+  for (cur = first; cur < limit; cur++) {
+    const enum http_host_state next = http_parse_host_char(prev, *cur);
+    /* A change of state means this character is the first one of the
+     * component that 'next' belongs to. */
+    const int entering = (prev != next);
+
+    if (next == s_http_host_dead) {
+      return 1;
+    }
+
+    if (next == s_http_host || next == s_http_host_v6) {
+      /* Plain host, or the address part of a bracketed literal. */
+      if (entering) {
+        u->field_data[UF_HOST].off = cur - buf;
+      }
+      u->field_data[UF_HOST].len++;
+    } else if (next == s_http_host_v6_zone_start ||
+               next == s_http_host_v6_zone) {
+      /* The '%' and the zone id extend the host that is already open. */
+      u->field_data[UF_HOST].len++;
+    } else if (next == s_http_host_port) {
+      /* Port digits: open the field on the first one. */
+      if (entering) {
+        u->field_data[UF_PORT].off = cur - buf;
+        u->field_data[UF_PORT].len = 0;
+        u->field_set |= (1 << UF_PORT);
+      }
+      u->field_data[UF_PORT].len++;
+    } else if (next == s_http_userinfo) {
+      /* Userinfo characters: open the field on the first one. */
+      if (entering) {
+        u->field_data[UF_USERINFO].off = cur - buf;
+        u->field_data[UF_USERINFO].len = 0;
+        u->field_set |= (1 << UF_USERINFO);
+      }
+      u->field_data[UF_USERINFO].len++;
+    }
+    /* Every other live state records nothing. */
+
+    prev = next;
+  }
+
+  /* Make sure we don't end somewhere unexpected: stopping in any of these
+   * states means a component was opened but never completed, or that
+   * nothing was scanned at all.
+   */
+  return prev == s_http_host_start
+      || prev == s_http_host_v6_start
+      || prev == s_http_host_v6
+      || prev == s_http_host_v6_zone_start
+      || prev == s_http_host_v6_zone
+      || prev == s_http_host_port_start
+      || prev == s_http_userinfo
+      || prev == s_http_userinfo_start;
+}
+
+void
+http_parser_url_init(struct http_parser_url *u) {
+  memset(u, 0, sizeof(*u));  /* zero-fills field_set, port and every field_data entry */
+}
+
+/* Splits the URL in buf[0..buflen) into the UF_* spans of u->field_data and
+ * sets the matching (1 << UF_*) bits in u->field_set; a port, if present, is
+ * also converted into u->port. A nonzero is_connect accepts only the
+ * "hostname:port" form. Returns 0 on success and 1 on failure. Spans are
+ * uint16_t, so buffers over 0xffff bytes are rejected, not truncated.
+ */
+int
+http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
+                      struct http_parser_url *u)
+{
+  enum state s;
+  const char *p;
+  enum http_parser_url_fields uf, old_uf;
+  int found_at = 0;
+
+  if (buflen == 0 || buflen > 0xffff) {
+    return 1;
+  }
+
+  u->port = u->field_set = 0;
+  s = is_connect ? s_req_server_start : s_req_spaces_before_url;
+  old_uf = UF_MAX;
+
+  for (p = buf; p < buf + buflen; p++) {
+    s = parse_url_char(s, *p);
+
+    /* Figure out the next field that we're operating on */
+    switch (s) {
+      case s_dead:
+        return 1;
+
+      /* Skip delimiters */
+      case s_req_schema_slash:
+      case s_req_schema_slash_slash:
+      case s_req_server_start:
+      case s_req_query_string_start:
+      case s_req_fragment_start:
+        continue;
+
+      case s_req_schema:
+        uf = UF_SCHEMA;
+        break;
+
+      case s_req_server_with_at:
+        found_at = 1;
+
+      /* fall through */
+      case s_req_server:
+        uf = UF_HOST;
+        break;
+
+      case s_req_path:
+        uf = UF_PATH;
+        break;
+
+      case s_req_query_string:
+        uf = UF_QUERY;
+        break;
+
+      case s_req_fragment:
+        uf = UF_FRAGMENT;
+        break;
+
+      default:
+        assert(!"Unexpected state");
+        return 1;
+    }
+
+    /* Nothing's changed; soldier on */
+    if (uf == old_uf) {
+      u->field_data[uf].len++;
+      continue;
+    }
+
+    u->field_data[uf].off = (uint16_t) (p - buf);
+    u->field_data[uf].len = 1;
+
+    u->field_set |= (1 << uf);
+    old_uf = uf;
+  }
+
+  /* host must be present if there is a schema */
+  /* parsing http:///toto will fail */
+  if ((u->field_set & (1 << UF_SCHEMA)) &&
+      (u->field_set & (1 << UF_HOST)) == 0) {
+    return 1;
+  }
+
+  if (u->field_set & (1 << UF_HOST)) {
+    if (http_parse_host(buf, u, found_at) != 0) {
+      return 1;
+    }
+  }
+
+  /* CONNECT requests can only contain "hostname:port" */
+  if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
+    return 1;
+  }
+
+  if (u->field_set & (1 << UF_PORT)) {
+    const uint16_t off = u->field_data[UF_PORT].off;
+    const uint16_t len = u->field_data[UF_PORT].len;
+    const char *const end = buf + off + len;
+    const char *digit;
+    unsigned long v = 0;
+
+    /* NOTE: The characters are already validated and are in the [0-9] range */
+    assert((size_t) (off + len) <= buflen && "Port number overflow");
+    for (digit = buf + off; digit < end; digit++) {
+      v = v * 10 + (unsigned long) (*digit - '0');
+
+      /* Ports have a max value of 2^16 - 1 */
+      if (v > 0xffff) {
+        return 1;
+      }
+    }
+
+    u->port = (uint16_t) v;
+  }
+
+  return 0;
+}
+
+void
+http_parser_pause(http_parser *parser, int paused) {
+  const enum http_errno current = HTTP_PARSER_ERRNO(parser);
+  uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */
+  /* Pausing or unpausing only applies to a parser that is healthy or
+   * already paused. Anything else is a caller error: it trips the assert
+   * in debug builds and is ignored when assertions are compiled out.
+   */
+  if (current != HPE_OK && current != HPE_PAUSED) {
+    assert(0 && "Attempting to pause parser in error state");
+    return;
+  }
+  SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
+}
+
+int
+http_body_is_final(const struct http_parser *parser) {
+    return parser->state == s_message_done;  /* 1 only while the state is s_message_done, else 0 */
+}
+
+unsigned long
+http_parser_version(void) {
+  /* Packed as (major << 16) | (minor << 8) | patch. */
+  return (HTTP_PARSER_VERSION_MAJOR << 16) | (HTTP_PARSER_VERSION_MINOR << 8) |
+         HTTP_PARSER_VERSION_PATCH;
+}
diff --git a/src/net/strategies/http_parser/http_parser.h b/src/net/strategies/http_parser/http_parser.h
new file mode 100755
index 00000000..e894d7ce
--- /dev/null
+++ b/src/net/strategies/http_parser/http_parser.h
@@ -0,0 +1,436 @@
+/* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#ifndef http_parser_h
+#define http_parser_h
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Also update SONAME in the Makefile whenever you change these. */
+#define HTTP_PARSER_VERSION_MAJOR 2
+#define HTTP_PARSER_VERSION_MINOR 8
+#define HTTP_PARSER_VERSION_PATCH 1
+
+#include <stddef.h>
+#if defined(_WIN32) && !defined(__MINGW32__) && \
+  (!defined(_MSC_VER) || _MSC_VER<1600) && !defined(__WINE__)
+#include <BaseTsd.h>
+typedef __int8 int8_t;
+typedef unsigned __int8 uint8_t;
+typedef __int16 int16_t;
+typedef unsigned __int16 uint16_t;
+typedef __int32 int32_t;
+typedef unsigned __int32 uint32_t;
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+#else
+#include <stdint.h>
+#endif
+
+/* Compile with -DHTTP_PARSER_STRICT=0 to make fewer checks, but run
+ * faster
+ */
+#ifndef HTTP_PARSER_STRICT
+# define HTTP_PARSER_STRICT 1
+#endif
+
+/* Maximum header size allowed. If the macro is not defined
+ * before including this header then the default is used. To
+ * change the maximum header size, define the macro in the build
+ * environment (e.g. -DHTTP_MAX_HEADER_SIZE=<value>). To remove
+ * the effective limit on the size of the header, define the macro
+ * to a very large number (e.g. -DHTTP_MAX_HEADER_SIZE=0x7fffffff)
+ */
+#ifndef HTTP_MAX_HEADER_SIZE
+# define HTTP_MAX_HEADER_SIZE (80*1024)
+#endif
+
+typedef struct http_parser http_parser;
+typedef struct http_parser_settings http_parser_settings;
+
+
+/* Callbacks should return non-zero to indicate an error. The parser will
+ * then halt execution.
+ *
+ * The one exception is on_headers_complete. In a HTTP_RESPONSE parser
+ * returning '1' from on_headers_complete will tell the parser that it
+ * should not expect a body. This is used when receiving a response to a
+ * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding:
+ * chunked' headers that indicate the presence of a body.
+ *
+ * Returning `2` from on_headers_complete will tell the parser that it should
+ * expect neither a body nor any further responses on this connection. This is
+ * useful for handling responses to a CONNECT request which may not contain
+ * `Upgrade` or `Connection: upgrade` headers.
+ *
+ * http_data_cb does not return data chunks. It will be called arbitrarily
+ * many times for each string. E.G. you might get 10 callbacks for "on_url"
+ * each providing just a few characters more data.
+ */
+typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);
+typedef int (*http_cb) (http_parser*);
+
+
+/* Status Codes */
+#define HTTP_STATUS_MAP(XX)                                                 \
+  XX(100, CONTINUE,                        Continue)                        \
+  XX(101, SWITCHING_PROTOCOLS,             Switching Protocols)             \
+  XX(102, PROCESSING,                      Processing)                      \
+  XX(200, OK,                              OK)                              \
+  XX(201, CREATED,                         Created)                         \
+  XX(202, ACCEPTED,                        Accepted)                        \
+  XX(203, NON_AUTHORITATIVE_INFORMATION,   Non-Authoritative Information)   \
+  XX(204, NO_CONTENT,                      No Content)                      \
+  XX(205, RESET_CONTENT,                   Reset Content)                   \
+  XX(206, PARTIAL_CONTENT,                 Partial Content)                 \
+  XX(207, MULTI_STATUS,                    Multi-Status)                    \
+  XX(208, ALREADY_REPORTED,                Already Reported)                \
+  XX(226, IM_USED,                         IM Used)                         \
+  XX(300, MULTIPLE_CHOICES,                Multiple Choices)                \
+  XX(301, MOVED_PERMANENTLY,               Moved Permanently)               \
+  XX(302, FOUND,                           Found)                           \
+  XX(303, SEE_OTHER,                       See Other)                       \
+  XX(304, NOT_MODIFIED,                    Not Modified)                    \
+  XX(305, USE_PROXY,                       Use Proxy)                       \
+  XX(307, TEMPORARY_REDIRECT,              Temporary Redirect)              \
+  XX(308, PERMANENT_REDIRECT,              Permanent Redirect)              \
+  XX(400, BAD_REQUEST,                     Bad Request)                     \
+  XX(401, UNAUTHORIZED,                    Unauthorized)                    \
+  XX(402, PAYMENT_REQUIRED,                Payment Required)                \
+  XX(403, FORBIDDEN,                       Forbidden)                       \
+  XX(404, NOT_FOUND,                       Not Found)                       \
+  XX(405, METHOD_NOT_ALLOWED,              Method Not Allowed)              \
+  XX(406, NOT_ACCEPTABLE,                  Not Acceptable)                  \
+  XX(407, PROXY_AUTHENTICATION_REQUIRED,   Proxy Authentication Required)   \
+  XX(408, REQUEST_TIMEOUT,                 Request Timeout)                 \
+  XX(409, CONFLICT,                        Conflict)                        \
+  XX(410, GONE,                            Gone)                            \
+  XX(411, LENGTH_REQUIRED,                 Length Required)                 \
+  XX(412, PRECONDITION_FAILED,             Precondition Failed)             \
+  XX(413, PAYLOAD_TOO_LARGE,               Payload Too Large)               \
+  XX(414, URI_TOO_LONG,                    URI Too Long)                    \
+  XX(415, UNSUPPORTED_MEDIA_TYPE,          Unsupported Media Type)          \
+  XX(416, RANGE_NOT_SATISFIABLE,           Range Not Satisfiable)           \
+  XX(417, EXPECTATION_FAILED,              Expectation Failed)              \
+  XX(421, MISDIRECTED_REQUEST,             Misdirected Request)             \
+  XX(422, UNPROCESSABLE_ENTITY,            Unprocessable Entity)            \
+  XX(423, LOCKED,                          Locked)                          \
+  XX(424, FAILED_DEPENDENCY,               Failed Dependency)               \
+  XX(426, UPGRADE_REQUIRED,                Upgrade Required)                \
+  XX(428, PRECONDITION_REQUIRED,           Precondition Required)           \
+  XX(429, TOO_MANY_REQUESTS,               Too Many Requests)               \
+  XX(431, REQUEST_HEADER_FIELDS_TOO_LARGE, Request Header Fields Too Large) \
+  XX(451, UNAVAILABLE_FOR_LEGAL_REASONS,   Unavailable For Legal Reasons)   \
+  XX(500, INTERNAL_SERVER_ERROR,           Internal Server Error)           \
+  XX(501, NOT_IMPLEMENTED,                 Not Implemented)                 \
+  XX(502, BAD_GATEWAY,                     Bad Gateway)                     \
+  XX(503, SERVICE_UNAVAILABLE,             Service Unavailable)             \
+  XX(504, GATEWAY_TIMEOUT,                 Gateway Timeout)                 \
+  XX(505, HTTP_VERSION_NOT_SUPPORTED,      HTTP Version Not Supported)      \
+  XX(506, VARIANT_ALSO_NEGOTIATES,         Variant Also Negotiates)         \
+  XX(507, INSUFFICIENT_STORAGE,            Insufficient Storage)            \
+  XX(508, LOOP_DETECTED,                   Loop Detected)                   \
+  XX(510, NOT_EXTENDED,                    Not Extended)                    \
+  XX(511, NETWORK_AUTHENTICATION_REQUIRED, Network Authentication Required) \
+
+enum http_status
+  {
+#define XX(num, name, string) HTTP_STATUS_##name = num,
+  HTTP_STATUS_MAP(XX)
+#undef XX
+  };
+
+
+/* Request Methods */
+#define HTTP_METHOD_MAP(XX)         \
+  XX(0,  DELETE,      DELETE)       \
+  XX(1,  GET,         GET)          \
+  XX(2,  HEAD,        HEAD)         \
+  XX(3,  POST,        POST)         \
+  XX(4,  PUT,         PUT)          \
+  /* pathological */                \
+  XX(5,  CONNECT,     CONNECT)      \
+  XX(6,  OPTIONS,     OPTIONS)      \
+  XX(7,  TRACE,       TRACE)        \
+  /* WebDAV */                      \
+  XX(8,  COPY,        COPY)         \
+  XX(9,  LOCK,        LOCK)         \
+  XX(10, MKCOL,       MKCOL)        \
+  XX(11, MOVE,        MOVE)         \
+  XX(12, PROPFIND,    PROPFIND)     \
+  XX(13, PROPPATCH,   PROPPATCH)    \
+  XX(14, SEARCH,      SEARCH)       \
+  XX(15, UNLOCK,      UNLOCK)       \
+  XX(16, BIND,        BIND)         \
+  XX(17, REBIND,      REBIND)       \
+  XX(18, UNBIND,      UNBIND)       \
+  XX(19, ACL,         ACL)          \
+  /* subversion */                  \
+  XX(20, REPORT,      REPORT)       \
+  XX(21, MKACTIVITY,  MKACTIVITY)   \
+  XX(22, CHECKOUT,    CHECKOUT)     \
+  XX(23, MERGE,       MERGE)        \
+  /* upnp */                        \
+  XX(24, MSEARCH,     M-SEARCH)     \
+  XX(25, NOTIFY,      NOTIFY)       \
+  XX(26, SUBSCRIBE,   SUBSCRIBE)    \
+  XX(27, UNSUBSCRIBE, UNSUBSCRIBE)  \
+  /* RFC-5789 */                    \
+  XX(28, PATCH,       PATCH)        \
+  XX(29, PURGE,       PURGE)        \
+  /* CalDAV */                      \
+  XX(30, MKCALENDAR,  MKCALENDAR)   \
+  /* RFC-2068, section 19.6.1.2 */  \
+  XX(31, LINK,        LINK)         \
+  XX(32, UNLINK,      UNLINK)       \
+  /* icecast */                     \
+  XX(33, SOURCE,      SOURCE)       \
+
+enum http_method
+  {
+#define XX(num, name, string) HTTP_##name = num,
+  HTTP_METHOD_MAP(XX)
+#undef XX
+  };
+
+
+enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH };
+
+
+/* Flag values for http_parser.flags field (an 8-bit field: every bit is used) */
+enum flags
+  { F_CHUNKED               = 1 << 0  /* body is read in chunks */
+  , F_CONNECTION_KEEP_ALIVE = 1 << 1  /* needed for keep-alive on HTTP/1.0 or earlier */
+  , F_CONNECTION_CLOSE      = 1 << 2  /* rules out keep-alive on HTTP/1.1 */
+  , F_CONNECTION_UPGRADE    = 1 << 3
+  , F_TRAILING              = 1 << 4
+  , F_UPGRADE               = 1 << 5
+  , F_SKIPBODY              = 1 << 6  /* no body expected, e.g. response to HEAD */
+  , F_CONTENTLENGTH         = 1 << 7
+  };
+
+
+/* Map for errno-related constants
+ *
+ * The provided argument should be a macro that takes 2 arguments.
+ */
+#define HTTP_ERRNO_MAP(XX)                                           \
+  /* No error */                                                     \
+  XX(OK, "success")                                                  \
+                                                                     \
+  /* Callback-related errors */                                      \
+  XX(CB_message_begin, "the on_message_begin callback failed")       \
+  XX(CB_url, "the on_url callback failed")                           \
+  XX(CB_header_field, "the on_header_field callback failed")         \
+  XX(CB_header_value, "the on_header_value callback failed")         \
+  XX(CB_headers_complete, "the on_headers_complete callback failed") \
+  XX(CB_body, "the on_body callback failed")                         \
+  XX(CB_message_complete, "the on_message_complete callback failed") \
+  XX(CB_status, "the on_status callback failed")                     \
+  XX(CB_chunk_header, "the on_chunk_header callback failed")         \
+  XX(CB_chunk_complete, "the on_chunk_complete callback failed")     \
+                                                                     \
+  /* Parsing-related errors */                                       \
+  XX(INVALID_EOF_STATE, "stream ended at an unexpected time")        \
+  XX(HEADER_OVERFLOW,                                                \
+     "too many header bytes seen; overflow detected")                \
+  XX(CLOSED_CONNECTION,                                              \
+     "data received after completed connection: close message")      \
+  XX(INVALID_VERSION, "invalid HTTP version")                        \
+  XX(INVALID_STATUS, "invalid HTTP status code")                     \
+  XX(INVALID_METHOD, "invalid HTTP method")                          \
+  XX(INVALID_URL, "invalid URL")                                     \
+  XX(INVALID_HOST, "invalid host")                                   \
+  XX(INVALID_PORT, "invalid port")                                   \
+  XX(INVALID_PATH, "invalid path")                                   \
+  XX(INVALID_QUERY_STRING, "invalid query string")                   \
+  XX(INVALID_FRAGMENT, "invalid fragment")                           \
+  XX(LF_EXPECTED, "LF character expected")                           \
+  XX(INVALID_HEADER_TOKEN, "invalid character in header")            \
+  XX(INVALID_CONTENT_LENGTH,                                         \
+     "invalid character in content-length header")                   \
+  XX(UNEXPECTED_CONTENT_LENGTH,                                      \
+     "unexpected content-length header")                             \
+  XX(INVALID_CHUNK_SIZE,                                             \
+     "invalid character in chunk size header")                       \
+  XX(INVALID_CONSTANT, "invalid constant string")                    \
+  XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\
+  XX(STRICT, "strict mode assertion failed")                         \
+  XX(PAUSED, "parser is paused")                                     \
+  XX(UNKNOWN, "an unknown error occurred")
+
+
+/* Define HPE_* values for each errno value above */
+#define HTTP_ERRNO_GEN(n, s) HPE_##n,
+enum http_errno {
+  HTTP_ERRNO_MAP(HTTP_ERRNO_GEN)
+};
+#undef HTTP_ERRNO_GEN
+
+
+/* Get an http_errno value from an http_parser */
+#define HTTP_PARSER_ERRNO(p)            ((enum http_errno) (p)->http_errno)
+
+
+struct http_parser {
+  /** PRIVATE **/
+  unsigned int type : 2;         /* enum http_parser_type */
+  unsigned int flags : 8;        /* F_* values from 'flags' enum; semi-public */
+  unsigned int state : 7;        /* enum state from http_parser.c */
+  unsigned int header_state : 7; /* enum header_state from http_parser.c */
+  unsigned int index : 7;        /* index into current matcher */
+  unsigned int lenient_http_headers : 1;
+
+  uint32_t nread;          /* # bytes read in various scenarios */
+  uint64_t content_length; /* # bytes left in body or chunk; ULLONG_MAX (not 0) marks "no length known" in http_parser.c */
+
+  /** READ-ONLY **/
+  unsigned short http_major;
+  unsigned short http_minor;
+  unsigned int status_code : 16; /* responses only */
+  unsigned int method : 8;       /* requests only */
+  unsigned int http_errno : 7;
+
+  /* 1 = Upgrade header was present and the parser has exited because of that.
+   * 0 = No upgrade header present.
+   * Should be checked when http_parser_execute() returns in addition to
+   * error checking.
+   */
+  unsigned int upgrade : 1;
+
+  /** PUBLIC **/
+  void *data; /* A pointer to get hook to the "connection" or "socket" object */
+};
+
+
+struct http_parser_settings {
+  http_cb      on_message_begin;
+  http_data_cb on_url;              /* data callbacks may fire several times per string */
+  http_data_cb on_status;
+  http_data_cb on_header_field;
+  http_data_cb on_header_value;
+  http_cb      on_headers_complete; /* return values 1 and 2 have special meaning, see above */
+  http_data_cb on_body;
+  http_cb      on_message_complete;
+  /* When on_chunk_header is called, the current chunk length is stored
+   * in parser->content_length.
+   */
+  http_cb      on_chunk_header;
+  http_cb      on_chunk_complete;   /* fired once the CRLF after a chunk's data is consumed */
+};
+
+
+enum http_parser_url_fields
+  { UF_SCHEMA           = 0
+  , UF_HOST             = 1
+  , UF_PORT             = 2
+  , UF_PATH             = 3
+  , UF_QUERY            = 4
+  , UF_FRAGMENT         = 5
+  , UF_USERINFO         = 6
+  , UF_MAX              = 7
+  };
+
+
+/* Result structure for http_parser_parse_url().
+ *
+ * Callers should index into field_data[] with UF_* values if and only if
+ * field_set has the relevant (1 << UF_*) bit set. As a courtesy to clients
+ * (and because we probably have padding left over), any port is converted
+ * to a uint16_t. off/len are 16 bits wide: the largest value is 65535.
+ */
+struct http_parser_url {
+  uint16_t field_set;           /* Bitmask of (1 << UF_*) values */
+  uint16_t port;                /* Converted UF_PORT string */
+
+  struct {
+    uint16_t off;               /* Offset into buffer in which field starts */
+    uint16_t len;               /* Length of run in buffer */
+  } field_data[UF_MAX];
+};
+
+
+/* Returns the library version. Bits 16-23 contain the major version number,
+ * bits 8-15 the minor version number and bits 0-7 the patch level.
+ * Usage example:
+ *
+ *   unsigned long version = http_parser_version();
+ *   unsigned major = (version >> 16) & 255;
+ *   unsigned minor = (version >> 8) & 255;
+ *   unsigned patch = version & 255;
+ *   printf("http_parser v%u.%u.%u\n", major, minor, patch);
+ */
+unsigned long http_parser_version(void);
+
+void http_parser_init(http_parser *parser, enum http_parser_type type);
+
+
+/* Initialize http_parser_settings members to 0
+ */
+void http_parser_settings_init(http_parser_settings *settings);
+
+
+/* Executes the parser. Returns number of parsed bytes. Sets
+ * `parser->http_errno` on error. */
+size_t http_parser_execute(http_parser *parser,
+                           const http_parser_settings *settings,
+                           const char *data,
+                           size_t len);
+
+
+/* If http_should_keep_alive() in the on_headers_complete or
+ * on_message_complete callback returns 0, then this should be
+ * the last message on the connection.
+ * If you are the server, respond with the "Connection: close" header.
+ * If you are the client, close the connection.
+ */
+int http_should_keep_alive(const http_parser *parser);
+
+/* Returns a string version of the HTTP method. */
+const char *http_method_str(enum http_method m);
+
+/* Returns a string version of the HTTP status code. */
+const char *http_status_str(enum http_status s);
+
+/* Return a string name of the given error */
+const char *http_errno_name(enum http_errno err);
+
+/* Return a string description of the given error */
+const char *http_errno_description(enum http_errno err);
+
+/* Initialize all http_parser_url members to 0 */
+void http_parser_url_init(struct http_parser_url *u);
+
+/* Parse a URL; return nonzero on failure */
+int http_parser_parse_url(const char *buf, size_t buflen,
+                          int is_connect,
+                          struct http_parser_url *u);
+
+/* Pause or un-pause the parser; a nonzero value pauses */
+void http_parser_pause(http_parser *parser, int paused);
+
+/* Checks if this is the final chunk of the body. */
+int http_body_is_final(const http_parser *parser);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/src/version.h b/src/version.h
index 33be0af8..272d8337 100644
--- a/src/version.h
+++ b/src/version.h
@@ -25,18 +25,18 @@
 #ifndef XMRIG_VERSION_H
 #define XMRIG_VERSION_H
 
-#define APP_ID        "xmrig"
-#define APP_NAME      "XMRig"
-#define APP_DESC      "XMRig CPU miner"
-#define APP_VERSION   "2.14.4-dev"
-#define APP_DOMAIN    "xmrig.com"
-#define APP_SITE      "www.xmrig.com"
-#define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com"
-#define APP_KIND      "cpu"
+#define APP_ID        "ninjarig"
+#define APP_NAME      "NinjaRig"
+#define APP_DESC      "NinjaRig CPU/GPU miner"
+#define APP_VERSION   "1.0.0-dev"
+//#define APP_DOMAIN    "xmrig.com"
+//#define APP_SITE      "www.xmrig.com"
+#define APP_COPYRIGHT "Copyright (C) 2019 Haifa Bogdan Adnan"
+#define APP_KIND      "cpu/gpu"
 
-#define APP_VER_MAJOR  2
-#define APP_VER_MINOR  14
-#define APP_VER_PATCH  4
+#define APP_VER_MAJOR  1
+#define APP_VER_MINOR  0
+#define APP_VER_PATCH  0
 
 #ifdef _MSC_VER
 #   if (_MSC_VER >= 1920)
diff --git a/src/workers/CpuThread.cpp b/src/workers/CpuThread.cpp
deleted file mode 100644
index 6548b461..00000000
--- a/src/workers/CpuThread.cpp
+++ /dev/null
@@ -1,744 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <assert.h>
-
-
-#include "common/cpu/Cpu.h"
-#include "common/log/Log.h"
-#include "crypto/Asm.h"
-#include "Mem.h"
-#include "rapidjson/document.h"
-#include "workers/CpuThread.h"
-
-
-#if defined(XMRIG_ARM)
-#   include "crypto/CryptoNight_arm.h"
-#else
-#   include "crypto/CryptoNight_x86.h"
-#endif
-
-
-xmrig::CpuThread::CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiway multiway, int64_t affinity, int priority, bool softAES, bool prefetch, Assembly assembly) :
-    m_algorithm(algorithm),
-    m_av(av),
-    m_assembly(assembly),
-    m_prefetch(prefetch),
-    m_softAES(softAES),
-    m_priority(priority),
-    m_affinity(affinity),
-    m_multiway(multiway),
-    m_index(index)
-{
-}
-
-
-#ifndef XMRIG_NO_ASM
-template<typename T, typename U>
-static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t mask)
-{
-    const uint8_t* p = reinterpret_cast<const uint8_t*>(src);
-
-    // Workaround for Visual Studio placing trampoline in debug builds.
-#   if defined(_MSC_VER)
-    if (p[0] == 0xE9) {
-        p += *(int32_t*)(p + 1) + 5;
-    }
-#   endif
-
-    size_t size = 0;
-    while (*(uint32_t*)(p + size) != 0xDEADC0DE) {
-        ++size;
-    }
-    size += sizeof(uint32_t);
-
-    memcpy((void*) dst, (const void*) src, size);
-
-    uint8_t* patched_data = reinterpret_cast<uint8_t*>(dst);
-    for (size_t i = 0; i + sizeof(uint32_t) <= size; ++i) {
-        switch (*(uint32_t*)(patched_data + i)) {
-        case xmrig::CRYPTONIGHT_ITER:
-            *(uint32_t*)(patched_data + i) = iterations;
-            break;
-
-        case xmrig::CRYPTONIGHT_MASK:
-            *(uint32_t*)(patched_data + i) = mask;
-            break;
-        }
-    }
-}
-
-
-extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx);
-extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx);
-extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx);
-extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx);
-
-
-xmrig::CpuThread::cn_mainloop_fun        cn_half_mainloop_ivybridge_asm             = nullptr;
-xmrig::CpuThread::cn_mainloop_fun        cn_half_mainloop_ryzen_asm                 = nullptr;
-xmrig::CpuThread::cn_mainloop_fun        cn_half_mainloop_bulldozer_asm             = nullptr;
-xmrig::CpuThread::cn_mainloop_fun        cn_half_double_mainloop_sandybridge_asm    = nullptr;
-
-xmrig::CpuThread::cn_mainloop_fun        cn_trtl_mainloop_ivybridge_asm             = nullptr;
-xmrig::CpuThread::cn_mainloop_fun        cn_trtl_mainloop_ryzen_asm                 = nullptr;
-xmrig::CpuThread::cn_mainloop_fun        cn_trtl_mainloop_bulldozer_asm             = nullptr;
-xmrig::CpuThread::cn_mainloop_fun        cn_trtl_double_mainloop_sandybridge_asm    = nullptr;
-
-xmrig::CpuThread::cn_mainloop_fun        cn_zls_mainloop_ivybridge_asm              = nullptr;
-xmrig::CpuThread::cn_mainloop_fun        cn_zls_mainloop_ryzen_asm                  = nullptr;
-xmrig::CpuThread::cn_mainloop_fun        cn_zls_mainloop_bulldozer_asm              = nullptr;
-xmrig::CpuThread::cn_mainloop_fun        cn_zls_double_mainloop_sandybridge_asm     = nullptr;
-
-xmrig::CpuThread::cn_mainloop_fun        cn_double_mainloop_ivybridge_asm           = nullptr;
-xmrig::CpuThread::cn_mainloop_fun        cn_double_mainloop_ryzen_asm               = nullptr;
-xmrig::CpuThread::cn_mainloop_fun        cn_double_mainloop_bulldozer_asm           = nullptr;
-xmrig::CpuThread::cn_mainloop_fun        cn_double_double_mainloop_sandybridge_asm  = nullptr;
-
-
-void xmrig::CpuThread::patchAsmVariants()
-{
-    const int allocation_size = 65536;
-    uint8_t *base = static_cast<uint8_t *>(Mem::allocateExecutableMemory(allocation_size));
-
-    cn_half_mainloop_ivybridge_asm              = reinterpret_cast<cn_mainloop_fun>         (base + 0x0000);
-    cn_half_mainloop_ryzen_asm                  = reinterpret_cast<cn_mainloop_fun>         (base + 0x1000);
-    cn_half_mainloop_bulldozer_asm              = reinterpret_cast<cn_mainloop_fun>         (base + 0x2000);
-    cn_half_double_mainloop_sandybridge_asm     = reinterpret_cast<cn_mainloop_fun>         (base + 0x3000);
-
-    cn_trtl_mainloop_ivybridge_asm              = reinterpret_cast<cn_mainloop_fun>         (base + 0x4000);
-    cn_trtl_mainloop_ryzen_asm                  = reinterpret_cast<cn_mainloop_fun>         (base + 0x5000);
-    cn_trtl_mainloop_bulldozer_asm              = reinterpret_cast<cn_mainloop_fun>         (base + 0x6000);
-    cn_trtl_double_mainloop_sandybridge_asm     = reinterpret_cast<cn_mainloop_fun>         (base + 0x7000);
-
-    cn_zls_mainloop_ivybridge_asm               = reinterpret_cast<cn_mainloop_fun>         (base + 0x8000);
-    cn_zls_mainloop_ryzen_asm                   = reinterpret_cast<cn_mainloop_fun>         (base + 0x9000);
-    cn_zls_mainloop_bulldozer_asm               = reinterpret_cast<cn_mainloop_fun>         (base + 0xA000);
-    cn_zls_double_mainloop_sandybridge_asm      = reinterpret_cast<cn_mainloop_fun>         (base + 0xB000);
-
-    cn_double_mainloop_ivybridge_asm            = reinterpret_cast<cn_mainloop_fun>         (base + 0xC000);
-    cn_double_mainloop_ryzen_asm                = reinterpret_cast<cn_mainloop_fun>         (base + 0xD000);
-    cn_double_mainloop_bulldozer_asm            = reinterpret_cast<cn_mainloop_fun>         (base + 0xE000);
-    cn_double_double_mainloop_sandybridge_asm   = reinterpret_cast<cn_mainloop_fun>         (base + 0xF000);
-
-    patchCode(cn_half_mainloop_ivybridge_asm,            cnv2_mainloop_ivybridge_asm,           xmrig::CRYPTONIGHT_HALF_ITER,   xmrig::CRYPTONIGHT_MASK);
-    patchCode(cn_half_mainloop_ryzen_asm,                cnv2_mainloop_ryzen_asm,               xmrig::CRYPTONIGHT_HALF_ITER,   xmrig::CRYPTONIGHT_MASK);
-    patchCode(cn_half_mainloop_bulldozer_asm,            cnv2_mainloop_bulldozer_asm,           xmrig::CRYPTONIGHT_HALF_ITER,   xmrig::CRYPTONIGHT_MASK);
-    patchCode(cn_half_double_mainloop_sandybridge_asm,   cnv2_double_mainloop_sandybridge_asm,  xmrig::CRYPTONIGHT_HALF_ITER,   xmrig::CRYPTONIGHT_MASK);
-
-    patchCode(cn_trtl_mainloop_ivybridge_asm,            cnv2_mainloop_ivybridge_asm,           xmrig::CRYPTONIGHT_TRTL_ITER,   xmrig::CRYPTONIGHT_PICO_MASK);
-    patchCode(cn_trtl_mainloop_ryzen_asm,                cnv2_mainloop_ryzen_asm,               xmrig::CRYPTONIGHT_TRTL_ITER,   xmrig::CRYPTONIGHT_PICO_MASK);
-    patchCode(cn_trtl_mainloop_bulldozer_asm,            cnv2_mainloop_bulldozer_asm,           xmrig::CRYPTONIGHT_TRTL_ITER,   xmrig::CRYPTONIGHT_PICO_MASK);
-    patchCode(cn_trtl_double_mainloop_sandybridge_asm,   cnv2_double_mainloop_sandybridge_asm,  xmrig::CRYPTONIGHT_TRTL_ITER,   xmrig::CRYPTONIGHT_PICO_MASK);
-
-    patchCode(cn_zls_mainloop_ivybridge_asm,             cnv2_mainloop_ivybridge_asm,           xmrig::CRYPTONIGHT_ZLS_ITER,    xmrig::CRYPTONIGHT_MASK);
-    patchCode(cn_zls_mainloop_ryzen_asm,                 cnv2_mainloop_ryzen_asm,               xmrig::CRYPTONIGHT_ZLS_ITER,    xmrig::CRYPTONIGHT_MASK);
-    patchCode(cn_zls_mainloop_bulldozer_asm,             cnv2_mainloop_bulldozer_asm,           xmrig::CRYPTONIGHT_ZLS_ITER,    xmrig::CRYPTONIGHT_MASK);
-    patchCode(cn_zls_double_mainloop_sandybridge_asm,    cnv2_double_mainloop_sandybridge_asm,  xmrig::CRYPTONIGHT_ZLS_ITER,    xmrig::CRYPTONIGHT_MASK);
-
-    patchCode(cn_double_mainloop_ivybridge_asm,          cnv2_mainloop_ivybridge_asm,           xmrig::CRYPTONIGHT_DOUBLE_ITER, xmrig::CRYPTONIGHT_MASK);
-    patchCode(cn_double_mainloop_ryzen_asm,              cnv2_mainloop_ryzen_asm,               xmrig::CRYPTONIGHT_DOUBLE_ITER, xmrig::CRYPTONIGHT_MASK);
-    patchCode(cn_double_mainloop_bulldozer_asm,          cnv2_mainloop_bulldozer_asm,           xmrig::CRYPTONIGHT_DOUBLE_ITER, xmrig::CRYPTONIGHT_MASK);
-    patchCode(cn_double_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm,  xmrig::CRYPTONIGHT_DOUBLE_ITER, xmrig::CRYPTONIGHT_MASK);
-
-    Mem::protectExecutableMemory(base, allocation_size);
-    Mem::flushInstructionCache(base, allocation_size);
-}
-#endif
-
-
-bool xmrig::CpuThread::isSoftAES(AlgoVariant av)
-{
-    return av == AV_SINGLE_SOFT || av == AV_DOUBLE_SOFT || av > AV_PENTA;
-}
-
-
-#ifndef XMRIG_NO_ASM
-template<xmrig::Algo algo, xmrig::Variant variant>
-static inline void add_asm_func(xmrig::CpuThread::cn_hash_fun(&asm_func_map)[xmrig::ALGO_MAX][xmrig::AV_MAX][xmrig::VARIANT_MAX][xmrig::ASM_MAX])
-{
-    asm_func_map[algo][xmrig::AV_SINGLE][variant][xmrig::ASM_INTEL]     = cryptonight_single_hash_asm<algo, variant, xmrig::ASM_INTEL>;
-    asm_func_map[algo][xmrig::AV_SINGLE][variant][xmrig::ASM_RYZEN]     = cryptonight_single_hash_asm<algo, variant, xmrig::ASM_RYZEN>;
-    asm_func_map[algo][xmrig::AV_SINGLE][variant][xmrig::ASM_BULLDOZER] = cryptonight_single_hash_asm<algo, variant, xmrig::ASM_BULLDOZER>;
-
-    asm_func_map[algo][xmrig::AV_DOUBLE][variant][xmrig::ASM_INTEL]     = cryptonight_double_hash_asm<algo, variant, xmrig::ASM_INTEL>;
-    asm_func_map[algo][xmrig::AV_DOUBLE][variant][xmrig::ASM_RYZEN]     = cryptonight_double_hash_asm<algo, variant, xmrig::ASM_RYZEN>;
-    asm_func_map[algo][xmrig::AV_DOUBLE][variant][xmrig::ASM_BULLDOZER] = cryptonight_double_hash_asm<algo, variant, xmrig::ASM_BULLDOZER>;
-}
-#endif
-
-xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant av, Variant variant, Assembly assembly)
-{
-    assert(variant >= VARIANT_0 && variant < VARIANT_MAX);
-
-#   ifndef XMRIG_NO_ASM
-    if (assembly == ASM_AUTO) {
-        assembly = Cpu::info()->assembly();
-    }
-
-    static cn_hash_fun asm_func_map[ALGO_MAX][AV_MAX][VARIANT_MAX][ASM_MAX] = {};
-    static bool asm_func_map_initialized = false;
-
-    if (!asm_func_map_initialized) {
-        add_asm_func<CRYPTONIGHT, VARIANT_2>(asm_func_map);
-        add_asm_func<CRYPTONIGHT, VARIANT_HALF>(asm_func_map);
-        add_asm_func<CRYPTONIGHT, VARIANT_WOW>(asm_func_map);
-        add_asm_func<CRYPTONIGHT, VARIANT_4>(asm_func_map);
-
-#       ifndef XMRIG_NO_CN_PICO
-        add_asm_func<CRYPTONIGHT_PICO, VARIANT_TRTL>(asm_func_map);
-#       endif
-
-        add_asm_func<CRYPTONIGHT, VARIANT_RWZ>(asm_func_map);
-        add_asm_func<CRYPTONIGHT, VARIANT_ZLS>(asm_func_map);
-        add_asm_func<CRYPTONIGHT, VARIANT_DOUBLE>(asm_func_map);
-
-        asm_func_map_initialized = true;
-    }
-
-    cn_hash_fun fun = asm_func_map[algorithm][av][variant][assembly];
-    if (fun) {
-        return fun;
-    }
-#   endif
-
-    constexpr const size_t count = VARIANT_MAX * 10 * ALGO_MAX;
-
-    static const cn_hash_fun func_table[] = {
-        cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_0>,
-        cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_0>,
-        cryptonight_single_hash<CRYPTONIGHT, true,  VARIANT_0>,
-        cryptonight_double_hash<CRYPTONIGHT, true,  VARIANT_0>,
-        cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_0>,
-        cryptonight_quad_hash<CRYPTONIGHT,   false, VARIANT_0>,
-        cryptonight_penta_hash<CRYPTONIGHT,  false, VARIANT_0>,
-        cryptonight_triple_hash<CRYPTONIGHT, true,  VARIANT_0>,
-        cryptonight_quad_hash<CRYPTONIGHT,   true,  VARIANT_0>,
-        cryptonight_penta_hash<CRYPTONIGHT,  true,  VARIANT_0>,
-
-        cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_1>,
-        cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_1>,
-        cryptonight_single_hash<CRYPTONIGHT, true,  VARIANT_1>,
-        cryptonight_double_hash<CRYPTONIGHT, true,  VARIANT_1>,
-        cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_1>,
-        cryptonight_quad_hash<CRYPTONIGHT,   false, VARIANT_1>,
-        cryptonight_penta_hash<CRYPTONIGHT,  false, VARIANT_1>,
-        cryptonight_triple_hash<CRYPTONIGHT, true,  VARIANT_1>,
-        cryptonight_quad_hash<CRYPTONIGHT,   true,  VARIANT_1>,
-        cryptonight_penta_hash<CRYPTONIGHT,  true,  VARIANT_1>,
-
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE
-
-        cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_XTL>,
-        cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_XTL>,
-        cryptonight_single_hash<CRYPTONIGHT, true,  VARIANT_XTL>,
-        cryptonight_double_hash<CRYPTONIGHT, true,  VARIANT_XTL>,
-        cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_XTL>,
-        cryptonight_quad_hash<CRYPTONIGHT,   false, VARIANT_XTL>,
-        cryptonight_penta_hash<CRYPTONIGHT,  false, VARIANT_XTL>,
-        cryptonight_triple_hash<CRYPTONIGHT, true,  VARIANT_XTL>,
-        cryptonight_quad_hash<CRYPTONIGHT,   true,  VARIANT_XTL>,
-        cryptonight_penta_hash<CRYPTONIGHT,  true,  VARIANT_XTL>,
-
-        cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_MSR>,
-        cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_MSR>,
-        cryptonight_single_hash<CRYPTONIGHT, true,  VARIANT_MSR>,
-        cryptonight_double_hash<CRYPTONIGHT, true,  VARIANT_MSR>,
-        cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_MSR>,
-        cryptonight_quad_hash<CRYPTONIGHT,   false, VARIANT_MSR>,
-        cryptonight_penta_hash<CRYPTONIGHT,  false, VARIANT_MSR>,
-        cryptonight_triple_hash<CRYPTONIGHT, true,  VARIANT_MSR>,
-        cryptonight_quad_hash<CRYPTONIGHT,   true,  VARIANT_MSR>,
-        cryptonight_penta_hash<CRYPTONIGHT,  true,  VARIANT_MSR>,
-
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV
-
-        cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_XAO>,
-        cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_XAO>,
-        cryptonight_single_hash<CRYPTONIGHT, true,  VARIANT_XAO>,
-        cryptonight_double_hash<CRYPTONIGHT, true,  VARIANT_XAO>,
-        cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_XAO>,
-        cryptonight_quad_hash<CRYPTONIGHT,   false, VARIANT_XAO>,
-        cryptonight_penta_hash<CRYPTONIGHT,  false, VARIANT_XAO>,
-        cryptonight_triple_hash<CRYPTONIGHT, true,  VARIANT_XAO>,
-        cryptonight_quad_hash<CRYPTONIGHT,   true,  VARIANT_XAO>,
-        cryptonight_penta_hash<CRYPTONIGHT,  true,  VARIANT_XAO>,
-
-        cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_RTO>,
-        cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_RTO>,
-        cryptonight_single_hash<CRYPTONIGHT, true,  VARIANT_RTO>,
-        cryptonight_double_hash<CRYPTONIGHT, true,  VARIANT_RTO>,
-        cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_RTO>,
-        cryptonight_quad_hash<CRYPTONIGHT,   false, VARIANT_RTO>,
-        cryptonight_penta_hash<CRYPTONIGHT,  false, VARIANT_RTO>,
-        cryptonight_triple_hash<CRYPTONIGHT, true,  VARIANT_RTO>,
-        cryptonight_quad_hash<CRYPTONIGHT,   true,  VARIANT_RTO>,
-        cryptonight_penta_hash<CRYPTONIGHT,  true,  VARIANT_RTO>,
-
-        cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_2>,
-        cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_2>,
-        cryptonight_single_hash<CRYPTONIGHT, true,  VARIANT_2>,
-        cryptonight_double_hash<CRYPTONIGHT, true,  VARIANT_2>,
-        cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_2>,
-        cryptonight_quad_hash<CRYPTONIGHT,   false, VARIANT_2>,
-        cryptonight_penta_hash<CRYPTONIGHT,  false, VARIANT_2>,
-        cryptonight_triple_hash<CRYPTONIGHT, true,  VARIANT_2>,
-        cryptonight_quad_hash<CRYPTONIGHT,   true,  VARIANT_2>,
-        cryptonight_penta_hash<CRYPTONIGHT,  true,  VARIANT_2>,
-
-        cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_HALF>,
-        cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_HALF>,
-        cryptonight_single_hash<CRYPTONIGHT, true,  VARIANT_HALF>,
-        cryptonight_double_hash<CRYPTONIGHT, true,  VARIANT_HALF>,
-        cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_HALF>,
-        cryptonight_quad_hash<CRYPTONIGHT,   false, VARIANT_HALF>,
-        cryptonight_penta_hash<CRYPTONIGHT,  false, VARIANT_HALF>,
-        cryptonight_triple_hash<CRYPTONIGHT, true,  VARIANT_HALF>,
-        cryptonight_quad_hash<CRYPTONIGHT,   true,  VARIANT_HALF>,
-        cryptonight_penta_hash<CRYPTONIGHT,  true,  VARIANT_HALF>,
-
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL
-
-#       ifndef XMRIG_NO_CN_GPU
-        cryptonight_single_hash_gpu<CRYPTONIGHT, false, VARIANT_GPU>,
-        nullptr,
-        cryptonight_single_hash_gpu<CRYPTONIGHT, true,  VARIANT_GPU>,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-#       else
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
-#       endif
-
-        cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_WOW>,
-        cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_WOW>,
-        cryptonight_single_hash<CRYPTONIGHT, true,  VARIANT_WOW>,
-        cryptonight_double_hash<CRYPTONIGHT, true,  VARIANT_WOW>,
-        cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_WOW>,
-        cryptonight_quad_hash<CRYPTONIGHT,   false, VARIANT_WOW>,
-        cryptonight_penta_hash<CRYPTONIGHT,  false, VARIANT_WOW>,
-        cryptonight_triple_hash<CRYPTONIGHT, true,  VARIANT_WOW>,
-        cryptonight_quad_hash<CRYPTONIGHT,   true,  VARIANT_WOW>,
-        cryptonight_penta_hash<CRYPTONIGHT,  true,  VARIANT_WOW>,
-
-        cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_4>,
-        cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_4>,
-        cryptonight_single_hash<CRYPTONIGHT, true,  VARIANT_4>,
-        cryptonight_double_hash<CRYPTONIGHT, true,  VARIANT_4>,
-        cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_4>,
-        cryptonight_quad_hash<CRYPTONIGHT,   false, VARIANT_4>,
-        cryptonight_penta_hash<CRYPTONIGHT,  false, VARIANT_4>,
-        cryptonight_triple_hash<CRYPTONIGHT, true,  VARIANT_4>,
-        cryptonight_quad_hash<CRYPTONIGHT,   true,  VARIANT_4>,
-        cryptonight_penta_hash<CRYPTONIGHT,  true,  VARIANT_4>,
-
-        cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_RWZ>,
-        cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_RWZ>,
-        cryptonight_single_hash<CRYPTONIGHT, true,  VARIANT_RWZ>,
-        cryptonight_double_hash<CRYPTONIGHT, true,  VARIANT_RWZ>,
-        cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_RWZ>,
-        cryptonight_quad_hash<CRYPTONIGHT,   false, VARIANT_RWZ>,
-        cryptonight_penta_hash<CRYPTONIGHT,  false, VARIANT_RWZ>,
-        cryptonight_triple_hash<CRYPTONIGHT, true,  VARIANT_RWZ>,
-        cryptonight_quad_hash<CRYPTONIGHT,   true,  VARIANT_RWZ>,
-        cryptonight_penta_hash<CRYPTONIGHT,  true,  VARIANT_RWZ>,
-
-        cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_ZLS>,
-        cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_ZLS>,
-        cryptonight_single_hash<CRYPTONIGHT, true,  VARIANT_ZLS>,
-        cryptonight_double_hash<CRYPTONIGHT, true,  VARIANT_ZLS>,
-        cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_ZLS>,
-        cryptonight_quad_hash<CRYPTONIGHT,   false, VARIANT_ZLS>,
-        cryptonight_penta_hash<CRYPTONIGHT,  false, VARIANT_ZLS>,
-        cryptonight_triple_hash<CRYPTONIGHT, true,  VARIANT_ZLS>,
-        cryptonight_quad_hash<CRYPTONIGHT,   true,  VARIANT_ZLS>,
-        cryptonight_penta_hash<CRYPTONIGHT,  true,  VARIANT_ZLS>,
-
-        cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_DOUBLE>,
-        cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_DOUBLE>,
-        cryptonight_single_hash<CRYPTONIGHT, true,  VARIANT_DOUBLE>,
-        cryptonight_double_hash<CRYPTONIGHT, true,  VARIANT_DOUBLE>,
-        cryptonight_triple_hash<CRYPTONIGHT, false, VARIANT_DOUBLE>,
-        cryptonight_quad_hash<CRYPTONIGHT,   false, VARIANT_DOUBLE>,
-        cryptonight_penta_hash<CRYPTONIGHT,  false, VARIANT_DOUBLE>,
-        cryptonight_triple_hash<CRYPTONIGHT, true,  VARIANT_DOUBLE>,
-        cryptonight_quad_hash<CRYPTONIGHT,   true,  VARIANT_DOUBLE>,
-        cryptonight_penta_hash<CRYPTONIGHT,  true,  VARIANT_DOUBLE>,
-
-#       ifndef XMRIG_NO_AEON
-        cryptonight_single_hash<CRYPTONIGHT_LITE, false, VARIANT_0>,
-        cryptonight_double_hash<CRYPTONIGHT_LITE, false, VARIANT_0>,
-        cryptonight_single_hash<CRYPTONIGHT_LITE, true,  VARIANT_0>,
-        cryptonight_double_hash<CRYPTONIGHT_LITE, true,  VARIANT_0>,
-        cryptonight_triple_hash<CRYPTONIGHT_LITE, false, VARIANT_0>,
-        cryptonight_quad_hash<CRYPTONIGHT_LITE,   false, VARIANT_0>,
-        cryptonight_penta_hash<CRYPTONIGHT_LITE,  false, VARIANT_0>,
-        cryptonight_triple_hash<CRYPTONIGHT_LITE, true,  VARIANT_0>,
-        cryptonight_quad_hash<CRYPTONIGHT_LITE,   true,  VARIANT_0>,
-        cryptonight_penta_hash<CRYPTONIGHT_LITE,  true,  VARIANT_0>,
-
-        cryptonight_single_hash<CRYPTONIGHT_LITE, false, VARIANT_1>,
-        cryptonight_double_hash<CRYPTONIGHT_LITE, false, VARIANT_1>,
-        cryptonight_single_hash<CRYPTONIGHT_LITE, true,  VARIANT_1>,
-        cryptonight_double_hash<CRYPTONIGHT_LITE, true,  VARIANT_1>,
-        cryptonight_triple_hash<CRYPTONIGHT_LITE, false, VARIANT_1>,
-        cryptonight_quad_hash<CRYPTONIGHT_LITE,   false, VARIANT_1>,
-        cryptonight_penta_hash<CRYPTONIGHT_LITE,  false, VARIANT_1>,
-        cryptonight_triple_hash<CRYPTONIGHT_LITE, true,  VARIANT_1>,
-        cryptonight_quad_hash<CRYPTONIGHT_LITE,   true,  VARIANT_1>,
-        cryptonight_penta_hash<CRYPTONIGHT_LITE,  true,  VARIANT_1>,
-
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE
-#       else
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE
-#       endif
-
-#       ifndef XMRIG_NO_SUMO
-        cryptonight_single_hash<CRYPTONIGHT_HEAVY, false, VARIANT_0>,
-        cryptonight_double_hash<CRYPTONIGHT_HEAVY, false, VARIANT_0>,
-        cryptonight_single_hash<CRYPTONIGHT_HEAVY, true,  VARIANT_0>,
-        cryptonight_double_hash<CRYPTONIGHT_HEAVY, true,  VARIANT_0>,
-        cryptonight_triple_hash<CRYPTONIGHT_HEAVY, false, VARIANT_0>,
-        cryptonight_quad_hash<CRYPTONIGHT_HEAVY,   false, VARIANT_0>,
-        cryptonight_penta_hash<CRYPTONIGHT_HEAVY,  false, VARIANT_0>,
-        cryptonight_triple_hash<CRYPTONIGHT_HEAVY, true,  VARIANT_0>,
-        cryptonight_quad_hash<CRYPTONIGHT_HEAVY,   true,  VARIANT_0>,
-        cryptonight_penta_hash<CRYPTONIGHT_HEAVY,  true,  VARIANT_0>,
-
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1
-
-        cryptonight_single_hash<CRYPTONIGHT_HEAVY, false, VARIANT_TUBE>,
-        cryptonight_double_hash<CRYPTONIGHT_HEAVY, false, VARIANT_TUBE>,
-        cryptonight_single_hash<CRYPTONIGHT_HEAVY, true,  VARIANT_TUBE>,
-        cryptonight_double_hash<CRYPTONIGHT_HEAVY, true,  VARIANT_TUBE>,
-        cryptonight_triple_hash<CRYPTONIGHT_HEAVY, false, VARIANT_TUBE>,
-        cryptonight_quad_hash<CRYPTONIGHT_HEAVY,   false, VARIANT_TUBE>,
-        cryptonight_penta_hash<CRYPTONIGHT_HEAVY,  false, VARIANT_TUBE>,
-        cryptonight_triple_hash<CRYPTONIGHT_HEAVY, true,  VARIANT_TUBE>,
-        cryptonight_quad_hash<CRYPTONIGHT_HEAVY,   true,  VARIANT_TUBE>,
-        cryptonight_penta_hash<CRYPTONIGHT_HEAVY,  true,  VARIANT_TUBE>,
-
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR
-
-        cryptonight_single_hash<CRYPTONIGHT_HEAVY, false, VARIANT_XHV>,
-        cryptonight_double_hash<CRYPTONIGHT_HEAVY, false, VARIANT_XHV>,
-        cryptonight_single_hash<CRYPTONIGHT_HEAVY, true,  VARIANT_XHV>,
-        cryptonight_double_hash<CRYPTONIGHT_HEAVY, true,  VARIANT_XHV>,
-        cryptonight_triple_hash<CRYPTONIGHT_HEAVY, false, VARIANT_XHV>,
-        cryptonight_quad_hash<CRYPTONIGHT_HEAVY,   false, VARIANT_XHV>,
-        cryptonight_penta_hash<CRYPTONIGHT_HEAVY,  false, VARIANT_XHV>,
-        cryptonight_triple_hash<CRYPTONIGHT_HEAVY, true,  VARIANT_XHV>,
-        cryptonight_quad_hash<CRYPTONIGHT_HEAVY,   true,  VARIANT_XHV>,
-        cryptonight_penta_hash<CRYPTONIGHT_HEAVY,  true,  VARIANT_XHV>,
-
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE
-#       else
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE
-#       endif
-
-#       ifndef XMRIG_NO_CN_PICO
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF
-
-        cryptonight_single_hash<CRYPTONIGHT_PICO, false, VARIANT_TRTL>,
-        cryptonight_double_hash<CRYPTONIGHT_PICO, false, VARIANT_TRTL>,
-        cryptonight_single_hash<CRYPTONIGHT_PICO, true,  VARIANT_TRTL>,
-        cryptonight_double_hash<CRYPTONIGHT_PICO, true,  VARIANT_TRTL>,
-        cryptonight_triple_hash<CRYPTONIGHT_PICO, false, VARIANT_TRTL>,
-        cryptonight_quad_hash<CRYPTONIGHT_PICO,   false, VARIANT_TRTL>,
-        cryptonight_penta_hash<CRYPTONIGHT_PICO,  false, VARIANT_TRTL>,
-        cryptonight_triple_hash<CRYPTONIGHT_PICO, true,  VARIANT_TRTL>,
-        cryptonight_quad_hash<CRYPTONIGHT_PICO,   true,  VARIANT_TRTL>,
-        cryptonight_penta_hash<CRYPTONIGHT_PICO,  true,  VARIANT_TRTL>,
-
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE
-#       else
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS
-        nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE
-#       endif
-    };
-
-    static_assert(count == sizeof(func_table) / sizeof(func_table[0]), "func_table size mismatch");
-
-    const size_t index = VARIANT_MAX * 10 * algorithm + 10 * variant + av - 1;
-
-#   ifndef NDEBUG
-    cn_hash_fun func = func_table[index];
-
-    assert(index < sizeof(func_table) / sizeof(func_table[0]));
-    assert(func != nullptr);
-
-    return func;
-#   else
-    return func_table[index];
-#   endif
-}
-
-
-xmrig::CpuThread *xmrig::CpuThread::createFromAV(size_t index, Algo algorithm, AlgoVariant av, int64_t affinity, int priority, Assembly assembly)
-{
-    assert(av > AV_AUTO && av < AV_MAX);
-
-    int64_t cpuId = -1L;
-
-    if (affinity != -1L) {
-        size_t idx = 0;
-
-        for (size_t i = 0; i < 64; i++) {
-            if (!(affinity & (1ULL << i))) {
-                continue;
-            }
-
-            if (idx == index) {
-                cpuId = i;
-                break;
-            }
-
-            idx++;
-        }
-    }
-
-    return new CpuThread(index, algorithm, av, multiway(av), cpuId, priority, isSoftAES(av), false, assembly);
-}
-
-
-xmrig::CpuThread *xmrig::CpuThread::createFromData(size_t index, Algo algorithm, const CpuThread::Data &data, int priority, bool softAES)
-{
-    int av                  = AV_AUTO;
-    const Multiway multiway = data.multiway;
-
-    if (multiway <= DoubleWay) {
-        av = softAES ? (multiway + 2) : multiway;
-    }
-    else {
-        av = softAES ? (multiway + 5) : (multiway + 2);
-    }
-
-    assert(av > AV_AUTO && av < AV_MAX);
-
-    return new CpuThread(index, algorithm, static_cast<AlgoVariant>(av), multiway, data.affinity, priority, softAES, false, data.assembly);
-}
-
-
-xmrig::CpuThread::Data xmrig::CpuThread::parse(const rapidjson::Value &object)
-{
-    Data data;
-
-    const auto &multiway = object["low_power_mode"];
-    if (multiway.IsBool()) {
-        data.multiway = multiway.IsTrue() ? DoubleWay : SingleWay;
-        data.valid    = true;
-    }
-    else if (multiway.IsUint()) {
-        data.setMultiway(multiway.GetInt());
-    }
-
-    if (!data.valid) {
-        return data;
-    }
-
-    const auto &affinity = object["affine_to_cpu"];
-    if (affinity.IsUint64()) {
-        data.affinity = affinity.GetInt64();
-    }
-
-#   ifndef XMRIG_NO_ASM
-    data.assembly = Asm::parse(object["asm"]);
-#   endif
-
-    return data;
-}
-
-
-xmrig::IThread::Multiway xmrig::CpuThread::multiway(AlgoVariant av)
-{
-    switch (av) {
-    case AV_SINGLE:
-    case AV_SINGLE_SOFT:
-        return SingleWay;
-
-    case AV_DOUBLE_SOFT:
-    case AV_DOUBLE:
-        return DoubleWay;
-
-    case AV_TRIPLE_SOFT:
-    case AV_TRIPLE:
-        return TripleWay;
-
-    case AV_QUAD_SOFT:
-    case AV_QUAD:
-        return QuadWay;
-
-    case AV_PENTA_SOFT:
-    case AV_PENTA:
-        return PentaWay;
-
-    default:
-        break;
-    }
-
-    return SingleWay;
-}
-
-
-#ifdef APP_DEBUG
-void xmrig::CpuThread::print() const
-{
-    LOG_DEBUG(GREEN_BOLD("CPU thread:   ") " index " WHITE_BOLD("%zu") ", multiway " WHITE_BOLD("%d") ", av " WHITE_BOLD("%d") ",",
-              index(), static_cast<int>(multiway()), static_cast<int>(m_av));
-
-#   ifndef XMRIG_NO_ASM
-    LOG_DEBUG("               assembly: %s, affine_to_cpu: %" PRId64, Asm::toString(m_assembly), affinity());
-#   else
-    LOG_DEBUG("               affine_to_cpu: %" PRId64, affinity());
-#   endif
-}
-#endif
-
-
-#ifndef XMRIG_NO_API
-rapidjson::Value xmrig::CpuThread::toAPI(rapidjson::Document &doc) const
-{
-    using namespace rapidjson;
-
-    Value obj(kObjectType);
-    auto &allocator = doc.GetAllocator();
-
-    obj.AddMember("type",          "cpu", allocator);
-    obj.AddMember("av",             m_av, allocator);
-    obj.AddMember("low_power_mode", multiway(), allocator);
-    obj.AddMember("affine_to_cpu",  affinity(), allocator);
-    obj.AddMember("priority",       priority(), allocator);
-    obj.AddMember("soft_aes",       isSoftAES(), allocator);
-
-    return obj;
-}
-#endif
-
-
-rapidjson::Value xmrig::CpuThread::toConfig(rapidjson::Document &doc) const
-{
-    using namespace rapidjson;
-
-    Value obj(kObjectType);
-    auto &allocator = doc.GetAllocator();
-
-    obj.AddMember("low_power_mode", multiway(), allocator);
-    obj.AddMember("affine_to_cpu",  affinity() == -1L ? Value(kFalseType) : Value(affinity()), allocator);
-
-#   ifndef XMRIG_NO_ASM
-    obj.AddMember("asm", Asm::toJSON(m_assembly), allocator);
-#   endif
-
-    return obj;
-}
diff --git a/src/workers/CpuThread.h b/src/workers/CpuThread.h
deleted file mode 100644
index 05d4a066..00000000
--- a/src/workers/CpuThread.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef XMRIG_CPUTHREAD_H
-#define XMRIG_CPUTHREAD_H
-
-
-#include "common/xmrig.h"
-#include "interfaces/IThread.h"
-
-
-struct cryptonight_ctx;
-
-
-namespace xmrig {
-
-
-class CpuThread : public IThread
-{
-public:
-    struct Data
-    {
-        inline Data() : assembly(ASM_AUTO), valid(false), affinity(-1L), multiway(SingleWay) {}
-
-        inline void setMultiway(int value)
-        {
-            if (value >= SingleWay && value <= PentaWay) {
-                multiway = static_cast<Multiway>(value);
-                valid    = true;
-            }
-        }
-
-        Assembly assembly;
-        bool valid;
-        int64_t affinity;
-        Multiway multiway;
-    };
-
-
-    CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiway multiway, int64_t affinity, int priority, bool softAES, bool prefetch, Assembly assembly);
-
-    typedef void (*cn_hash_fun)(const uint8_t *input, size_t size, uint8_t *output, cryptonight_ctx **ctx, uint64_t height);
-    typedef void (*cn_mainloop_fun)(cryptonight_ctx **ctx);
-
-#   ifndef XMRIG_NO_ASM
-    static void patchAsmVariants();
-#   endif
-
-    static bool isSoftAES(AlgoVariant av);
-    static cn_hash_fun fn(Algo algorithm, AlgoVariant av, Variant variant, Assembly assembly);
-    static CpuThread *createFromAV(size_t index, Algo algorithm, AlgoVariant av, int64_t affinity, int priority, Assembly assembly);
-    static CpuThread *createFromData(size_t index, Algo algorithm, const CpuThread::Data &data, int priority, bool softAES);
-    static Data parse(const rapidjson::Value &object);
-    static Multiway multiway(AlgoVariant av);
-
-    inline bool isPrefetch() const               { return m_prefetch; }
-    inline bool isSoftAES() const                { return m_softAES; }
-    inline cn_hash_fun fn(Variant variant) const { return fn(m_algorithm, m_av, variant, m_assembly); }
-
-    inline Algo algorithm() const override       { return m_algorithm; }
-    inline int priority() const override         { return m_priority; }
-    inline int64_t affinity() const override     { return m_affinity; }
-    inline Multiway multiway() const override    { return m_multiway; }
-    inline size_t index() const override         { return m_index; }
-    inline Type type() const override            { return CPU; }
-
-protected:
-#   ifdef APP_DEBUG
-    void print() const override;
-#   endif
-
-#   ifndef XMRIG_NO_API
-    rapidjson::Value toAPI(rapidjson::Document &doc) const override;
-#   endif
-
-    rapidjson::Value toConfig(rapidjson::Document &doc) const override;
-
-private:
-    const Algo m_algorithm;
-    const AlgoVariant m_av;
-    const Assembly m_assembly;
-    const bool m_prefetch;
-    const bool m_softAES;
-    const int m_priority;
-    const int64_t m_affinity;
-    const Multiway m_multiway;
-    const size_t m_index;
-};
-
-
-} /* namespace xmrig */
-
-
-#endif /* XMRIG_CPUTHREAD_H */
diff --git a/src/workers/Handle.cpp b/src/workers/Handle.cpp
index d42ea368..cacaf636 100644
--- a/src/workers/Handle.cpp
+++ b/src/workers/Handle.cpp
@@ -22,25 +22,65 @@
  */
 
 
+#include <common/log/Log.h>
 #include "workers/Handle.h"
 
 
-Handle::Handle(xmrig::IThread *config, uint32_t offset, size_t totalWays) :
-    m_worker(nullptr),
-    m_totalWays(totalWays),
-    m_offset(offset),
-    m_config(config)
+Handle::Handle(xmrig::Config *config, xmrig::HasherConfig *hasherConfig, uint32_t offset) :
+        m_hasher(nullptr),              // initializers listed in member declaration order (avoids -Wreorder)
+        m_offset(offset),
+        m_hasherConfig(hasherConfig),
+        m_config(config)
 {
-}
+    std::vector<Hasher *> hashers = Hasher::getHashers();
+    for(Hasher *hasher : hashers) {
+        if(hasherConfig->type() == hasher->subType()) {
+            if(hasher->initialize(hasherConfig->algorithm(), hasherConfig->variant()) &&
+                hasher->configure(*hasherConfig) &&
+                hasher->deviceCount() > 0)
+                m_hasher = hasher;
 
+            std::string hasherInfo = hasher->info();
+
+            if(config->isColors()) {
+                std::string redDisabled = RED_BOLD("DISABLED");
+                std::string greenEnabled = GREEN_BOLD("ENABLED");
+
+                size_t startPos = hasherInfo.find("DISABLED");
+                while (startPos != std::string::npos) {
+                    hasherInfo.replace(startPos, 8, redDisabled);   // 8 == length of "DISABLED"
+                    startPos = hasherInfo.find("DISABLED", startPos + redDisabled.size());   // resume after the inserted text
+                }
+
+                startPos = hasherInfo.find("ENABLED");
+                while (startPos != std::string::npos) {
+                    hasherInfo.replace(startPos, 7, greenEnabled);  // 7 == length of "ENABLED"
+                    startPos = hasherInfo.find("ENABLED", startPos + greenEnabled.size());   // resume after the inserted text
+                }
+
+                Log::i()->text(GREEN_BOLD(" * Initializing %s hasher:") "\n%s", hasher->subType().c_str(), hasherInfo.c_str());
+            }
+            else {
+                Log::i()->text(" * Initializing %s hasher:\n%s", hasher->subType().c_str(), hasherInfo.c_str());
+            }
+        }
+    }
+}
 
 void Handle::join()
 {
-    uv_thread_join(&m_thread);
+    for(uv_thread_t &thread : m_threads)   // by reference: join the stored handle, not a temporary copy
+        uv_thread_join(&thread);
 }
 
 
 void Handle::start(void (*callback) (void *))
 {
-    uv_thread_create(&m_thread, callback, this);
+    assert(m_hasher != nullptr);
+    for(int i = 0, n = static_cast<int>(computingThreads()); i < n; i++) {   // computingThreads() yields 0 without a hasher
+        uv_thread_t thread;
+        HandleArg *arg = new HandleArg { this, i };
+        if(uv_thread_create(&thread, callback, arg) != 0) { delete arg; continue; }   // callback never ran: free arg, keep no handle
+        m_threads.push_back(thread);   // only successfully created threads are joined later
+    }
 }
diff --git a/src/workers/Handle.h b/src/workers/Handle.h
index 4bb899f9..50c7a2b4 100644
--- a/src/workers/Handle.h
+++ b/src/workers/Handle.h
@@ -27,35 +27,48 @@
 
 #include <assert.h>
 #include <stdint.h>
+#include <vector>
 #include <uv.h>
+#include <core/Config.h>
 
+#include "core/HasherConfig.h"
 
-#include "interfaces/IThread.h"
-
+#include "crypto/argon2_hasher/common/common.h"
+#include "crypto/argon2_hasher/hash/Hasher.h"
 
 class IWorker;
 
-
 class Handle
 {
 public:
-    Handle(xmrig::IThread *config, uint32_t offset, size_t totalWays);
+    Handle(xmrig::Config *config, xmrig::HasherConfig *hasherConfig, uint32_t offset);
+
+    struct HandleArg {          // argument passed to the thread callback by Handle::start()
+        Handle *handle;         // the Handle that created the thread
+        int workerId;           // index of the thread within that Handle, counted from 0
+    };
+
     void join();
     void start(void (*callback) (void *));
 
-    inline IWorker *worker() const         { return m_worker; }
-    inline size_t threadId() const         { return m_config->index(); }
-    inline size_t totalWays() const        { return m_totalWays; }
+    inline std::vector<IWorker *> &workers()         { return m_workers; }
+    inline size_t hasherId() const         { return m_hasherConfig->index(); }
+    inline size_t parallelism(int workerIdx) const        { return m_hasher != nullptr ? m_hasher->parallelism(workerIdx) : 0; }
+    inline size_t computingThreads() const   { return m_hasher != nullptr ? m_hasher->computingThreads() : 0; }
     inline uint32_t offset() const         { return m_offset; }
-    inline void setWorker(IWorker *worker) { assert(worker != nullptr); m_worker = worker; }
-    inline xmrig::IThread *config() const  { return m_config; }
+    inline void addWorker(IWorker *worker) { assert(worker != nullptr); m_workers.push_back(worker); }
+    inline xmrig::HasherConfig *config() const  { return m_hasherConfig; }
+    inline Hasher *hasher() const { return m_hasher; }
 
 private:
-    IWorker *m_worker;
-    size_t m_totalWays;
+    std::vector<uv_thread_t> m_threads;
+    std::vector<IWorker *> m_workers;
+
+    Hasher *m_hasher;
     uint32_t m_offset;
-    uv_thread_t m_thread;
-    xmrig::IThread *m_config;
+
+    xmrig::HasherConfig *m_hasherConfig;
+    xmrig::Config *m_config;
 };
 
 
diff --git a/src/workers/Hashrate.cpp b/src/workers/Hashrate.cpp
index 2a750318..dcb4982e 100644
--- a/src/workers/Hashrate.cpp
+++ b/src/workers/Hashrate.cpp
@@ -33,11 +33,12 @@
 #include "core/Config.h"
 #include "core/Controller.h"
 #include "workers/Hashrate.h"
+#include "workers/Handle.h"
 
 
 inline static const char *format(double h, char *buf, size_t size)
 {
-    if (isnormal(h)) {
+    if (std::isnormal(h)) {
         snprintf(buf, size, "%03.1f", h);
         return buf;
     }
@@ -46,19 +47,26 @@ inline static const char *format(double h, char *buf, size_t size)
 }
 
 
-Hashrate::Hashrate(size_t threads, xmrig::Controller *controller) :
+Hashrate::Hashrate(const std::vector<Handle*> &hashers, xmrig::Controller *controller) :
     m_highest(0.0),
-    m_threads(threads),
     m_controller(controller)
 {
-    m_counts     = new uint64_t*[threads];
-    m_timestamps = new uint64_t*[threads];
-    m_top        = new uint32_t[threads];
+    m_hashers = hashers.size();
+    m_workers = new size_t[m_hashers];
+    m_counts     = new uint64_t**[m_hashers];
+    m_timestamps = new uint64_t**[m_hashers];
+    m_top        = new uint32_t*[m_hashers];
 
-    for (size_t i = 0; i < threads; i++) {
-        m_counts[i]     = new uint64_t[kBucketSize]();
-        m_timestamps[i] = new uint64_t[kBucketSize]();
-        m_top[i]        = 0;
+    for (size_t i = 0; i < hashers.size(); i++) {
+        m_workers[i] = hashers[i]->hasher() != nullptr ? hashers[i]->hasher()->deviceCount() : 0;   // a Handle may have no usable hasher
+        m_counts[i]     = new uint64_t*[m_workers[i]];
+        m_timestamps[i] = new uint64_t*[m_workers[i]];
+        m_top[i]        = new uint32_t[m_workers[i]];
+        for (size_t j = 0; j < m_workers[i]; j++) {
+            m_counts[i][j]     = new uint64_t[kBucketSize]();   // value-initialized: all buckets start at 0
+            m_timestamps[i][j] = new uint64_t[kBucketSize]();   // a 0 timestamp marks an unused bucket in calc()
+            m_top[i][j]        = 0;
+        }
     }
 
     const int printTime = controller->config()->printTime();
@@ -77,10 +85,12 @@ double Hashrate::calc(size_t ms) const
     double result = 0.0;
     double data;
 
-    for (size_t i = 0; i < m_threads; ++i) {
-        data = calc(i, ms);
-        if (isnormal(data)) {
-            result += data;
+    for (size_t i = 0; i < m_hashers; ++i) {
+        for(size_t j = 0; j < m_workers[i]; j++) {
+            data = calc(i, j, ms);
+            if (std::isnormal(data)) {
+                result += data;
+            }
         }
     }
 
@@ -88,10 +98,12 @@ double Hashrate::calc(size_t ms) const
 }
 
 
-double Hashrate::calc(size_t threadId, size_t ms) const
+double Hashrate::calc(size_t hasherId, size_t workerId, size_t ms) const
 {
-    assert(threadId < m_threads);
-    if (threadId >= m_threads) {
+    assert(hasherId < m_hashers);
+    assert(workerId < m_workers[hasherId]);
+
+    if (hasherId >= m_hashers || workerId >= m_workers[hasherId]) {
         return nan("");
     }
 
@@ -105,24 +117,24 @@ double Hashrate::calc(size_t threadId, size_t ms) const
     bool haveFullSet           = false;
 
     for (size_t i = 1; i < kBucketSize; i++) {
-        const size_t idx = (m_top[threadId] - i) & kBucketMask;
+        const size_t idx = (m_top[hasherId][workerId] - i) & kBucketMask;
 
-        if (m_timestamps[threadId][idx] == 0) {
+        if (m_timestamps[hasherId][workerId][idx] == 0) {
             break;
         }
 
         if (lastestStamp == 0) {
-            lastestStamp = m_timestamps[threadId][idx];
-            lastestHashCnt = m_counts[threadId][idx];
+            lastestStamp = m_timestamps[hasherId][workerId][idx];
+            lastestHashCnt = m_counts[hasherId][workerId][idx];
         }
 
-        if (now - m_timestamps[threadId][idx] > ms) {
+        if (now - m_timestamps[hasherId][workerId][idx] > ms) {
             haveFullSet = true;
             break;
         }
 
-        earliestStamp = m_timestamps[threadId][idx];
-        earliestHashCount = m_counts[threadId][idx];
+        earliestStamp = m_timestamps[hasherId][workerId][idx];
+        earliestHashCount = m_counts[hasherId][workerId][idx];
     }
 
     if (!haveFullSet || earliestStamp == 0 || lastestStamp == 0) {
@@ -142,13 +154,13 @@ double Hashrate::calc(size_t threadId, size_t ms) const
 }
 
 
-void Hashrate::add(size_t threadId, uint64_t count, uint64_t timestamp)
+void Hashrate::add(size_t hasherId, size_t workerId, uint64_t count, uint64_t timestamp)
 {
-    const size_t top = m_top[threadId];
-    m_counts[threadId][top]     = count;
-    m_timestamps[threadId][top] = timestamp;
+    const size_t top = m_top[hasherId][workerId];
+    m_counts[hasherId][workerId][top]     = count;
+    m_timestamps[hasherId][workerId][top] = timestamp;
 
-    m_top[threadId] = (top + 1) & kBucketMask;
+    m_top[hasherId][workerId] = (top + 1) & kBucketMask;
 }
 
 
@@ -178,7 +190,7 @@ void Hashrate::stop()
 void Hashrate::updateHighest()
 {
    double highest = calc(ShortInterval);
-   if (isnormal(highest) && highest > m_highest) {
+   if (std::isnormal(highest) && highest > m_highest) {
        m_highest = highest;
    }
 }
diff --git a/src/workers/Hashrate.h b/src/workers/Hashrate.h
index e766f117..a1f8733f 100644
--- a/src/workers/Hashrate.h
+++ b/src/workers/Hashrate.h
@@ -32,7 +32,7 @@
 namespace xmrig {
     class Controller;
 }
-
+class Handle;
 
 class Hashrate
 {
@@ -43,16 +43,15 @@ public:
         LargeInterval  = 900000
     };
 
-    Hashrate(size_t threads, xmrig::Controller *controller);
+    Hashrate(const std::vector<Handle*> &hashers, xmrig::Controller *controller);   // one Handle per hasher
     double calc(size_t ms) const;
-    double calc(size_t threadId, size_t ms) const;
-    void add(size_t threadId, uint64_t count, uint64_t timestamp);
+    double calc(size_t hasherId, size_t workerId, size_t ms) const;
+    void add(size_t hasherId, size_t workerId, uint64_t count, uint64_t timestamp);
     void print() const;
     void stop();
     void updateHighest();
 
     inline double highest() const { return m_highest; }
-    inline size_t threads() const { return m_threads; }
 
     static const char *format(double h, char *buf, size_t size);
 
@@ -63,10 +62,11 @@ private:
     constexpr static size_t kBucketMask = kBucketSize - 1;
 
     double m_highest;
-    size_t m_threads;
-    uint32_t* m_top;
-    uint64_t** m_counts;
-    uint64_t** m_timestamps;
+    size_t m_hashers;
+    size_t* m_workers;
+    uint32_t** m_top;
+    uint64_t*** m_counts;
+    uint64_t*** m_timestamps;
     uv_timer_t m_timer;
     xmrig::Controller *m_controller;
 };
diff --git a/src/workers/MultiWorker.cpp b/src/workers/MultiWorker.cpp
deleted file mode 100644
index 02eec378..00000000
--- a/src/workers/MultiWorker.cpp
+++ /dev/null
@@ -1,273 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
- * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-#include <thread>
-
-
-#include "crypto/CryptoNight_test.h"
-#include "common/log/Log.h"
-#include "workers/CpuThread.h"
-#include "workers/MultiWorker.h"
-#include "workers/Workers.h"
-
-
-template<size_t N>
-MultiWorker<N>::MultiWorker(Handle *handle)
-    : Worker(handle)
-{
-    m_memory = Mem::create(m_ctx, m_thread->algorithm(), N);
-}
-
-
-template<size_t N>
-MultiWorker<N>::~MultiWorker()
-{
-    Mem::release(m_ctx, N, m_memory);
-}
-
-
-template<size_t N>
-bool MultiWorker<N>::selfTest()
-{
-    using namespace xmrig;
-
-    if (m_thread->algorithm() == CRYPTONIGHT) {
-        const bool rc = verify(VARIANT_0,      test_output_v0)   &&
-                        verify(VARIANT_1,      test_output_v1)   &&
-                        verify(VARIANT_2,      test_output_v2)   &&
-                        verify(VARIANT_XTL,    test_output_xtl)  &&
-                        verify(VARIANT_MSR,    test_output_msr)  &&
-                        verify(VARIANT_XAO,    test_output_xao)  &&
-                        verify(VARIANT_RTO,    test_output_rto)  &&
-                        verify(VARIANT_HALF,   test_output_half) &&
-                        verify2(VARIANT_WOW,   test_output_wow)  &&
-                        verify2(VARIANT_4,     test_output_r)    &&
-                        verify(VARIANT_RWZ,    test_output_rwz)  &&
-                        verify(VARIANT_ZLS,    test_output_zls)  &&
-                        verify(VARIANT_DOUBLE, test_output_double);
-
-#       ifndef XMRIG_NO_CN_GPU
-        if (!rc || N > 1) {
-            return rc;
-        }
-
-        return verify(VARIANT_GPU, test_output_gpu);
-#       else
-        return rc;
-#       endif
-    }
-
-#   ifndef XMRIG_NO_AEON
-    if (m_thread->algorithm() == CRYPTONIGHT_LITE) {
-        return verify(VARIANT_0,    test_output_v0_lite) &&
-               verify(VARIANT_1,    test_output_v1_lite);
-    }
-#   endif
-
-#   ifndef XMRIG_NO_SUMO
-    if (m_thread->algorithm() == CRYPTONIGHT_HEAVY) {
-        return verify(VARIANT_0,    test_output_v0_heavy)  &&
-               verify(VARIANT_XHV,  test_output_xhv_heavy) &&
-               verify(VARIANT_TUBE, test_output_tube_heavy);
-    }
-#   endif
-
-#   ifndef XMRIG_NO_CN_PICO
-    if (m_thread->algorithm() == CRYPTONIGHT_PICO) {
-        return verify(VARIANT_TRTL, test_output_pico_trtl);
-    }
-#   endif
-
-    return false;
-}
-
-
-template<size_t N>
-void MultiWorker<N>::start()
-{
-    while (Workers::sequence() > 0) {
-        if (Workers::isPaused()) {
-            do {
-                std::this_thread::sleep_for(std::chrono::milliseconds(200));
-            }
-            while (Workers::isPaused());
-
-            if (Workers::sequence() == 0) {
-                break;
-            }
-
-            consumeJob();
-        }
-
-        while (!Workers::isOutdated(m_sequence)) {
-            if ((m_count & 0x7) == 0) {
-                storeStats();
-            }
-
-            m_thread->fn(m_state.job.algorithm().variant())(m_state.blob, m_state.job.size(), m_hash, m_ctx, m_state.job.height());
-
-            for (size_t i = 0; i < N; ++i) {
-                if (*reinterpret_cast<uint64_t*>(m_hash + (i * 32) + 24) < m_state.job.target()) {
-                    Workers::submit(xmrig::JobResult(m_state.job.poolId(), m_state.job.id(), m_state.job.clientId(), *nonce(i), m_hash + (i * 32), m_state.job.diff(), m_state.job.algorithm()));
-                }
-
-                *nonce(i) += 1;
-            }
-
-            m_count += N;
-
-            std::this_thread::yield();
-        }
-
-        consumeJob();
-    }
-}
-
-
-template<size_t N>
-bool MultiWorker<N>::resume(const xmrig::Job &job)
-{
-    if (m_state.job.poolId() == -1 && job.poolId() >= 0 && job.id() == m_pausedState.job.id()) {
-        m_state = m_pausedState;
-        return true;
-    }
-
-    return false;
-}
-
-
-template<size_t N>
-bool MultiWorker<N>::verify(xmrig::Variant variant, const uint8_t *referenceValue)
-{
-
-    xmrig::CpuThread::cn_hash_fun func = m_thread->fn(variant);
-    if (!func) {
-        return false;
-    }
-
-    func(test_input, 76, m_hash, m_ctx, 0);
-    return memcmp(m_hash, referenceValue, sizeof m_hash) == 0;
-}
-
-
-template<size_t N>
-bool MultiWorker<N>::verify2(xmrig::Variant variant, const uint8_t *referenceValue)
-{
-    xmrig::CpuThread::cn_hash_fun func = m_thread->fn(variant);
-    if (!func) {
-        return false;
-    }
-
-    for (size_t i = 0; i < (sizeof(cn_r_test_input) / sizeof(cn_r_test_input[0])); ++i) {
-        const size_t size = cn_r_test_input[i].size;
-        for (size_t k = 0; k < N; ++k) {
-            memcpy(m_state.blob + (k * size), cn_r_test_input[i].data, size);
-        }
-
-        func(m_state.blob, size, m_hash, m_ctx, cn_r_test_input[i].height);
-
-        for (size_t k = 0; k < N; ++k) {
-            if (memcmp(m_hash + k * 32, referenceValue + i * 32, sizeof m_hash / N) != 0) {
-                return false;
-            }
-        }
-    }
-
-    return true;
-}
-
-
-template<>
-bool MultiWorker<1>::verify2(xmrig::Variant variant, const uint8_t *referenceValue)
-{
-    xmrig::CpuThread::cn_hash_fun func = m_thread->fn(variant);
-    if (!func) {
-        return false;
-    }
-
-    for (size_t i = 0; i < (sizeof(cn_r_test_input) / sizeof(cn_r_test_input[0])); ++i) {
-        func(cn_r_test_input[i].data, cn_r_test_input[i].size, m_hash, m_ctx, cn_r_test_input[i].height);
-
-        if (memcmp(m_hash, referenceValue + i * 32, sizeof m_hash) != 0) {
-            return false;
-        }
-    }
-
-    return true;
-}
-
-
-template<size_t N>
-void MultiWorker<N>::consumeJob()
-{
-    xmrig::Job job = Workers::job();
-    m_sequence = Workers::sequence();
-    if (m_state.job == job) {
-        return;
-    }
-
-    save(job);
-
-    if (resume(job)) {
-        return;
-    }
-
-    m_state.job = job;
-
-    const size_t size = m_state.job.size();
-    memcpy(m_state.blob, m_state.job.blob(), m_state.job.size());
-
-    if (N > 1) {
-        for (size_t i = 1; i < N; ++i) {
-            memcpy(m_state.blob + (i * size), m_state.blob, size);
-        }
-    }
-
-    for (size_t i = 0; i < N; ++i) {
-        if (m_state.job.isNicehash()) {
-            *nonce(i) = (*nonce(i) & 0xff000000U) + (0xffffffU / m_totalWays * (m_offset + i));
-        }
-        else {
-           *nonce(i) = 0xffffffffU / m_totalWays * (m_offset + i);
-        }
-    }
-}
-
-
-template<size_t N>
-void MultiWorker<N>::save(const xmrig::Job &job)
-{
-    if (job.poolId() == -1 && m_state.job.poolId() >= 0) {
-        m_pausedState = m_state;
-    }
-}
-
-
-template class MultiWorker<1>;
-template class MultiWorker<2>;
-template class MultiWorker<3>;
-template class MultiWorker<4>;
-template class MultiWorker<5>;
diff --git a/src/workers/MultiWorker.h b/src/workers/MultiWorker.h
deleted file mode 100644
index b7e4c8ca..00000000
--- a/src/workers/MultiWorker.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/* XMRig
- * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
- * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
- * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
- * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
- * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
- * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
- * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
- *
- *   This program is free software: you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, either version 3 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef XMRIG_MULTIWORKER_H
-#define XMRIG_MULTIWORKER_H
-
-
-#include "common/net/Job.h"
-#include "Mem.h"
-#include "net/JobResult.h"
-#include "workers/Worker.h"
-
-
-class Handle;
-
-
-template<size_t N>
-class MultiWorker : public Worker
-{
-public:
-    MultiWorker(Handle *handle);
-    ~MultiWorker();
-
-protected:
-    bool selfTest() override;
-    void start() override;
-
-private:
-    bool resume(const xmrig::Job &job);
-    bool verify(xmrig::Variant variant, const uint8_t *referenceValue);
-    bool verify2(xmrig::Variant variant, const uint8_t *referenceValue);
-    void consumeJob();
-    void save(const xmrig::Job &job);
-
-    inline uint32_t *nonce(size_t index)
-    {
-        return reinterpret_cast<uint32_t*>(m_state.blob + (index * m_state.job.size()) + 39);
-    }
-
-    struct State
-    {
-        alignas(16) uint8_t blob[xmrig::Job::kMaxBlobSize * N];
-        xmrig::Job job;
-    };
-
-
-    cryptonight_ctx *m_ctx[N];
-    State m_pausedState;
-    State m_state;
-    uint8_t m_hash[N * 32];
-};
-
-
-#endif /* XMRIG_MULTIWORKER_H */
diff --git a/src/workers/Worker.cpp b/src/workers/Worker.cpp
index c569908c..40cb338d 100644
--- a/src/workers/Worker.cpp
+++ b/src/workers/Worker.cpp
@@ -26,29 +26,25 @@
 
 #include "common/cpu/Cpu.h"
 #include "common/Platform.h"
-#include "workers/CpuThread.h"
+#include "core/HasherConfig.h"
 #include "workers/Handle.h"
 #include "workers/Worker.h"
+#include "workers/Workers.h"
 
 
-Worker::Worker(Handle *handle) :
-    m_id(handle->threadId()),
-    m_totalWays(handle->totalWays()),
-    m_offset(handle->offset()),
-    m_hashCount(0),
-    m_timestamp(0),
-    m_count(0),
-    m_sequence(0),
-    m_thread(static_cast<xmrig::CpuThread *>(handle->config()))
+Worker::Worker(Handle *handle, int workerIdx) :
+        m_id(workerIdx),
+        m_hashCount(0),
+        m_timestamp(0),
+        m_count(0),
+        m_sequence(0),
+        m_config(static_cast<xmrig::HasherConfig *>(handle->config())),
+        m_hasher(handle->hasher())
 {
-    if (xmrig::Cpu::info()->threads() > 1 && m_thread->affinity() != -1L) {
-        Platform::setThreadAffinity(m_thread->affinity());
-    }
-
-    Platform::setThreadPriority(m_thread->priority());
+    m_offset = handle->offset() + m_id; // nonce-partition index: the handle's base offset plus this worker's index (used by consumeJob())
+    m_hash = new uint8_t[m_hasher->parallelism(m_id) * 36]; // one 36-byte result record per parallel hash; start() reads bytes 24..31 and 32..35 of each record
 }
 
-
 void Worker::storeStats()
 {
     using namespace std::chrono;
@@ -57,3 +53,129 @@ void Worker::storeStats()
     m_hashCount.store(m_count, std::memory_order_relaxed);
     m_timestamp.store(timestamp, std::memory_order_relaxed);
 }
+
+// Self-test hook called from Workers::onReady(); this implementation performs
+// no verification and always reports success.
+bool Worker::selfTest()
+{
+    return true;
+}
+
+// Worker thread main loop. Applies CPU affinity (only for hashers whose type and
+// subType are both "CPU") and the configured thread priority, then hashes the
+// current job until Workers::sequence() drops to 0. Results are only processed
+// when compute() reports a full batch (hashCount == parallelism).
+void Worker::start() {
+    if(m_hasher->type() == "CPU" && m_hasher->subType() == "CPU") {
+        if (xmrig::Cpu::info()->threads() > 1 && m_config->getCPUAffinity(m_id) != -1L) {
+            Platform::setThreadAffinity(m_config->getCPUAffinity(m_id));
+        }
+    }
+
+    Platform::setThreadPriority(m_config->priority());
+    int parallelism = m_hasher->parallelism(m_id);
+
+    while (Workers::sequence() > 0) {
+        if (Workers::isPaused()) {
+            do {
+                std::this_thread::sleep_for(std::chrono::milliseconds(200));
+            }
+            while (Workers::isPaused());
+
+            if (Workers::sequence() == 0) {
+                break;
+            }
+
+            consumeJob();
+        }
+
+        while (!Workers::isOutdated(m_sequence)) {
+            int hashCount = m_hasher->compute(m_id, m_state.blob, m_state.job.size(), m_hash);
+
+            if(hashCount == parallelism) {
+
+                for (int i = 0; i < parallelism; ++i) {
+                    // Records are 36 bytes apart, so the 64-bit field at offset 24 is misaligned
+                    // for every odd record; copy the fields out instead of dereferencing casts.
+                    uint8_t *record = m_hash + (i * 36);
+                    uint64_t value  = 0;
+                    uint32_t nonce  = 0;
+                    memcpy(&value, record + 24, sizeof(value));
+                    memcpy(&nonce, record + 32, sizeof(nonce));
+
+                    if (value < m_state.job.target()) {
+                        Workers::submit(xmrig::JobResult(m_state.job.poolId(), m_state.job.id(), m_state.job.clientId(),
+                                                         nonce, record, m_state.job.diff(),
+                                                         m_state.job.algorithm()));
+                    }
+                }
+
+                m_count += parallelism;
+            }
+
+            storeStats();
+
+            std::this_thread::yield();
+        }
+
+        consumeJob();
+    }
+}
+
+// Fetches the current job and sequence from Workers. Returns false when the job is
+// unchanged or a paused state was restored by resume(); otherwise copies the blob
+// into m_state, seeds this worker's starting nonce and returns true.
+bool Worker::consumeJob() {
+    xmrig::Job job = Workers::job();
+    m_sequence = Workers::sequence();
+    if (m_state.job == job) {
+        return false;
+    }
+
+    save(job);
+
+    if (resume(job)) {
+        return false;
+    }
+
+    m_state.job = job;
+
+    const size_t size = m_state.job.size();
+    memcpy(m_state.blob, m_state.job.blob(), size);
+
+    // The nonce lives at byte offset 39 of the blob, which is not 4-byte aligned;
+    // read and write it through memcpy rather than a casted pointer.
+    uint32_t nonce = 0;
+    memcpy(&nonce, m_state.blob + 39, sizeof(nonce));
+    if (m_state.job.isNicehash()) {
+        // Keep the top byte and partition the low 24 bits between all threads.
+        nonce = (nonce & 0xff000000U) + (0xffffffU / Workers::totalThreads() * m_offset);
+    }
+    else {
+        nonce = 0xffffffffU / Workers::totalThreads() * m_offset;
+    }
+    memcpy(m_state.blob + 39, &nonce, sizeof(nonce));
+
+    return true;
+}
+
+// Restores the state stashed by save() when the current job has poolId -1, the
+// incoming job has a non-negative poolId and its id matches the paused job.
+bool Worker::resume(const xmrig::Job &job)
+{
+    if (m_state.job.poolId() == -1 && job.poolId() >= 0 && job.id() == m_pausedState.job.id()) {
+        m_state = m_pausedState;
+        return true;
+    }
+
+    return false;
+}
+
+// Stashes the current state when switching from a job with a non-negative poolId
+// to a job whose poolId is -1, so resume() can pick it up again later.
+void Worker::save(const xmrig::Job &job)
+{
+    if (job.poolId() == -1 && m_state.job.poolId() >= 0) {
+        m_pausedState = m_state;
+    }
+}
diff --git a/src/workers/Worker.h b/src/workers/Worker.h
index 73e25033..c34029af 100644
--- a/src/workers/Worker.h
+++ b/src/workers/Worker.h
@@ -30,39 +30,58 @@
 
 
 #include "interfaces/IWorker.h"
-#include "Mem.h"
-
+#include "common/net/Job.h"
+#include "net/JobResult.h"
 
 class Handle;
 
 
 namespace xmrig {
-    class CpuThread;
+    class HasherConfig;
 }
 
 
+// Worker that drives a Hasher (see Worker::start) and publishes hash count / timestamp statistics.
 class Worker : public IWorker
 {
 public:
-    Worker(Handle *handle);
+    Worker(Handle *handle, int workerIdx);
+    // Releases the result buffer that the constructor allocates with new[].
+    ~Worker() { delete[] m_hash; }
 
-    inline const MemInfo &memory() const       { return m_memory; }
     inline size_t id() const override          { return m_id; }
     inline uint64_t hashCount() const override { return m_hashCount.load(std::memory_order_relaxed); }
     inline uint64_t timestamp() const override { return m_timestamp.load(std::memory_order_relaxed); }
+    inline size_t parallelism() const override { return m_hasher->parallelism(m_id); }
 
-protected:
+    bool selfTest() override;
+    void start() override;
+
+private:
     void storeStats();
+    bool consumeJob();
+
+    bool resume(const xmrig::Job &job);
+    void save(const xmrig::Job &job);
+
+    // A job together with the working copy of its blob; saved/restored as a unit by save()/resume().
+    struct State
+    {
+        alignas(16) uint8_t blob[xmrig::Job::kMaxBlobSize];
+        xmrig::Job job;
+    };
 
     const size_t m_id;
-    const size_t m_totalWays;
-    const uint32_t m_offset;
-    MemInfo m_memory;
+    uint32_t m_offset;
     std::atomic<uint64_t> m_hashCount;
     std::atomic<uint64_t> m_timestamp;
+    Hasher *m_hasher;
     uint64_t m_count;
     uint64_t m_sequence;
-    xmrig::CpuThread *m_thread;
+    xmrig::HasherConfig *m_config;
+    State m_pausedState;
+    State m_state;
+    uint8_t *m_hash; // result buffer owned by this worker: 36 bytes per parallel hash
 };
 
 
diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp
index f718a52c..77382c9d 100644
--- a/src/workers/Workers.cpp
+++ b/src/workers/Workers.cpp
@@ -28,18 +28,17 @@
 
 
 #include "api/Api.h"
+#include "api/ApiRouter.h"
 #include "common/log/Log.h"
 #include "core/Config.h"
 #include "core/Controller.h"
-#include "crypto/CryptoNight_constants.h"
+#include "crypto/Argon2_constants.h"
 #include "interfaces/IJobResultListener.h"
-#include "interfaces/IThread.h"
-#include "Mem.h"
 #include "rapidjson/document.h"
 #include "workers/Handle.h"
 #include "workers/Hashrate.h"
-#include "workers/MultiWorker.h"
 #include "workers/Workers.h"
+#include "workers/Worker.h"
 
 
 bool Workers::m_active = false;
@@ -58,6 +57,7 @@ uv_mutex_t Workers::m_mutex;
 uv_rwlock_t Workers::m_rwlock;
 uv_timer_t Workers::m_timer;
 xmrig::Controller *Workers::m_controller = nullptr;
+std::atomic<int> Workers::m_totalThreads;
 
 
 xmrig::Job Workers::job()
@@ -70,26 +70,6 @@ xmrig::Job Workers::job()
 }
 
 
-size_t Workers::hugePages()
-{
-    uv_mutex_lock(&m_mutex);
-    const size_t hugePages = m_status.hugePages;
-    uv_mutex_unlock(&m_mutex);
-
-    return hugePages;
-}
-
-
-size_t Workers::threads()
-{
-    uv_mutex_lock(&m_mutex);
-    const size_t threads = m_status.threads;
-    uv_mutex_unlock(&m_mutex);
-
-    return threads;
-}
-
-
 void Workers::printHashrate(bool detail)
 {
     assert(m_controller != nullptr);
@@ -103,19 +83,23 @@ void Workers::printHashrate(bool detail)
         char num2[8] = { 0 };
         char num3[8] = { 0 };
 
-        Log::i()->text("%s| THREAD | AFFINITY | 10s H/s | 60s H/s | 15m H/s |", isColors ? "\x1B[1;37m" : "");
+        Log::i()->text("%s|  TYPE   |   ID  | 10s H/s | 60s H/s | 15m H/s |", isColors ? "\x1B[1;37m" : "");
 
         size_t i = 0;
-        for (const xmrig::IThread *thread : m_controller->config()->threads()) {
-             Log::i()->text("| %6zu | %8" PRId64 " | %7s | %7s | %7s |",
-                            thread->index(),
-                            thread->affinity(),
-                            Hashrate::format(m_hashrate->calc(thread->index(), Hashrate::ShortInterval),  num1, sizeof num1),
-                            Hashrate::format(m_hashrate->calc(thread->index(), Hashrate::MediumInterval), num2, sizeof num2),
-                            Hashrate::format(m_hashrate->calc(thread->index(), Hashrate::LargeInterval),  num3, sizeof num3)
-                            );
-
-             i++;
+        for (const Handle *worker : m_workers) {
+            for(int i = 0; i < worker->hasher()->deviceCount(); i++) {
+                Log::i()->text("| %7s | %s%-2d | %7s | %7s | %7s |",
+                               worker->hasher()->subType().c_str(),
+                               worker->hasher()->subType(true).c_str(),
+                               i,
+                               Hashrate::format(m_hashrate->calc(worker->hasherId(), i, Hashrate::ShortInterval), num1,
+                                                sizeof num1),
+                               Hashrate::format(m_hashrate->calc(worker->hasherId(), i, Hashrate::MediumInterval), num2,
+                                                sizeof num2),
+                               Hashrate::format(m_hashrate->calc(worker->hasherId(), i, Hashrate::LargeInterval), num3,
+                                                sizeof num3)
+                );
+            }
         }
     }
 
@@ -159,38 +143,22 @@ void Workers::setJob(const xmrig::Job &job, bool donate)
 }
 
 
-void Workers::start(xmrig::Controller *controller)
+bool Workers::start(xmrig::Controller *controller)
 {
-#   ifdef APP_DEBUG
-    LOG_NOTICE("THREADS ------------------------------------------------------------------");
-    for (const xmrig::IThread *thread : controller->config()->threads()) {
-        thread->print();
-    }
-    LOG_NOTICE("--------------------------------------------------------------------------");
-#   endif
-
-#   ifndef XMRIG_NO_ASM
-    xmrig::CpuThread::patchAsmVariants();
-#   endif
-
     m_controller = controller;
 
-    const std::vector<xmrig::IThread *> &threads = controller->config()->threads();
+    const std::vector<xmrig::HasherConfig *> &hashers = controller->config()->hasherConfigs();
     m_status.algo    = controller->config()->algorithm().algo();
+    m_status.variant = controller->config()->algorithm().variant();
     m_status.colors  = controller->config()->isColors();
-    m_status.threads = threads.size();
-
-    for (const xmrig::IThread *thread : threads) {
-       m_status.ways += thread->multiway();
-    }
-
-    m_hashrate = new Hashrate(threads.size(), controller);
+    m_status.hashers = hashers.size();
 
     uv_mutex_init(&m_mutex);
     uv_rwlock_init(&m_rwlock);
 
     m_sequence = 1;
     m_paused   = 1;
+    m_totalThreads = 0;
 
     uv_async_init(uv_default_loop(), &m_async, Workers::onResult);
     uv_timer_init(uv_default_loop(), &m_timer);
@@ -198,15 +166,29 @@ void Workers::start(xmrig::Controller *controller)
 
     uint32_t offset = 0;
 
-    for (xmrig::IThread *thread : threads) {
-        Handle *handle = new Handle(thread, offset, m_status.ways);
-        offset += thread->multiway();
+    for (xmrig::HasherConfig *hasherConfig : hashers) {
+        Handle *handle = new Handle(controller->config(), hasherConfig, offset);
+        if(handle->hasher() != nullptr) {
+            offset += handle->computingThreads();
+            m_totalThreads += handle->computingThreads();
 
-        m_workers.push_back(handle);
-        handle->start(Workers::onReady);
+            m_workers.push_back(handle);
+            handle->start(Workers::onReady);
+        }
     }
 
-    controller->save();
+    if(m_workers.size() > 0) {
+        Log::i()->text(m_status.colors ? GREEN_BOLD(" * Hashers initialization complete * ") : " * Hashers initialization complete * ");
+
+        m_hashrate = new Hashrate(m_workers, controller);
+
+        controller->save();
+    }
+    else {
+        return false;
+    }
+
+    return true;
 }
 
 
@@ -236,60 +218,49 @@ void Workers::submit(const xmrig::JobResult &result)
 
 
 #ifndef XMRIG_NO_API
-void Workers::threadsSummary(rapidjson::Document &doc)
+void Workers::hashersSummary(rapidjson::Document &doc)
 {
-    uv_mutex_lock(&m_mutex);
-    const uint64_t pages[2] = { m_status.hugePages, m_status.pages };
-    const uint64_t memory   = m_status.ways * xmrig::cn_select_memory(m_status.algo);
-    uv_mutex_unlock(&m_mutex);
-
     auto &allocator = doc.GetAllocator();
 
-    rapidjson::Value hugepages(rapidjson::kArrayType);
-    hugepages.PushBack(pages[0], allocator);
-    hugepages.PushBack(pages[1], allocator);
+    rapidjson::Value hashers(rapidjson::kArrayType);
 
-    doc.AddMember("hugepages", hugepages, allocator);
-    doc.AddMember("memory", memory, allocator);
+    for(int i = 0; i < m_workers.size(); i++) {
+        Handle *worker = m_workers[i];
+        for(int j=0; j < worker->hasher()->deviceCount(); j++) {
+            rapidjson::Value hasherDoc(rapidjson::kObjectType);
+
+            xmrig::String type = worker->hasher()->type().data();
+            xmrig::String id = (worker->hasher()->subType(true) + to_string(j)).data();
+
+            hasherDoc.AddMember("type",  type.toJSON(doc), allocator);
+            hasherDoc.AddMember("id",   id.toJSON(doc), allocator);
+
+            rapidjson::Value hashrateEntry(rapidjson::kArrayType);
+            hashrateEntry.PushBack(ApiRouter::normalize(m_hashrate->calc(i, j, Hashrate::ShortInterval)), allocator);
+            hashrateEntry.PushBack(ApiRouter::normalize(m_hashrate->calc(i, j, Hashrate::MediumInterval)), allocator);
+            hashrateEntry.PushBack(ApiRouter::normalize(m_hashrate->calc(i, j, Hashrate::LargeInterval)), allocator);
+
+            hasherDoc.AddMember("hashrate",   hashrateEntry, allocator);
+
+            hashers.PushBack(hasherDoc, allocator);
+        }
+    }
+
+    doc.AddMember("hashers", hashers, allocator);
 }
 #endif
 
 
 void Workers::onReady(void *arg)
 {
-    auto handle = static_cast<Handle*>(arg);
+    auto handleArg = static_cast<Handle::HandleArg*>(arg);
 
-    IWorker *worker = nullptr;
+    IWorker *worker = new Worker(handleArg->handle, handleArg->workerId);
 
-    switch (handle->config()->multiway()) {
-    case 1:
-        worker = new MultiWorker<1>(handle);
-        break;
-
-    case 2:
-        worker = new MultiWorker<2>(handle);
-        break;
-
-    case 3:
-        worker = new MultiWorker<3>(handle);
-        break;
-
-    case 4:
-        worker = new MultiWorker<4>(handle);
-        break;
-
-    case 5:
-        worker = new MultiWorker<5>(handle);
-        break;
-
-    default:
-        break;
-    }
-
-    handle->setWorker(worker);
+    handleArg->handle->addWorker(worker);
 
     if (!worker->selfTest()) {
-        LOG_ERR("thread %zu error: \"hash self-test failed\".", handle->worker()->id());
+        LOG_ERR("hasher %zu error: \"hash self-test failed\".", worker->id());
 
         return;
     }
@@ -319,12 +290,28 @@ void Workers::onResult(uv_async_t *handle)
 
 void Workers::onTick(uv_timer_t *handle)
 {
-    for (Handle *handle : m_workers) {
-        if (!handle->worker()) {
-            return;
-        }
+    for (int h =0; h < m_workers.size(); h++) {
+        Handle *handle = m_workers[h];
 
-        m_hashrate->add(handle->threadId(), handle->worker()->hashCount(), handle->worker()->timestamp());
+        std::vector<IWorker *> internalWorkers = handle->workers();
+        if (internalWorkers.size() == 0)
+            return; // no workers registered on this handle: abort the whole tick
+
+        int deviceCount = handle->hasher()->deviceCount();
+        int computingThreads = internalWorkers.size();
+        int multiplier = deviceCount > 0 ? computingThreads / deviceCount : 0; // workers per device; a zero device count must not divide by zero
+
+        for(int i = 0; i < deviceCount; i++) {
+            uint64_t hashCount = 0; // summed over all workers that belong to device i
+            uint64_t timeStamp = 0; // most recent timestamp among those workers
+
+            for(int j = 0; j < multiplier; j++) {
+                hashCount += internalWorkers[i * multiplier + j]->hashCount();
+                timeStamp = max(timeStamp, internalWorkers[i * multiplier + j]->timestamp());
+            }
+
+            m_hashrate->add(h, i, hashCount, timeStamp);
+        }
     }
 
     if ((m_ticks++ & 0xF) == 0)  {
@@ -339,23 +326,19 @@ void Workers::start(IWorker *worker)
 
     uv_mutex_lock(&m_mutex);
     m_status.started++;
-    m_status.pages     += w->memory().pages;
-    m_status.hugePages += w->memory().hugePages;
 
-    if (m_status.started == m_status.threads) {
-        const double percent = (double) m_status.hugePages / m_status.pages * 100.0;
-        const size_t memory  = m_status.ways * xmrig::cn_select_memory(m_status.algo) / 1024;
-
-        if (m_status.colors) {
-            LOG_INFO(GREEN_BOLD("READY (CPU)") " threads " CYAN_BOLD("%zu(%zu)") " huge pages %s%zu/%zu %1.0f%%\x1B[0m memory " CYAN_BOLD("%zu KB") "",
-                     m_status.threads, m_status.ways,
+    if (m_status.started == m_status.hashers) {
+/// TODO better status description
+/*        if (m_status.colors) {
+            LOG_INFO(GREEN_BOLD("READY (CPU)") " threads " CYAN_BOLD("%zu") " huge pages %s%zu/%zu %1.0f%%\x1B[0m memory " CYAN_BOLD("%.2f KB") "",
+                     m_status.hashers,
                      (m_status.hugePages == m_status.pages ? "\x1B[1;32m" : (m_status.hugePages == 0 ? "\x1B[1;31m" : "\x1B[1;33m")),
                      m_status.hugePages, m_status.pages, percent, memory);
         }
         else {
-            LOG_INFO("READY (CPU) threads %zu(%zu) huge pages %zu/%zu %1.0f%% memory %zu KB",
-                     m_status.threads, m_status.ways, m_status.hugePages, m_status.pages, percent, memory);
-        }
+            LOG_INFO("READY (CPU) threads %zu huge pages %zu/%zu %1.0f%% memory %zu KB",
+                     m_status.hashers, m_status.hugePages, m_status.pages, percent, memory);
+        } */
     }
 
     uv_mutex_unlock(&m_mutex);
diff --git a/src/workers/Workers.h b/src/workers/Workers.h
index a9b8e695..8c42c8b3 100644
--- a/src/workers/Workers.h
+++ b/src/workers/Workers.h
@@ -51,12 +51,10 @@ class Workers
 {
 public:
     static xmrig::Job job();
-    static size_t hugePages();
-    static size_t threads();
     static void printHashrate(bool detail);
     static void setEnabled(bool enabled);
     static void setJob(const xmrig::Job &job, bool donate);
-    static void start(xmrig::Controller *controller);
+    static bool start(xmrig::Controller *controller);
     static void stop();
     static void submit(const xmrig::JobResult &result);
 
@@ -67,9 +65,11 @@ public:
     static inline uint64_t sequence()                                   { return m_sequence.load(std::memory_order_relaxed); }
     static inline void pause()                                          { m_active = false; m_paused = 1; m_sequence++; }
     static inline void setListener(xmrig::IJobResultListener *listener) { m_listener = listener; }
+    static inline int totalThreads()                                    { return m_totalThreads.load(std::memory_order_relaxed); }
+    static inline std::vector<Handle *> workers()                       { return m_workers; }
 
 #   ifndef XMRIG_NO_API
-    static void threadsSummary(rapidjson::Document &doc);
+    static void hashersSummary(rapidjson::Document &doc);
 #   endif
 
 private:
@@ -82,22 +82,17 @@ private:
     {
     public:
         inline LaunchStatus() :
-            colors(true),
-            hugePages(0),
-            pages(0),
-            started(0),
-            threads(0),
-            ways(0),
-            algo(xmrig::CRYPTONIGHT)
+                colors(true),
+                started(0),
+                hashers(0),
+                algo(xmrig::ARGON2)
         {}
 
         bool colors;
-        size_t hugePages;
-        size_t pages;
         size_t started;
-        size_t threads;
-        size_t ways;
+        size_t hashers;
         xmrig::Algo algo;
+        xmrig::Variant variant;
     };
 
     static bool m_active;
@@ -110,6 +105,7 @@ private:
     static std::atomic<uint64_t> m_sequence;
     static std::list<xmrig::JobResult> m_queue;
     static std::vector<Handle*> m_workers;
+    static std::atomic<int> m_totalThreads;
     static uint64_t m_ticks;
     static uv_async_t m_async;
     static uv_mutex_t m_mutex;