diff --git a/CMakeLists.txt b/CMakeLists.txt index 14dcc931..1f328ccc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,22 +1,26 @@ cmake_minimum_required(VERSION 2.8) -project(xmrig) +project(ninjarig) option(WITH_LIBCPUID "Use Libcpuid" ON) -option(WITH_AEON "CryptoNight-Lite support" ON) -option(WITH_SUMO "CryptoNight-Heavy support" ON) -option(WITH_CN_PICO "CryptoNight-Pico support" ON) -option(WITH_CN_GPU "CryptoNight-GPU support" ON) option(WITH_HTTPD "HTTP REST API" ON) option(WITH_DEBUG_LOG "Enable debug log output" OFF) option(WITH_TLS "Enable OpenSSL support" ON) -option(WITH_ASM "Enable ASM PoW implementations" ON) -option(BUILD_STATIC "Build static binary" OFF) -option(ARM_TARGET "Force use specific ARM target 8 or 7" 0) option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF) +option(WITH_CUDA "Enable CUDA support" ON) +option(WITH_OPENCL "Enable OpenCL support" ON) include (CheckIncludeFile) include (cmake/cpu.cmake) +include (cmake/TargetArch.cmake) +target_architecture (ARCH) +MESSAGE( STATUS "Target architecture is: " ${ARCH} ) + +SET(CMAKE_SKIP_BUILD_RPATH FALSE) +SET(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) +SET(CMAKE_INSTALL_RPATH "./") +SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) +set (CMAKE_MACOSX_RPATH 0) set(HEADERS src/api/NetworkState.h @@ -69,43 +73,23 @@ set(HEADERS src/core/ConfigLoader_default.h src/core/Controller.h src/interfaces/IJobResultListener.h - src/interfaces/IThread.h src/interfaces/IWorker.h - src/Mem.h src/net/JobResult.h src/net/Network.h src/net/strategies/DonateStrategy.h + src/net/strategies/Http.h src/Summary.h src/version.h - src/workers/CpuThread.h + src/core/HasherConfig.h src/workers/Handle.h src/workers/Hashrate.h - src/workers/MultiWorker.h src/workers/Worker.h src/workers/Workers.h ) set(HEADERS_CRYPTO - src/crypto/c_blake256.h - src/crypto/c_groestl.h - src/crypto/c_jh.h - src/crypto/c_skein.h - src/crypto/CryptoNight.h - src/crypto/CryptoNight_constants.h - src/crypto/CryptoNight_monero.h - 
src/crypto/CryptoNight_test.h - src/crypto/groestl_tables.h - src/crypto/hash.h - src/crypto/skein_port.h - src/crypto/soft_aes.h - src/crypto/asm/CryptonightR_template.h - ) - -if (XMRIG_ARM) - set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/CryptoNight_arm.h) -else() - set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/CryptoNight_x86.h) -endif() + src/crypto/Argon2_constants.h + ) set(SOURCES src/api/NetworkState.cpp @@ -138,25 +122,64 @@ set(SOURCES src/common/Platform.cpp src/core/Config.cpp src/core/Controller.cpp - src/Mem.cpp src/net/Network.cpp src/net/strategies/DonateStrategy.cpp + src/net/strategies/Http.cpp + src/net/strategies/http_parser/http_parser.c src/Summary.cpp - src/workers/CpuThread.cpp src/workers/Handle.cpp src/workers/Hashrate.cpp - src/workers/MultiWorker.cpp src/workers/Worker.cpp src/workers/Workers.cpp src/xmrig.cpp ) -set(SOURCES_CRYPTO - src/crypto/c_groestl.c - src/crypto/c_blake256.c - src/crypto/c_jh.c - src/crypto/c_skein.c - ) +set(HEADERS_COMMON + src/crypto/argon2_hasher/common/common.h + src/crypto/argon2_hasher/common/DLLExport.h + src/crypto/argon2_hasher/common/DLLImport.h + src/crypto/argon2_hasher/crypt/base64.h + src/crypto/argon2_hasher/crypt/hex.h + src/crypto/argon2_hasher/crypt/random_generator.h + src/crypto/argon2_hasher/crypt/sha512.h + src/crypto/argon2_hasher/hash/argon2/blake2/blake2.h + src/crypto/argon2_hasher/hash/argon2/blake2/blake2-config.h + src/crypto/argon2_hasher/hash/argon2/blake2/blake2-impl.h + src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse2.h + src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse41.h + src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-round.h + src/crypto/argon2_hasher/hash/argon2/Argon2.h + src/crypto/argon2_hasher/hash/argon2/Defs.h + src/crypto/argon2_hasher/hash/Hasher.h + ) + +set(SOURCES_COMMON + src/crypto/argon2_hasher/common/common.cpp + src/crypto/argon2_hasher/crypt/base64.cpp + src/crypto/argon2_hasher/crypt/hex.cpp + 
src/crypto/argon2_hasher/crypt/random_generator.cpp + src/crypto/argon2_hasher/crypt/sha512.cpp + src/crypto/argon2_hasher/hash/argon2/blake2/blake2b.c + src/crypto/argon2_hasher/hash/argon2/Argon2.cpp + src/crypto/argon2_hasher/hash/argon2/argon2profile_4_1_256.c + src/crypto/argon2_hasher/hash/argon2/argon2profile_3_1_512.c + src/crypto/argon2_hasher/hash/Hasher.cpp + src/core/HasherConfig.cpp) + +set(SOURCE_CPU_HASHER src/crypto/argon2_hasher/hash/cpu/CpuHasher.cpp src/crypto/argon2_hasher/hash/cpu/CpuHasher.h) + +set(SOURCE_OPENCL_HASHER src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h + src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.h) + +set(SOURCE_CUDA_HASHER src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.cpp src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.h + src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu) + +set(ARGON2_FILL_BLOCKS_SRC + src/crypto/argon2_hasher/hash/cpu/argon2_opt/implementation.c + src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-opt.h + src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-ref.h + src/crypto/argon2_hasher/hash/argon2/Defs.h + src/crypto/argon2_hasher/hash/argon2/blake2/blake2-impl.h) if (WIN32) set(SOURCES_OS @@ -164,8 +187,7 @@ if (WIN32) src/App_win.cpp src/base/io/Json_win.cpp src/common/Platform_win.cpp - src/Mem_win.cpp - ) + ) add_definitions(/DWIN32) set(EXTRA_LIBS ws2_32 psapi iphlpapi userenv) @@ -174,15 +196,13 @@ elseif (APPLE) src/App_unix.cpp src/base/io/Json_unix.cpp src/common/Platform_mac.cpp - src/Mem_unix.cpp - ) + ) else() set(SOURCES_OS src/App_unix.cpp src/base/io/Json_unix.cpp src/common/Platform_unix.cpp - src/Mem_unix.cpp - ) + ) if (CMAKE_SYSTEM_NAME STREQUAL FreeBSD) set(EXTRA_LIBS kvm pthread) @@ -225,8 +245,6 @@ else() endif() include(cmake/OpenSSL.cmake) -include(cmake/asm.cmake) -include(cmake/cn-gpu.cmake) CHECK_INCLUDE_FILE (syslog.h 
HAVE_SYSLOG_H) if (HAVE_SYSLOG_H) @@ -234,22 +252,6 @@ if (HAVE_SYSLOG_H) set(SOURCES_SYSLOG src/common/log/SysLog.h src/common/log/SysLog.cpp) endif() -if (NOT WITH_AEON) - add_definitions(/DXMRIG_NO_AEON) -endif() - -if (NOT WITH_SUMO) - add_definitions(/DXMRIG_NO_SUMO) -endif() - -if (NOT WITH_IPBC) - add_definitions(/DXMRIG_NO_IPBC) -endif() - -if (NOT WITH_CN_PICO) - add_definitions(/DXMRIG_NO_CN_PICO) -endif() - if (WITH_EMBEDDED_CONFIG) add_definitions(/DXMRIG_FEATURE_EMBEDDED_CONFIG) endif() @@ -284,14 +286,115 @@ endif() include_directories(src) include_directories(src/3rdparty) include_directories(${UV_INCLUDE_DIR}) +include_directories(src/crypto/argon2_hasher/hash/cpu/cpu_features/include) -if (BUILD_STATIC) - set(CMAKE_EXE_LINKER_FLAGS " -static") -endif() +add_subdirectory(src/crypto/argon2_hasher/hash/cpu/cpu_features) +set_property(TARGET cpu_features PROPERTY POSITION_INDEPENDENT_CODE ON) if (WITH_DEBUG_LOG) add_definitions(/DAPP_DEBUG) endif() -add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES} ${CN_GPU_SOURCES}) -target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB}) +add_library(argon2_common SHARED ${HEADERS_COMMON} ${SOURCES_COMMON}) +target_link_libraries(argon2_common ${CMAKE_DL_LIBS}) + +add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES}) +target_link_libraries(${CMAKE_PROJECT_NAME} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB} argon2_common) + +add_library(cpu_hasher MODULE ${SOURCE_CPU_HASHER}) +set_target_properties(cpu_hasher + PROPERTIES + PREFIX "" + SUFFIX ".hsh" + LIBRARY_OUTPUT_DIRECTORY modules + ) +target_link_libraries(cpu_hasher argon2_common cpu_features) 
+add_dependencies(${CMAKE_PROJECT_NAME} cpu_hasher) + +add_library(argon2_fill_blocks_REF MODULE ${ARGON2_FILL_BLOCKS_SRC}) +set_target_properties(argon2_fill_blocks_REF + PROPERTIES + PREFIX "" + SUFFIX ".opt" + LIBRARY_OUTPUT_DIRECTORY modules + ) +target_compile_definitions(argon2_fill_blocks_REF PRIVATE BUILD_REF=1) +add_dependencies(cpu_hasher argon2_fill_blocks_REF) + +if(ARCH STREQUAL "x86_64") + add_library(argon2_fill_blocks_SSE2 MODULE ${ARGON2_FILL_BLOCKS_SRC}) + add_library(argon2_fill_blocks_SSSE3 MODULE ${ARGON2_FILL_BLOCKS_SRC}) + add_library(argon2_fill_blocks_AVX MODULE ${ARGON2_FILL_BLOCKS_SRC}) + add_library(argon2_fill_blocks_AVX2 MODULE ${ARGON2_FILL_BLOCKS_SRC}) + add_library(argon2_fill_blocks_AVX512F MODULE ${ARGON2_FILL_BLOCKS_SRC}) + set_target_properties(argon2_fill_blocks_SSE2 argon2_fill_blocks_SSSE3 argon2_fill_blocks_AVX argon2_fill_blocks_AVX2 argon2_fill_blocks_AVX512F + PROPERTIES + PREFIX "" + SUFFIX ".opt" + LIBRARY_OUTPUT_DIRECTORY modules + ) + target_compile_options(argon2_fill_blocks_SSE2 PRIVATE -msse2) + target_compile_options(argon2_fill_blocks_SSSE3 PRIVATE -mssse3) + target_compile_options(argon2_fill_blocks_AVX PRIVATE -mavx) + target_compile_options(argon2_fill_blocks_AVX2 PRIVATE -mavx2) + target_compile_options(argon2_fill_blocks_AVX512F PRIVATE -mavx512f) + add_dependencies(cpu_hasher argon2_fill_blocks_SSE2 argon2_fill_blocks_SSSE3 argon2_fill_blocks_AVX argon2_fill_blocks_AVX2 argon2_fill_blocks_AVX512F) +endif() + +if(ARCH STREQUAL "arm" OR ARCH STREQUAL "aarch64") + add_library(argon2_fill_blocks_NEON MODULE ${ARGON2_FILL_BLOCKS_SRC}) + set_target_properties(argon2_fill_blocks_NEON + PROPERTIES + PREFIX "" + SUFFIX ".opt" + LIBRARY_OUTPUT_DIRECTORY modules + ) + target_compile_options(argon2_common PRIVATE -D__NEON__) + if(ARCH STREQUAL "arm") + target_compile_options(argon2_fill_blocks_NEON PRIVATE -D__NEON__ -mfpu=neon -funsafe-math-optimizations) + else() + target_compile_options(argon2_fill_blocks_NEON PRIVATE
-D__NEON__) + endif(ARCH STREQUAL "arm") + + add_dependencies(cpu_hasher argon2_fill_blocks_NEON) +endif(ARCH STREQUAL "arm" OR ARCH STREQUAL "aarch64") + +if(WITH_OPENCL) + add_definitions(-DWITH_OPENCL) + find_package(OpenCL REQUIRED) + include_directories(${OpenCL_INCLUDE_DIR}) + add_library(opencl_hasher MODULE ${SOURCE_OPENCL_HASHER}) + set_target_properties(opencl_hasher + PROPERTIES + PREFIX "" + SUFFIX ".hsh" + LIBRARY_OUTPUT_DIRECTORY modules + ) + target_link_libraries(opencl_hasher argon2_common ${OpenCL_LIBRARY}) + add_dependencies(${CMAKE_PROJECT_NAME} opencl_hasher) +endif() + +if(WITH_CUDA) + add_definitions(-DWITH_CUDA) + find_package(CUDA REQUIRED) + if(NOT WIN32) + add_definitions(-DPARALLEL_CUDA) + endif() + set( + CUDA_NVCC_FLAGS + ${CUDA_NVCC_FLAGS}; + -O3 -arch=compute_35 -std=c++11 + ) + cuda_add_library(cuda_hasher MODULE ${SOURCE_CUDA_HASHER}) + set_target_properties(cuda_hasher + PROPERTIES + PREFIX "" + SUFFIX ".hsh" + LIBRARY_OUTPUT_DIRECTORY modules + ) + target_link_libraries(cuda_hasher argon2_common) + add_dependencies(${CMAKE_PROJECT_NAME} cuda_hasher) +endif() + + + diff --git a/cmake/TargetArch.cmake b/cmake/TargetArch.cmake new file mode 100644 index 00000000..be66b82f --- /dev/null +++ b/cmake/TargetArch.cmake @@ -0,0 +1,116 @@ +# Based on the Qt 5 processor detection code, so should be very accurate +# https://qt.gitorious.org/qt/qtbase/blobs/master/src/corelib/global/qprocessordetection.h +# Currently handles arm (32-bit), aarch64, x86 (32/64), ia64, and ppc (32/64) + +# Regarding POWER/PowerPC, just as is noted in the Qt source, +# "There are many more known variants/revisions that we do not handle/detect."
+ +set(archdetect_c_code " +#if defined(__arm__) || defined(__TARGET_ARCH_ARM) + #error cmake_ARCH arm +#elif defined(__aarch64__) + #error cmake_ARCH aarch64 +#elif defined(__i386) || defined(__i386__) || defined(_M_IX86) + #error cmake_ARCH i386 +#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64) + #error cmake_ARCH x86_64 +#elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64) + #error cmake_ARCH ia64 +#elif defined(__ppc__) || defined(__ppc) || defined(__powerpc__) \\ + || defined(_ARCH_COM) || defined(_ARCH_PWR) || defined(_ARCH_PPC) \\ + || defined(_M_MPPC) || defined(_M_PPC) + #if defined(__ppc64__) || defined(__powerpc64__) || defined(__64BIT__) + #error cmake_ARCH ppc64 + #else + #error cmake_ARCH ppc + #endif +#endif + +#error cmake_ARCH unknown +") + +# Set ppc_support to TRUE before including this file or ppc and ppc64 +# will be treated as invalid architectures since they are no longer supported by Apple + +function(target_architecture output_var) + if(APPLE AND CMAKE_OSX_ARCHITECTURES) + # On OS X we use CMAKE_OSX_ARCHITECTURES *if* it was set + # First let's normalize the order of the values + + # Note that it's not possible to compile PowerPC applications if you are using + # the OS X SDK version 10.6 or later - you'll need 10.4/10.5 for that, so we + # disable it by default + # See this page for more information: + # http://stackoverflow.com/questions/5333490/how-can-we-restore-ppc-ppc64-as-well-as-full-10-4-10-5-sdk-support-to-xcode-4 + + # Architecture defaults to i386 or ppc on OS X 10.5 and earlier, depending on the CPU type detected at runtime. + # On OS X 10.6+ the default is x86_64 if the CPU supports it, i386 otherwise. 
+ + foreach(osx_arch ${CMAKE_OSX_ARCHITECTURES}) + if("${osx_arch}" STREQUAL "ppc" AND ppc_support) + set(osx_arch_ppc TRUE) + elseif("${osx_arch}" STREQUAL "i386") + set(osx_arch_i386 TRUE) + elseif("${osx_arch}" STREQUAL "x86_64") + set(osx_arch_x86_64 TRUE) + elseif("${osx_arch}" STREQUAL "ppc64" AND ppc_support) + set(osx_arch_ppc64 TRUE) + else() + message(FATAL_ERROR "Invalid OS X arch name: ${osx_arch}") + endif() + endforeach() + + # Now add all the architectures in our normalized order + if(osx_arch_ppc) + list(APPEND ARCH ppc) + endif() + + if(osx_arch_i386) + list(APPEND ARCH i386) + endif() + + if(osx_arch_x86_64) + list(APPEND ARCH x86_64) + endif() + + if(osx_arch_ppc64) + list(APPEND ARCH ppc64) + endif() + else() + file(WRITE "${CMAKE_BINARY_DIR}/arch.c" "${archdetect_c_code}") + + enable_language(C) + + # Detect the architecture in a rather creative way... + # This compiles a small C program which is a series of ifdefs that selects a + # particular #error preprocessor directive whose message string contains the + # target architecture. The program will always fail to compile (both because + # file is not a valid C program, and obviously because of the presence of the + # #error preprocessor directives... 
but by exploiting the preprocessor in this + # way, we can detect the correct target architecture even when cross-compiling, + # since the program itself never needs to be run (only the compiler/preprocessor) + try_run( + run_result_unused + compile_result_unused + "${CMAKE_BINARY_DIR}" + "${CMAKE_BINARY_DIR}/arch.c" + COMPILE_OUTPUT_VARIABLE ARCH + CMAKE_FLAGS CMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} + ) + + # Parse the architecture name from the compiler output + string(REGEX MATCH "cmake_ARCH ([a-zA-Z0-9_]+)" ARCH "${ARCH}") + + # Get rid of the value marker leaving just the architecture name + string(REPLACE "cmake_ARCH " "" ARCH "${ARCH}") + + # If we are compiling with an unknown architecture this variable should + # already be set to "unknown" but in the case that it's empty (i.e. due + # to a typo in the code), then set it to unknown + if (NOT ARCH) + set(ARCH unknown) + endif() + endif() + + set(${output_var} "${ARCH}" PARENT_SCOPE) +endfunction() diff --git a/cmake/asm.cmake b/cmake/asm.cmake deleted file mode 100644 index 389f6723..00000000 --- a/cmake/asm.cmake +++ /dev/null @@ -1,45 +0,0 @@ -if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) - set(XMRIG_ASM_LIBRARY "xmrig-asm") - - if (CMAKE_C_COMPILER_ID MATCHES MSVC) - enable_language(ASM_MASM) - - if (MSVC_TOOLSET_VERSION GREATER_EQUAL 141) - set(XMRIG_ASM_FILES - "src/crypto/asm/cn_main_loop.asm" - "src/crypto/asm/CryptonightR_template.asm" - ) - else() - set(XMRIG_ASM_FILES - "src/crypto/asm/win64/cn_main_loop.asm" - "src/crypto/asm/win64/CryptonightR_template.asm" - ) - endif() - - set_property(SOURCE ${XMRIG_ASM_FILES} PROPERTY ASM_MASM) - else() - enable_language(ASM) - - if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU) - set(XMRIG_ASM_FILES - "src/crypto/asm/win64/cn_main_loop.S" - "src/crypto/asm/CryptonightR_template.S" - ) - else() - set(XMRIG_ASM_FILES - "src/crypto/asm/cn_main_loop.S" - "src/crypto/asm/CryptonightR_template.S" - ) - endif() - - set_property(SOURCE 
${XMRIG_ASM_FILES} PROPERTY C) - endif() - - add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILES}) - set(XMRIG_ASM_SOURCES src/crypto/Asm.h src/crypto/Asm.cpp src/crypto/CryptonightR_gen.cpp) - set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C) -else() - set(XMRIG_ASM_SOURCES "") - set(XMRIG_ASM_LIBRARY "") - add_definitions(/DXMRIG_NO_ASM) -endif() diff --git a/cmake/cn-gpu.cmake b/cmake/cn-gpu.cmake deleted file mode 100644 index b529f0b2..00000000 --- a/cmake/cn-gpu.cmake +++ /dev/null @@ -1,23 +0,0 @@ -if (WITH_CN_GPU AND CMAKE_SIZEOF_VOID_P EQUAL 8) - - if (XMRIG_ARM) - set(CN_GPU_SOURCES src/crypto/cn_gpu_arm.cpp) - - if (CMAKE_CXX_COMPILER_ID MATCHES GNU OR CMAKE_CXX_COMPILER_ID MATCHES Clang) - set_source_files_properties(src/crypto/cn_gpu_arm.cpp PROPERTIES COMPILE_FLAGS "-O3") - endif() - else() - set(CN_GPU_SOURCES src/crypto/cn_gpu_avx.cpp src/crypto/cn_gpu_ssse3.cpp) - - if (CMAKE_CXX_COMPILER_ID MATCHES GNU OR CMAKE_CXX_COMPILER_ID MATCHES Clang) - set_source_files_properties(src/crypto/cn_gpu_avx.cpp PROPERTIES COMPILE_FLAGS "-O3 -mavx2") - set_source_files_properties(src/crypto/cn_gpu_ssse3.cpp PROPERTIES COMPILE_FLAGS "-O3") - elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC) - set_source_files_properties(src/crypto/cn_gpu_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX") - endif() - endif() -else() - set(CN_GPU_SOURCES "") - - add_definitions(/DXMRIG_NO_CN_GPU) -endif() diff --git a/doc/ALGORITHMS.md b/doc/ALGORITHMS.md index 835a1d49..9b42ead1 100644 --- a/doc/ALGORITHMS.md +++ b/doc/ALGORITHMS.md @@ -1,17 +1,17 @@ # Algorithms -XMRig uses a different way to specify algorithms, compared to other miners. +NinjaRig uses a different way to specify algorithms, compared to other miners. Algorithm selection splitted to 2 parts: - * Global base algorithm per miner or proxy instance, `algo` option. Possible values: `cryptonight`, `cryptonight-lite`, `cryptonight-heavy`. + * Global base algorithm per miner or proxy instance, `algo` option. 
Possible values: `argon2id`. * Algorithm variant specified separately for each pool, `variant` option. * [Full table for supported algorithm and variants.](https://github.com/xmrig/xmrig-proxy/blob/master/doc/STRATUM_EXT.md#14-algorithm-names-and-variants) #### Example ```json { - "algo": "cryptonight", + "algo": "argon2id", ... "pools": [ { diff --git a/doc/api/1/config.json b/doc/api/1/config.json index 2c74cfba..560ff810 100644 --- a/doc/api/1/config.json +++ b/doc/api/1/config.json @@ -1,5 +1,5 @@ { - "algo": "cryptonight", + "algo": "chukwa", "api": { "port": 44444, "access-token": "TOKEN", @@ -19,16 +19,16 @@ "max-cpu-usage": 75, "pools": [ { - "url": "pool.monero.hashvault.pro:3333", - "user": "48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD", + "url": "publicnode.ydns.eu:4666", + "user": "WrkzZon3ZArBkZVqAH9n6MM2eq2tV6sN9GwqD73hTKuYAyhMYK48ukQPFQssEMXnFMFs3nwekTLiXa9obkxM6f1KA2i73gEcq8", "pass": "x", "keepalive": false, "nicehash": false, "variant": -1 }, { - "url": "pool.supportxmr.com:3333", - "user": "48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD", + "url": "testnet.wrkz.work:5555", + "user": "WrkzZon3ZArBkZVqAH9n6MM2eq2tV6sN9GwqD73hTKuYAyhMYK48ukQPFQssEMXnFMFs3nwekTLiXa9obkxM6f1KA2i73gEcq8", "pass": "x", "keepalive": false, "nicehash": false, diff --git a/doc/api/1/summary.json b/doc/api/1/summary.json index ed3cd128..95519d56 100644 --- a/doc/api/1/summary.json +++ b/doc/api/1/summary.json @@ -1,17 +1,16 @@ { "id": "92f3104f9a2ee78c", "worker_id": "Ubuntu-1604-xenial-64-minimal", - "version": "2.6.0-beta3", + "version": "1.0.0-alpha", "kind": "cpu", - "ua": "XMRig/2.6.0-beta3 (Linux x86_64) libuv/1.8.0 gcc/5.4.0", + "ua": "NinjaRig/1.0.0-alpha (Linux x86_64) libuv/1.8.0 gcc/5.4.0", "cpu": { "brand": "Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz", "aes": true, "x64": true, "sockets": 1 }, - "algo": "cryptonight", - "hugepages": true, + "algo": "chukwa", 
"donate_level": 5, "hashrate": { "total": [ @@ -64,7 +63,7 @@ "error_log": [] }, "connection": { - "pool": "pool.monero.hashvault.pro:3333", + "pool": "publicnode.ydns.eu:4666", "uptime": 953, "ping": 35, "failures": 0, diff --git a/doc/api/1/threads.json b/doc/api/1/threads.json index e536883d..5b302af6 100644 --- a/doc/api/1/threads.json +++ b/doc/api/1/threads.json @@ -1,14 +1,9 @@ { - "hugepages": [ - 4, - 4 - ], "memory": 8388608, "threads": [ { "type": "cpu", - "algo": "cryptonight", - "av": 1, + "algo": "chukwa", "low_power_mode": 1, "affine_to_cpu": 0, "priority": -1, @@ -21,7 +16,7 @@ }, { "type": "cpu", - "algo": "cryptonight", + "algo": "chukwa", "av": 1, "low_power_mode": 1, "affine_to_cpu": 1, @@ -35,7 +30,7 @@ }, { "type": "cpu", - "algo": "cryptonight", + "algo": "chukwa", "av": 1, "low_power_mode": 1, "affine_to_cpu": 2, @@ -49,7 +44,7 @@ }, { "type": "cpu", - "algo": "cryptonight", + "algo": "chukwa", "av": 1, "low_power_mode": 1, "affine_to_cpu": 3, diff --git a/res/app.rc b/res/app.rc index 037d842a..84a9e90d 100644 --- a/res/app.rc +++ b/res/app.rc @@ -24,7 +24,7 @@ VS_VERSION_INFO VERSIONINFO VALUE "FileDescription", APP_DESC VALUE "FileVersion", APP_VERSION VALUE "LegalCopyright", APP_COPYRIGHT - VALUE "OriginalFilename", "xmrig.exe" + VALUE "OriginalFilename", "ninjarig.exe" VALUE "ProductName", APP_NAME VALUE "ProductVersion", APP_VERSION END diff --git a/src/App.cpp b/src/App.cpp index e75766ac..0b69c884 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -27,23 +27,19 @@ #include #include - #include "api/Api.h" #include "App.h" #include "base/kernel/Signals.h" #include "common/Console.h" -#include "common/cpu/Cpu.h" #include "common/log/Log.h" #include "common/Platform.h" #include "core/Config.h" #include "core/Controller.h" -#include "crypto/CryptoNight.h" -#include "Mem.h" #include "net/Network.h" #include "Summary.h" -#include "version.h" #include "workers/Workers.h" - +#include +#include #ifndef XMRIG_NO_HTTPD # include "common/api/Httpd.h" 
@@ -55,6 +51,8 @@ xmrig::App::App(Process *process) : m_httpd(nullptr), m_signals(nullptr) { + srand(time(NULL)); + m_controller = new Controller(process); if (m_controller->init() != 0) { return; @@ -63,6 +61,8 @@ xmrig::App::App(Process *process) : if (!m_controller->config()->isBackground()) { m_console = new Console(this); } + + process->location(Process::ExeLocation, m_appFileName); } @@ -90,7 +90,8 @@ int xmrig::App::exec() background(); - Mem::init(m_controller->config()->isHugePages()); + // load hasher modules + Hasher::loadHashers(m_appFileName); Summary::print(m_controller); @@ -115,7 +116,8 @@ int xmrig::App::exec() m_httpd->start(); # endif - Workers::start(m_controller); + if(!Workers::start(m_controller)) + return 0; m_controller->network()->connect(); diff --git a/src/App.h b/src/App.h index fc944967..b1e9d8a3 100644 --- a/src/App.h +++ b/src/App.h @@ -64,6 +64,7 @@ private: Controller *m_controller; Httpd *m_httpd; Signals *m_signals; + char m_appFileName[512]; }; diff --git a/src/Mem.cpp b/src/Mem.cpp deleted file mode 100644 index 01a2157b..00000000 --- a/src/Mem.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - - -#include "common/utils/mm_malloc.h" -#include "crypto/CryptoNight.h" -#include "crypto/CryptoNight_constants.h" -#include "Mem.h" - - -bool Mem::m_enabled = true; -int Mem::m_flags = 0; - - -MemInfo Mem::create(cryptonight_ctx **ctx, xmrig::Algo algorithm, size_t count) -{ - using namespace xmrig; - - MemInfo info; - info.size = cn_select_memory(algorithm) * count; - - constexpr const size_t align_size = 2 * 1024 * 1024; - info.size = ((info.size + align_size - 1) / align_size) * align_size; - info.pages = info.size / align_size; - - allocate(info, m_enabled); - - for (size_t i = 0; i < count; ++i) { - cryptonight_ctx *c = static_cast(_mm_malloc(sizeof(cryptonight_ctx), 4096)); - c->memory = info.memory + (i * cn_select_memory(algorithm)); - - uint8_t* p = reinterpret_cast(allocateExecutableMemory(0x4000)); - c->generated_code = reinterpret_cast(p); - c->generated_code_double = reinterpret_cast(p + 0x2000); - - c->generated_code_data.variant = xmrig::VARIANT_MAX; - c->generated_code_data.height = (uint64_t)(-1); - c->generated_code_double_data = c->generated_code_data; - - ctx[i] = c; - } - - return info; -} - - -void Mem::release(cryptonight_ctx **ctx, size_t count, MemInfo &info) -{ - release(info); - - for (size_t i = 0; i < count; ++i) { - _mm_free(ctx[i]); - } -} - diff --git a/src/Mem.h b/src/Mem.h deleted file mode 100644 index 9e39e963..00000000 --- a/src/Mem.h +++ /dev/null @@ -1,78 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - 
* the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef XMRIG_MEM_H -#define XMRIG_MEM_H - - -#include -#include - - -#include "common/xmrig.h" - - -struct cryptonight_ctx; - - -struct MemInfo -{ - alignas(16) uint8_t *memory; - - size_t hugePages; - size_t pages; - size_t size; -}; - - -class Mem -{ -public: - enum Flags { - HugepagesAvailable = 1, - HugepagesEnabled = 2, - Lock = 4 - }; - - static MemInfo create(cryptonight_ctx **ctx, xmrig::Algo algorithm, size_t count); - static void init(bool enabled); - static void release(cryptonight_ctx **ctx, size_t count, MemInfo &info); - - static void *allocateExecutableMemory(size_t size); - static void protectExecutableMemory(void *p, size_t size); - static void flushInstructionCache(void *p, size_t size); - - static inline bool isHugepagesAvailable() { return (m_flags & HugepagesAvailable) != 0; } - -private: - static void allocate(MemInfo &info, bool enabled); - static void release(MemInfo &info); - - static int m_flags; - static bool m_enabled; -}; - - -#endif /* XMRIG_MEM_H */ diff --git a/src/Mem_unix.cpp b/src/Mem_unix.cpp deleted file mode 100644 index 833c200c..00000000 --- a/src/Mem_unix.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it 
and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - - -#include -#include - - -#include "common/log/Log.h" -#include "common/utils/mm_malloc.h" -#include "common/xmrig.h" -#include "crypto/CryptoNight.h" -#include "Mem.h" - - -void Mem::init(bool enabled) -{ - m_enabled = enabled; -} - - -void Mem::allocate(MemInfo &info, bool enabled) -{ - info.hugePages = 0; - - if (!enabled) { - info.memory = static_cast(_mm_malloc(info.size, 4096)); - - return; - } - -# if defined(__APPLE__) - info.memory = static_cast(mmap(0, info.size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0)); -# elif defined(__FreeBSD__) - info.memory = static_cast(mmap(0, info.size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0)); -# else - info.memory = static_cast(mmap(0, info.size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 0, 0)); -# endif - - if (info.memory == MAP_FAILED) { - return allocate(info, false);; - } - - info.hugePages = info.pages; - - if (madvise(info.memory, info.size, MADV_RANDOM | MADV_WILLNEED) != 0) { - LOG_ERR("madvise failed"); - } - - if (mlock(info.memory, info.size) == 0) { - m_flags |= Lock; - } -} - - -void Mem::release(MemInfo &info) -{ - if (info.hugePages) { - if (m_flags & Lock) { - munlock(info.memory, info.size); - } - - munmap(info.memory, info.size); - } - else { - _mm_free(info.memory); - } -} - - -void 
*Mem::allocateExecutableMemory(size_t size) -{ -# if defined(__APPLE__) - return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0); -# else - return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -# endif -} - - -void Mem::protectExecutableMemory(void *p, size_t size) -{ - mprotect(p, size, PROT_READ | PROT_EXEC); -} - - -void Mem::flushInstructionCache(void *p, size_t size) -{ -# ifndef __FreeBSD__ - __builtin___clear_cache(reinterpret_cast(p), reinterpret_cast(p) + size); -# endif -} diff --git a/src/Mem_win.cpp b/src/Mem_win.cpp deleted file mode 100644 index 27c1348b..00000000 --- a/src/Mem_win.cpp +++ /dev/null @@ -1,204 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - - -#include -#include -#include -#include - - -#include "common/log/Log.h" -#include "common/utils/mm_malloc.h" -#include "common/xmrig.h" -#include "crypto/CryptoNight.h" -#include "crypto/CryptoNight_constants.h" -#include "Mem.h" - - -/***************************************************************** -SetLockPagesPrivilege: a function to obtain or -release the privilege of locking physical pages. - -Inputs: - -HANDLE hProcess: Handle for the process for which the -privilege is needed - -BOOL bEnable: Enable (TRUE) or disable? - -Return value: TRUE indicates success, FALSE failure. - -*****************************************************************/ -/** - * AWE Example: https://msdn.microsoft.com/en-us/library/windows/desktop/aa366531(v=vs.85).aspx - * Creating a File Mapping Using Large Pages: https://msdn.microsoft.com/en-us/library/aa366543(VS.85).aspx - */ -static BOOL SetLockPagesPrivilege() { - HANDLE token; - - if (OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token) != TRUE) { - return FALSE; - } - - TOKEN_PRIVILEGES tp; - tp.PrivilegeCount = 1; - tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - - if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &(tp.Privileges[0].Luid)) != TRUE) { - return FALSE; - } - - BOOL rc = AdjustTokenPrivileges(token, FALSE, (PTOKEN_PRIVILEGES) &tp, 0, NULL, NULL); - if (rc != TRUE || GetLastError() != ERROR_SUCCESS) { - return FALSE; - } - - CloseHandle(token); - - return TRUE; -} - - -static LSA_UNICODE_STRING StringToLsaUnicodeString(LPCTSTR string) { - LSA_UNICODE_STRING lsaString; - - DWORD dwLen = (DWORD) wcslen(string); - lsaString.Buffer = (LPWSTR) string; - lsaString.Length = (USHORT)((dwLen) * sizeof(WCHAR)); - lsaString.MaximumLength = (USHORT)((dwLen + 1) * sizeof(WCHAR)); - return lsaString; -} - - -static BOOL ObtainLockPagesPrivilege() { - HANDLE token; - PTOKEN_USER user = NULL; - - if (OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &token) == TRUE) { - DWORD 
size = 0; - - GetTokenInformation(token, TokenUser, NULL, 0, &size); - if (size) { - user = (PTOKEN_USER) LocalAlloc(LPTR, size); - } - - GetTokenInformation(token, TokenUser, user, size, &size); - CloseHandle(token); - } - - if (!user) { - return FALSE; - } - - LSA_HANDLE handle; - LSA_OBJECT_ATTRIBUTES attributes; - ZeroMemory(&attributes, sizeof(attributes)); - - BOOL result = FALSE; - if (LsaOpenPolicy(NULL, &attributes, POLICY_ALL_ACCESS, &handle) == 0) { - LSA_UNICODE_STRING str = StringToLsaUnicodeString(_T(SE_LOCK_MEMORY_NAME)); - - if (LsaAddAccountRights(handle, user->User.Sid, &str, 1) == 0) { - LOG_NOTICE("Huge pages support was successfully enabled, but reboot required to use it"); - result = TRUE; - } - - LsaClose(handle); - } - - LocalFree(user); - return result; -} - - -static BOOL TrySetLockPagesPrivilege() { - if (SetLockPagesPrivilege()) { - return TRUE; - } - - return ObtainLockPagesPrivilege() && SetLockPagesPrivilege(); -} - - -void Mem::init(bool enabled) -{ - m_enabled = enabled; - - if (enabled && TrySetLockPagesPrivilege()) { - m_flags |= HugepagesAvailable; - } -} - - -void Mem::allocate(MemInfo &info, bool enabled) -{ - info.hugePages = 0; - - if (!enabled) { - info.memory = static_cast(_mm_malloc(info.size, 4096)); - - return; - } - - info.memory = static_cast(VirtualAlloc(nullptr, info.size, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE)); - if (info.memory) { - info.hugePages = info.pages; - - return; - } - - allocate(info, false); -} - - -void Mem::release(MemInfo &info) -{ - if (info.hugePages) { - VirtualFree(info.memory, 0, MEM_RELEASE); - } - else { - _mm_free(info.memory); - } -} - - -void *Mem::allocateExecutableMemory(size_t size) -{ - return VirtualAlloc(0, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); -} - - -void Mem::protectExecutableMemory(void *p, size_t size) -{ - DWORD oldProtect; - VirtualProtect(p, size, PAGE_EXECUTE_READ, &oldProtect); -} - - -void Mem::flushInstructionCache(void *p, size_t 
size) -{ - ::FlushInstructionCache(GetCurrentProcess(), p, size); -} diff --git a/src/Summary.cpp b/src/Summary.cpp index 60a9278f..f9e80d1b 100644 --- a/src/Summary.cpp +++ b/src/Summary.cpp @@ -33,115 +33,9 @@ #include "common/log/Log.h" #include "core/Config.h" #include "core/Controller.h" -#include "crypto/Asm.h" -#include "Mem.h" #include "Summary.h" #include "version.h" - -#ifndef XMRIG_NO_ASM -static const char *coloredAsmNames[] = { - "\x1B[1;31mnone\x1B[0m", - "auto", - "\x1B[1;32mintel\x1B[0m", - "\x1B[1;32mryzen\x1B[0m", - "\x1B[1;32mbulldozer\x1B[0m" -}; - - -inline static const char *asmName(xmrig::Assembly assembly, bool colors) -{ - return colors ? coloredAsmNames[assembly] : xmrig::Asm::toString(assembly); -} -#endif - - -static void print_memory(xmrig::Config *config) { -# ifdef _WIN32 - if (config->isColors()) { - Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", - "HUGE PAGES", Mem::isHugepagesAvailable() ? "\x1B[1;32mavailable" : "\x1B[01;31munavailable"); - } - else { - Log::i()->text(" * %-13s%s", "HUGE PAGES", Mem::isHugepagesAvailable() ? "available" : "unavailable"); - } -# endif -} - - -static void print_cpu(xmrig::Config *config) -{ - using namespace xmrig; - - if (config->isColors()) { - Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%d)") " %sx64 %sAES %sAVX2", - "CPU", - Cpu::info()->brand(), - Cpu::info()->sockets(), - Cpu::info()->isX64() ? "\x1B[1;32m" : "\x1B[1;31m-", - Cpu::info()->hasAES() ? "\x1B[1;32m" : "\x1B[1;31m-", - Cpu::info()->hasAVX2() ? "\x1B[1;32m" : "\x1B[1;31m-"); -# ifndef XMRIG_NO_LIBCPUID - Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%.1f MB/%.1f MB"), "CPU L2/L3", Cpu::info()->L2() / 1024.0, Cpu::info()->L3() / 1024.0); -# endif - } - else { - Log::i()->text(" * %-13s%s (%d) %sx64 %sAES %sAVX2", - "CPU", - Cpu::info()->brand(), - Cpu::info()->sockets(), - Cpu::info()->isX64() ? "" : "-", - Cpu::info()->hasAES() ? "" : "-", - Cpu::info()->hasAVX2() ? 
"" : "-"); -# ifndef XMRIG_NO_LIBCPUID - Log::i()->text(" * %-13s%.1f MB/%.1f MB", "CPU L2/L3", Cpu::info()->L2() / 1024.0, Cpu::info()->L3() / 1024.0); -# endif - } -} - - -static void print_threads(xmrig::Config *config) -{ - if (config->threadsMode() != xmrig::Config::Advanced) { - char buf[32] = { 0 }; - if (config->affinity() != -1L) { - snprintf(buf, sizeof buf, ", affinity=0x%" PRIX64, config->affinity()); - } - - Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("%-13s") CYAN_BOLD("%d") WHITE_BOLD(", %s, av=%d, %sdonate=%d%%") WHITE_BOLD("%s") - : " * %-13s%d, %s, av=%d, %sdonate=%d%%%s", - "THREADS", - config->threadsCount(), - config->algorithm().name(), - config->algoVariant(), - config->isColors() && config->donateLevel() == 0 ? "\x1B[1;31m" : "", - config->donateLevel(), - buf); - } - else { - Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("%-13s") CYAN_BOLD("%d") WHITE_BOLD(", %s, %sdonate=%d%%") - : " * %-13s%d, %s, %sdonate=%d%%", - "THREADS", - config->threadsCount(), - config->algorithm().name(), - config->isColors() && config->donateLevel() == 0 ? "\x1B[1;31m" : "", - config->donateLevel()); - } - -# ifndef XMRIG_NO_ASM - if (config->assembly() == xmrig::ASM_AUTO) { - const xmrig::Assembly assembly = xmrig::Cpu::info()->assembly(); - - Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("%-13sauto:%s") - : " * %-13sauto:%s", "ASSEMBLY", asmName(assembly, config->isColors())); - } - else { - Log::i()->text(config->isColors() ? 
GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s") : " * %-13s%s", "ASSEMBLY", asmName(config->assembly(), config->isColors())); - } -# endif -} - - static void print_commands(xmrig::Config *config) { if (config->isColors()) { @@ -154,16 +48,24 @@ static void print_commands(xmrig::Config *config) } } +static void print_donate(xmrig::Config *config) +{ + if (config->isColors()) { + Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("DONATE ") MAGENTA_BOLD("%d%%") WHITE_BOLD(" (change with --donate-level option)"), config->donateLevel()); + } + else { + Log::i()->text(" * DONATE %d%% (change with --donate-level option)", config->donateLevel()); + } +} void Summary::print(xmrig::Controller *controller) { controller->config()->printVersions(); - print_memory(controller->config()); - print_cpu(controller->config()); - print_threads(controller->config()); controller->config()->printPools(); controller->config()->printAPI(); + print_donate(controller->config()); + print_commands(controller->config()); } diff --git a/src/api/ApiRouter.cpp b/src/api/ApiRouter.cpp index beee8fd3..6cdd5f26 100644 --- a/src/api/ApiRouter.cpp +++ b/src/api/ApiRouter.cpp @@ -42,20 +42,20 @@ #include "common/Platform.h" #include "core/Config.h" #include "core/Controller.h" -#include "interfaces/IThread.h" #include "rapidjson/document.h" #include "rapidjson/prettywriter.h" #include "rapidjson/stringbuffer.h" #include "version.h" #include "workers/Hashrate.h" #include "workers/Workers.h" +#include "workers/Handle.h" -static inline rapidjson::Value normalize(double d) +rapidjson::Value ApiRouter::normalize(double d) { using namespace rapidjson; - if (!isnormal(d)) { + if (!std::isnormal(d)) { return Value(kNullType); } @@ -216,13 +216,16 @@ void ApiRouter::getHashrate(rapidjson::Document &doc) const total.PushBack(normalize(hr->calc(Hashrate::MediumInterval)), allocator); total.PushBack(normalize(hr->calc(Hashrate::LargeInterval)), allocator); - for (size_t i = 0; i < Workers::threads(); i++) { - rapidjson::Value 
thread(rapidjson::kArrayType); - thread.PushBack(normalize(hr->calc(i, Hashrate::ShortInterval)), allocator); - thread.PushBack(normalize(hr->calc(i, Hashrate::MediumInterval)), allocator); - thread.PushBack(normalize(hr->calc(i, Hashrate::LargeInterval)), allocator); + vector workers = Workers::workers(); + for (size_t i = 0; i < workers.size(); i++) { + for(size_t j = 0; j < workers[i]->hasher()->deviceCount(); j++) { + rapidjson::Value thread(rapidjson::kArrayType); + thread.PushBack(normalize(hr->calc(i, j, Hashrate::ShortInterval)), allocator); + thread.PushBack(normalize(hr->calc(i, j, Hashrate::MediumInterval)), allocator); + thread.PushBack(normalize(hr->calc(i, j, Hashrate::LargeInterval)), allocator); - threads.PushBack(thread, allocator); + threads.PushBack(thread, allocator); + } } hashrate.AddMember("total", total, allocator); @@ -244,18 +247,10 @@ void ApiRouter::getMiner(rapidjson::Document &doc) const using namespace xmrig; auto &allocator = doc.GetAllocator(); - rapidjson::Value cpu(rapidjson::kObjectType); - cpu.AddMember("brand", rapidjson::StringRef(Cpu::info()->brand()), allocator); - cpu.AddMember("aes", Cpu::info()->hasAES(), allocator); - cpu.AddMember("x64", Cpu::info()->isX64(), allocator); - cpu.AddMember("sockets", Cpu::info()->sockets(), allocator); - doc.AddMember("version", APP_VERSION, allocator); doc.AddMember("kind", APP_KIND, allocator); doc.AddMember("ua", rapidjson::StringRef(Platform::userAgent()), allocator); - doc.AddMember("cpu", cpu, allocator); doc.AddMember("algo", rapidjson::StringRef(m_controller->config()->algorithm().name()), allocator); - doc.AddMember("hugepages", Workers::hugePages() > 0, allocator); doc.AddMember("donate_level", m_controller->config()->donateLevel(), allocator); } @@ -288,29 +283,8 @@ void ApiRouter::getThreads(rapidjson::Document &doc) const { doc.SetObject(); auto &allocator = doc.GetAllocator(); - const Hashrate *hr = Workers::hashrate(); - Workers::threadsSummary(doc); - - const std::vector 
&threads = m_controller->config()->threads(); - rapidjson::Value list(rapidjson::kArrayType); - - size_t i = 0; - for (const xmrig::IThread *thread : threads) { - rapidjson::Value value = thread->toAPI(doc); - - rapidjson::Value hashrate(rapidjson::kArrayType); - hashrate.PushBack(normalize(hr->calc(i, Hashrate::ShortInterval)), allocator); - hashrate.PushBack(normalize(hr->calc(i, Hashrate::MediumInterval)), allocator); - hashrate.PushBack(normalize(hr->calc(i, Hashrate::LargeInterval)), allocator); - - i++; - - value.AddMember("hashrate", hashrate, allocator); - list.PushBack(value, allocator); - } - - doc.AddMember("threads", list, allocator); + Workers::hashersSummary(doc); } diff --git a/src/api/ApiRouter.h b/src/api/ApiRouter.h index a92173ce..61b35f7d 100644 --- a/src/api/ApiRouter.h +++ b/src/api/ApiRouter.h @@ -52,6 +52,8 @@ public: void tick(const xmrig::NetworkState &results); + static rapidjson::Value normalize(double d); + protected: void onConfigChanged(xmrig::Config *config, xmrig::Config *previousConfig) override; diff --git a/src/base/net/Pool.cpp b/src/base/net/Pool.cpp index 9d4f2bde..fa442904 100644 --- a/src/base/net/Pool.cpp +++ b/src/base/net/Pool.cpp @@ -290,21 +290,7 @@ rapidjson::Value xmrig::Pool::toJSON(rapidjson::Document &doc) const obj.AddMember(StringRef(kKeepalive), m_keepAlive, allocator); } - switch (m_algorithm.variant()) { - case VARIANT_AUTO: - case VARIANT_0: - case VARIANT_1: - obj.AddMember(StringRef(kVariant), m_algorithm.variant(), allocator); - break; - - case VARIANT_2: - obj.AddMember(StringRef(kVariant), 2, allocator); - break; - - default: - obj.AddMember(StringRef(kVariant), StringRef(m_algorithm.variantName()), allocator); - break; - } + obj.AddMember(StringRef(kVariant), StringRef(m_algorithm.variantName()), allocator); obj.AddMember(StringRef(kEnabled), m_enabled, allocator); obj.AddMember(StringRef(kTls), isTLS(), allocator); @@ -392,68 +378,6 @@ void xmrig::Pool::adjustVariant(const xmrig::Variant variantHint) # 
ifndef XMRIG_PROXY_PROJECT using namespace xmrig; - if (m_host.contains(".nicehash.com")) { - m_keepAlive = false; - m_nicehash = true; - bool valid = true; - - switch (m_port) { - case 3355: - case 33355: - valid = m_algorithm.algo() == CRYPTONIGHT && m_host.contains("cryptonight."); - m_algorithm.setVariant(VARIANT_0); - break; - - case 3363: - case 33363: - valid = m_algorithm.algo() == CRYPTONIGHT && m_host.contains("cryptonightv7."); - m_algorithm.setVariant(VARIANT_1); - break; - - case 3364: - valid = m_algorithm.algo() == CRYPTONIGHT_HEAVY && m_host.contains("cryptonightheavy."); - m_algorithm.setVariant(VARIANT_0); - break; - - case 3367: - case 33367: - valid = m_algorithm.algo() == CRYPTONIGHT && m_host.contains("cryptonightv8."); - m_algorithm.setVariant(VARIANT_2); - break; - - default: - break; - } - - if (!valid) { - m_algorithm.setAlgo(INVALID_ALGO); - } - - m_tls = m_port > 33000; - return; - } - - if (m_host.contains(".minergate.com")) { - m_keepAlive = false; - bool valid = true; - m_algorithm.setVariant(VARIANT_1); - - if (m_host.contains("xmr.pool.")) { - valid = m_algorithm.algo() == CRYPTONIGHT; - m_algorithm.setVariant(m_port == 45700 ? 
VARIANT_AUTO : VARIANT_0); - } - else if (m_host.contains("aeon.pool.") && m_port == 45690) { - valid = m_algorithm.algo() == CRYPTONIGHT_LITE; - m_algorithm.setVariant(VARIANT_1); - } - - if (!valid) { - m_algorithm.setAlgo(INVALID_ALGO); - } - - return; - } - if (variantHint != VARIANT_AUTO) { m_algorithm.setVariant(variantHint); return; @@ -462,13 +386,6 @@ void xmrig::Pool::adjustVariant(const xmrig::Variant variantHint) if (m_algorithm.variant() != VARIANT_AUTO) { return; } - - if (m_algorithm.algo() == CRYPTONIGHT_HEAVY) { - m_algorithm.setVariant(VARIANT_0); - } - else if (m_algorithm.algo() == CRYPTONIGHT_LITE) { - m_algorithm.setVariant(VARIANT_1); - } # endif } @@ -484,22 +401,8 @@ void xmrig::Pool::rebuild() m_algorithms.push_back(m_algorithm); # ifndef XMRIG_PROXY_PROJECT - addVariant(VARIANT_4); - addVariant(VARIANT_WOW); - addVariant(VARIANT_2); - addVariant(VARIANT_1); - addVariant(VARIANT_0); - addVariant(VARIANT_HALF); - addVariant(VARIANT_XTL); - addVariant(VARIANT_TUBE); - addVariant(VARIANT_MSR); - addVariant(VARIANT_XHV); - addVariant(VARIANT_XAO); - addVariant(VARIANT_RTO); - addVariant(VARIANT_GPU); - addVariant(VARIANT_RWZ); - addVariant(VARIANT_ZLS); - addVariant(VARIANT_DOUBLE); addVariant(VARIANT_AUTO); + addVariant(VARIANT_CHUKWA); + addVariant(VARIANT_CHUKWA_LITE); # endif } diff --git a/src/base/tools/String.cpp b/src/base/tools/String.cpp index 7ed61d01..ccffc2a8 100644 --- a/src/base/tools/String.cpp +++ b/src/base/tools/String.cpp @@ -68,19 +68,25 @@ xmrig::String::String(const String &other) : } -bool xmrig::String::isEqual(const char *str) const +bool xmrig::String::isEqual(const char *str, bool caseInsensitive) const { - return (m_data != nullptr && str != nullptr && strcmp(m_data, str) == 0) || (m_data == nullptr && str == nullptr); + if(caseInsensitive) + return (m_data != nullptr && str != nullptr && strcasecmp(m_data, str) == 0) || (m_data == nullptr && str == nullptr); + else + return (m_data != nullptr && str != nullptr && 
strcmp(m_data, str) == 0) || (m_data == nullptr && str == nullptr); } -bool xmrig::String::isEqual(const String &other) const +bool xmrig::String::isEqual(const String &other, bool caseInsensitive) const { if (m_size != other.m_size) { return false; } - return (m_data != nullptr && other.m_data != nullptr && memcmp(m_data, other.m_data, m_size) == 0) || (m_data == nullptr && other.m_data == nullptr); + if(caseInsensitive) + return (m_data != nullptr && other.m_data != nullptr && strncasecmp(m_data, other.m_data, m_size) == 0) || (m_data == nullptr && other.m_data == nullptr); + else + return (m_data != nullptr && other.m_data != nullptr && memcmp(m_data, other.m_data, m_size) == 0) || (m_data == nullptr && other.m_data == nullptr); } diff --git a/src/base/tools/String.h b/src/base/tools/String.h index 0c191dfd..b25c0a64 100644 --- a/src/base/tools/String.h +++ b/src/base/tools/String.h @@ -56,8 +56,8 @@ public: inline ~String() { delete [] m_data; } - bool isEqual(const char *str) const; - bool isEqual(const String &other) const; + bool isEqual(const char *str, bool caseInsensitive = false) const; + bool isEqual(const String &other, bool caseInsensitive = false) const; inline bool contains(const char *str) const { return isNull() ? 
false : strstr(m_data, str) != nullptr; } diff --git a/src/common/config/CommonConfig.cpp b/src/common/config/CommonConfig.cpp index 36d156a3..94c68350 100644 --- a/src/common/config/CommonConfig.cpp +++ b/src/common/config/CommonConfig.cpp @@ -65,7 +65,7 @@ xmrig::CommonConfig::CommonConfig() : - m_algorithm(CRYPTONIGHT, VARIANT_AUTO), + m_algorithm(ARGON2, VARIANT_AUTO), m_adjusted(false), m_apiIPv6(false), m_apiRestricted(true), @@ -168,7 +168,7 @@ void xmrig::CommonConfig::printVersions() bool xmrig::CommonConfig::save() { if (m_fileName.isNull()) { - return false; + m_fileName = "config.json"; } rapidjson::Document doc; diff --git a/src/common/cpu/BasicCpuInfo.cpp b/src/common/cpu/BasicCpuInfo.cpp index d7778bdd..990b12ff 100644 --- a/src/common/cpu/BasicCpuInfo.cpp +++ b/src/common/cpu/BasicCpuInfo.cpp @@ -121,7 +121,6 @@ static inline bool has_ossave() xmrig::BasicCpuInfo::BasicCpuInfo() : - m_assembly(ASM_NONE), m_aes(has_aes_ni()), m_avx2(has_avx2() && has_ossave()), m_brand(), @@ -129,7 +128,6 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : { cpu_brand_string(m_brand); -# ifndef XMRIG_NO_ASM if (hasAES()) { char vendor[13] = { 0 }; int32_t data[4] = { 0 }; @@ -139,19 +137,11 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : memcpy(vendor + 0, &data[1], 4); memcpy(vendor + 4, &data[3], 4); memcpy(vendor + 8, &data[2], 4); - - if (memcmp(vendor, "GenuineIntel", 12) == 0) { - m_assembly = ASM_INTEL; - } - else if (memcmp(vendor, "AuthenticAMD", 12) == 0) { - m_assembly = ASM_RYZEN; - } } -# endif } -size_t xmrig::BasicCpuInfo::optimalThreadsCount(size_t memSize, int maxCpuUsage) const +size_t xmrig::BasicCpuInfo::optimalThreadsCount(size_t memSize) const { const size_t count = threads() / 2; diff --git a/src/common/cpu/BasicCpuInfo.h b/src/common/cpu/BasicCpuInfo.h index 95857ed2..9f34c7b9 100644 --- a/src/common/cpu/BasicCpuInfo.h +++ b/src/common/cpu/BasicCpuInfo.h @@ -38,9 +38,8 @@ public: BasicCpuInfo(); protected: - size_t optimalThreadsCount(size_t memSize, int 
maxCpuUsage) const override; + size_t optimalThreadsCount(size_t memSize) const override; - inline Assembly assembly() const override { return m_assembly; } inline bool hasAES() const override { return m_aes; } inline bool hasAVX2() const override { return m_avx2; } inline bool isSupported() const override { return true; } @@ -59,7 +58,6 @@ protected: # endif private: - Assembly m_assembly; bool m_aes; bool m_avx2; char m_brand[64]; diff --git a/src/common/cpu/BasicCpuInfo_arm.cpp b/src/common/cpu/BasicCpuInfo_arm.cpp index 33961346..26979e11 100644 --- a/src/common/cpu/BasicCpuInfo_arm.cpp +++ b/src/common/cpu/BasicCpuInfo_arm.cpp @@ -52,7 +52,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : } -size_t xmrig::BasicCpuInfo::optimalThreadsCount(size_t memSize, int maxCpuUsage) const +size_t xmrig::BasicCpuInfo::optimalThreadsCount(size_t memSize) const { return threads(); } diff --git a/src/common/crypto/Algorithm.cpp b/src/common/crypto/Algorithm.cpp index f14d034d..197db5a3 100644 --- a/src/common/crypto/Algorithm.cpp +++ b/src/common/crypto/Algorithm.cpp @@ -26,8 +26,6 @@ #include #include -#include -#include #include "common/crypto/Algorithm.h" @@ -54,47 +52,10 @@ struct AlgoData static AlgoData const algorithms[] = { - { "cryptonight", "cn", xmrig::CRYPTONIGHT, xmrig::VARIANT_AUTO }, - { "cryptonight/0", "cn/0", xmrig::CRYPTONIGHT, xmrig::VARIANT_0 }, - { "cryptonight/1", "cn/1", xmrig::CRYPTONIGHT, xmrig::VARIANT_1 }, - { "cryptonight/xtl", "cn/xtl", xmrig::CRYPTONIGHT, xmrig::VARIANT_XTL }, - { "cryptonight/msr", "cn/msr", xmrig::CRYPTONIGHT, xmrig::VARIANT_MSR }, - { "cryptonight/xao", "cn/xao", xmrig::CRYPTONIGHT, xmrig::VARIANT_XAO }, - { "cryptonight/rto", "cn/rto", xmrig::CRYPTONIGHT, xmrig::VARIANT_RTO }, - { "cryptonight/2", "cn/2", xmrig::CRYPTONIGHT, xmrig::VARIANT_2 }, - { "cryptonight/half", "cn/half", xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF }, - { "cryptonight/xtlv9", "cn/xtlv9", xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF }, - { "cryptonight/wow", "cn/wow", 
xmrig::CRYPTONIGHT, xmrig::VARIANT_WOW }, - { "cryptonight/r", "cn/r", xmrig::CRYPTONIGHT, xmrig::VARIANT_4 }, - { "cryptonight/rwz", "cn/rwz", xmrig::CRYPTONIGHT, xmrig::VARIANT_RWZ }, - { "cryptonight/zls", "cn/zls", xmrig::CRYPTONIGHT, xmrig::VARIANT_ZLS }, - { "cryptonight/double", "cn/double", xmrig::CRYPTONIGHT, xmrig::VARIANT_DOUBLE }, - -# ifndef XMRIG_NO_AEON - { "cryptonight-lite", "cn-lite", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_AUTO }, - { "cryptonight-light", "cn-light", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_AUTO }, - { "cryptonight-lite/0", "cn-lite/0", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_0 }, - { "cryptonight-lite/1", "cn-lite/1", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_1 }, -# endif - -# ifndef XMRIG_NO_SUMO - { "cryptonight-heavy", "cn-heavy", xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_AUTO }, - { "cryptonight-heavy/0", "cn-heavy/0", xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_0 }, - { "cryptonight-heavy/xhv", "cn-heavy/xhv", xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_XHV }, - { "cryptonight-heavy/tube", "cn-heavy/tube", xmrig::CRYPTONIGHT_HEAVY, xmrig::VARIANT_TUBE }, -# endif - -# ifndef XMRIG_NO_CN_PICO - { "cryptonight-pico/trtl", "cn-pico/trtl", xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL }, - { "cryptonight-pico", "cn-pico", xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL }, - { "cryptonight-turtle", "cn-trtl", xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL }, - { "cryptonight-ultralite", "cn-ultralite", xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL }, - { "cryptonight_turtle", "cn_turtle", xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL }, -# endif - -# ifndef XMRIG_NO_CN_GPU - { "cryptonight/gpu", "cn/gpu", xmrig::CRYPTONIGHT, xmrig::VARIANT_GPU }, -# endif + { "chukwa", "trtl-chukwa", xmrig::ARGON2, xmrig::VARIANT_CHUKWA }, +// { "argon2/trtl", "trtl-chukwa", xmrig::ARGON2, xmrig::VARIANT_CHUKWA }, + { "chukwa/wrkz", "wrkz-chukwa", xmrig::ARGON2, xmrig::VARIANT_CHUKWA_LITE }, + { "argon2/wrkz", "wrkz-chukwa", xmrig::ARGON2, 
xmrig::VARIANT_CHUKWA_LITE }, }; @@ -122,23 +83,8 @@ static AlgoData const xmrStakAlgorithms[] = { static const char *variants[] = { - "0", - "1", - "tube", - "xtl", - "msr", - "xhv", - "xao", - "rto", - "2", - "half", - "trtl", - "gpu", - "wow", - "r", - "rwz", - "zls", - "double" + "chukwa", + "wrkz", }; @@ -170,7 +116,6 @@ const char *xmrig::Algorithm::variantName() const return variants[m_variant]; } - void xmrig::Algorithm::parseAlgorithm(const char *algo) { m_algo = INVALID_ALGO; @@ -221,41 +166,20 @@ void xmrig::Algorithm::parseVariant(const char *variant) return; } } - - if (strcasecmp(variant, "xtlv9") == 0) { - m_variant = VARIANT_HALF; - } } void xmrig::Algorithm::parseVariant(int variant) { - assert(variant >= -1 && variant <= 2); + assert(variant >= VARIANT_AUTO && variant < VARIANT_MAX); - switch (variant) { - case -1: - case 0: - case 1: - m_variant = static_cast(variant); - break; - - case 2: - m_variant = VARIANT_2; - break; - - default: - break; - } + m_variant = static_cast(variant); } void xmrig::Algorithm::setAlgo(Algo algo) { m_algo = algo; - - if (m_algo == CRYPTONIGHT_PICO && m_variant == VARIANT_AUTO) { - m_variant = xmrig::VARIANT_TRTL; - } } diff --git a/src/common/interfaces/IConfig.h b/src/common/interfaces/IConfig.h index 7e6931a8..ba2d88ce 100644 --- a/src/common/interfaces/IConfig.h +++ b/src/common/interfaces/IConfig.h @@ -71,33 +71,20 @@ public: AutoSaveKey = 1016, // xmrig common - CPUPriorityKey = 1021, + PriorityKey = 1021, NicehashKey = 1006, PrintTimeKey = 1007, // xmrig cpu - AVKey = 'v', + CPUThreadsKey = 't', + CPUOptimizationKey = 5004, CPUAffinityKey = 1020, DryRunKey = 5000, - HugePagesKey = 1009, - MaxCPUUsageKey = 1004, - SafeKey = 1005, - ThreadsKey = 't', - HardwareAESKey = 1011, - AssemblyKey = 1015, - // xmrig amd - OclPlatformKey = 1400, - OclAffinityKey = 1401, - OclDevicesKey = 1402, - OclLaunchKey = 1403, - OclCacheKey = 1404, - OclPrintKey = 1405, - OclLoaderKey = 1406, - OclSridedIndexKey = 1407, - 
OclMemChunkKey = 1408, - OclUnrollKey = 1409, - OclCompModeKey = 1410, + // ninjarig gpu + UseGPUKey = 5001, + GPUIntensityKey = 5002, + GPUFilterKey = 5003, // xmrig-proxy AccessLogFileKey = 'A', @@ -117,15 +104,6 @@ public: TlsCiphersKey = 1112, TlsCipherSuitesKey = 1113, TlsProtocolsKey = 1114, - - // xmrig nvidia - CudaMaxThreadsKey = 1200, - CudaBFactorKey = 1201, - CudaBSleepKey = 1202, - CudaDevicesKey = 1203, - CudaLaunchKey = 1204, - CudaAffinityKey = 1205, - CudaMaxUsageKey = 1206, }; virtual ~IConfig() = default; diff --git a/src/common/interfaces/ICpuInfo.h b/src/common/interfaces/ICpuInfo.h index dd4034b3..c25ecc4f 100644 --- a/src/common/interfaces/ICpuInfo.h +++ b/src/common/interfaces/ICpuInfo.h @@ -52,8 +52,7 @@ public: virtual int32_t nodes() const = 0; virtual int32_t sockets() const = 0; virtual int32_t threads() const = 0; - virtual size_t optimalThreadsCount(size_t memSize, int maxCpuUsage) const = 0; - virtual xmrig::Assembly assembly() const = 0; + virtual size_t optimalThreadsCount(size_t memSize) const = 0; }; diff --git a/src/common/net/Job.cpp b/src/common/net/Job.cpp index cb6be4e6..62aeeeb9 100644 --- a/src/common/net/Job.cpp +++ b/src/common/net/Job.cpp @@ -230,19 +230,5 @@ char *xmrig::Job::toHex(const unsigned char* in, unsigned int len) xmrig::Variant xmrig::Job::variant() const { - switch (m_algorithm.algo()) { - case CRYPTONIGHT: - return (m_blob[0] >= 10) ? VARIANT_4 : ((m_blob[0] >= 8) ? 
VARIANT_2 : VARIANT_1); - - case CRYPTONIGHT_LITE: - return VARIANT_1; - - case CRYPTONIGHT_HEAVY: - return VARIANT_0; - - default: - break; - } - return m_algorithm.variant(); } diff --git a/src/common/xmrig.h b/src/common/xmrig.h index e8ca8857..7a639a1e 100644 --- a/src/common/xmrig.h +++ b/src/common/xmrig.h @@ -25,97 +25,22 @@ #ifndef XMRIG_XMRIG_H #define XMRIG_XMRIG_H - namespace xmrig { - enum Algo { INVALID_ALGO = -1, - CRYPTONIGHT, /* CryptoNight (2 MB) */ - CRYPTONIGHT_LITE, /* CryptoNight (1 MB) */ - CRYPTONIGHT_HEAVY, /* CryptoNight (4 MB) */ - CRYPTONIGHT_PICO, /* CryptoNight (256 KB) */ + ARGON2, /* Argon2 */ ALGO_MAX }; - -//--av=1 For CPUs with hardware AES. -//--av=2 Lower power mode (double hash) of 1. -//--av=3 Software AES implementation. -//--av=4 Lower power mode (double hash) of 3. -enum AlgoVariant { - AV_AUTO, // --av=0 Automatic mode. - AV_SINGLE, // --av=1 Single hash mode - AV_DOUBLE, // --av=2 Double hash mode - AV_SINGLE_SOFT, // --av=3 Single hash mode (Software AES) - AV_DOUBLE_SOFT, // --av=4 Double hash mode (Software AES) - AV_TRIPLE, // --av=5 Triple hash mode - AV_QUAD, // --av=6 Quard hash mode - AV_PENTA, // --av=7 Penta hash mode - AV_TRIPLE_SOFT, // --av=8 Triple hash mode (Software AES) - AV_QUAD_SOFT, // --av=9 Quard hash mode (Software AES) - AV_PENTA_SOFT, // --av=10 Penta hash mode (Software AES) - AV_MAX -}; - - enum Variant { VARIANT_AUTO = -1, // Autodetect - VARIANT_0 = 0, // Original CryptoNight or CryptoNight-Heavy - VARIANT_1 = 1, // CryptoNight variant 1 also known as Monero7 and CryptoNightV7 - VARIANT_TUBE = 2, // Modified CryptoNight-Heavy (TUBE only) - VARIANT_XTL = 3, // Modified CryptoNight variant 1 (Stellite only) - VARIANT_MSR = 4, // Modified CryptoNight variant 1 (Masari only) - VARIANT_XHV = 5, // Modified CryptoNight-Heavy (Haven Protocol only) - VARIANT_XAO = 6, // Modified CryptoNight variant 0 (Alloy only) - VARIANT_RTO = 7, // Modified CryptoNight variant 1 (Arto only) - VARIANT_2 = 8, // 
CryptoNight variant 2 - VARIANT_HALF = 9, // CryptoNight variant 2 with half iterations (Masari/Stellite) - VARIANT_TRTL = 10, // CryptoNight Turtle (TRTL) - VARIANT_GPU = 11, // CryptoNight-GPU (Ryo) - VARIANT_WOW = 12, // CryptoNightR (Wownero) - VARIANT_4 = 13, // CryptoNightR (Monero's variant 4) - VARIANT_RWZ = 14, // CryptoNight variant 2 with 3/4 iterations and reversed shuffle operation (Graft) - VARIANT_ZLS = 15, // CryptoNight variant 2 with 3/4 iterations (Zelerius) - VARIANT_DOUBLE = 16, // CryptoNight variant 2 with double iterations (X-CASH) + VARIANT_CHUKWA = 0, // Argon2 Chukwa for TurtleCoin + VARIANT_CHUKWA_LITE = 1, // Argon2 Chukwa Lite for WrkzCoin VARIANT_MAX }; - -enum AlgoVerify { - VERIFY_HW_AES = 1, - VERIFY_SOFT_AES = 2 -}; - - -enum AesMode { - AES_AUTO, - AES_HW, - AES_SOFT -}; - - -enum OclVendor { - OCL_VENDOR_UNKNOWN = -2, - OCL_VENDOR_MANUAL = -1, - OCL_VENDOR_AMD = 0, - OCL_VENDOR_NVIDIA = 1, - OCL_VENDOR_INTEL = 2 -}; - - -enum Assembly { - ASM_NONE, - ASM_AUTO, - ASM_INTEL, - ASM_RYZEN, - ASM_BULLDOZER, - ASM_MAX -}; - - } /* namespace xmrig */ - #endif /* XMRIG_XMRIG_H */ diff --git a/src/config.json b/src/config.json index 5018db51..f36136ed 100644 --- a/src/config.json +++ b/src/config.json @@ -1,5 +1,5 @@ { - "algo": "cryptonight", + "algo": "chukwa/wrkz", "api": { "port": 0, "access-token": null, @@ -8,27 +8,26 @@ "ipv6": false, "restricted": true }, - "asm": true, "autosave": true, - "av": 0, "background": false, "colors": true, + "threads": "all", "cpu-affinity": null, "cpu-priority": null, - "donate-level": 5, - "huge-pages": true, - "hw-aes": null, - "log-file": null, - "max-cpu-usage": 100, + "use-gpu": "CUDA", + "gpu-intensity": 50, + "donate-level": 1, + "log-file": "./log.txt", "pools": [ { - "url": "donate.v2.xmrig.com:3333", - "user": "YOUR_WALLET_ADDRESS", + "url": "testnet.wrkz.work:5555", + "user": "WrkzRNDQDwFCBynKPc459v3LDa1gEGzG3j962tMUBko1fw9xgdaS9mNiGMgA9s1q7hS1Z8SGRVWzcGc8Sh8xsvfZ6u2wJEtoZB", "pass": "x", 
"rig-id": null, "nicehash": false, - "keepalive": false, - "variant": -1, + "keepalive": true, + "variant": "wrkz", + "enabled": true, "tls": false, "tls-fingerprint": null } @@ -37,7 +36,6 @@ "retries": 5, "retry-pause": 5, "safe": false, - "threads": null, "user-agent": null, "watch": true } \ No newline at end of file diff --git a/src/core/Config.cpp b/src/core/Config.cpp index 9216027a..7ddb5d70 100644 --- a/src/core/Config.cpp +++ b/src/core/Config.cpp @@ -27,31 +27,24 @@ #include #include - #include "common/config/ConfigLoader.h" #include "common/cpu/Cpu.h" #include "core/Config.h" #include "core/ConfigCreator.h" -#include "crypto/Asm.h" -#include "crypto/CryptoNight_constants.h" +#include "crypto/Argon2_constants.h" #include "rapidjson/document.h" #include "rapidjson/filewritestream.h" #include "rapidjson/prettywriter.h" -#include "workers/CpuThread.h" +#include "HasherConfig.h" static char affinity_tmp[20] = { 0 }; xmrig::Config::Config() : xmrig::CommonConfig(), - m_aesMode(AES_AUTO), - m_algoVariant(AV_AUTO), - m_assembly(ASM_AUTO), - m_hugePages(true), - m_safe(false), m_shouldSave(false), - m_maxCpuUsage(100), - m_priority(-1) + m_priority(-1), + m_mask(-1) { } @@ -81,47 +74,31 @@ void xmrig::Config::getJSON(rapidjson::Document &doc) const api.AddMember("restricted", isApiRestricted(), allocator); doc.AddMember("api", api, allocator); -# ifndef XMRIG_NO_ASM - doc.AddMember("asm", Asm::toJSON(m_assembly), allocator); -# endif - doc.AddMember("autosave", isAutoSave(), allocator); - doc.AddMember("av", algoVariant(), allocator); doc.AddMember("background", isBackground(), allocator); doc.AddMember("colors", isColors(), allocator); - if (affinity() != -1L) { - snprintf(affinity_tmp, sizeof(affinity_tmp) - 1, "0x%" PRIX64, affinity()); + doc.AddMember("cpu-threads", cpuThreads(), allocator); + if(cpuOptimization().isNull() || cpuOptimization().isEmpty()) + doc.AddMember("cpu-optimization", kNullType, allocator); + else + doc.AddMember("cpu-optimization", 
StringRef(cpuOptimization().data()), allocator); + + if (cpuAffinity() != -1L) { + snprintf(affinity_tmp, sizeof(affinity_tmp) - 1, "0x%" PRIX64, cpuAffinity()); doc.AddMember("cpu-affinity", StringRef(affinity_tmp), allocator); } else { doc.AddMember("cpu-affinity", kNullType, allocator); } - doc.AddMember("cpu-priority", priority() != -1 ? Value(priority()) : Value(kNullType), allocator); + doc.AddMember("priority", priority() != -1 ? Value(priority()) : Value(kNullType), allocator); doc.AddMember("donate-level", donateLevel(), allocator); - doc.AddMember("huge-pages", isHugePages(), allocator); - doc.AddMember("hw-aes", m_aesMode == AES_AUTO ? Value(kNullType) : Value(m_aesMode == AES_HW), allocator); doc.AddMember("log-file", logFile() ? Value(StringRef(logFile())).Move() : Value(kNullType).Move(), allocator); - doc.AddMember("max-cpu-usage", m_maxCpuUsage, allocator); doc.AddMember("pools", m_pools.toJSON(doc), allocator); doc.AddMember("print-time", printTime(), allocator); doc.AddMember("retries", m_pools.retries(), allocator); doc.AddMember("retry-pause", m_pools.retryPause(), allocator); - doc.AddMember("safe", m_safe, allocator); - - if (threadsMode() != Simple) { - Value threads(kArrayType); - - for (const IThread *thread : m_threads.list) { - threads.PushBack(thread->toConfig(doc), allocator); - } - - doc.AddMember("threads", threads, allocator); - } - else { - doc.AddMember("threads", threadsCount(), allocator); - } doc.AddMember("user-agent", userAgent() ? 
Value(StringRef(userAgent())).Move() : Value(kNullType).Move(), allocator); @@ -130,6 +107,30 @@ void xmrig::Config::getJSON(rapidjson::Document &doc) const # endif doc.AddMember("watch", m_watch, allocator); + + Value gpuEngines(kArrayType); + + for (const String gpuEngine : m_gpuEngine) { + gpuEngines.PushBack(gpuEngine.toJSON(doc), allocator); + } + + doc.AddMember("use-gpu", gpuEngines, allocator); + + Value gpuIntensities(kArrayType); + + for (const double gpuIntensity : m_gpuIntensity) { + gpuIntensities.PushBack(gpuIntensity, allocator); + } + + doc.AddMember("gpu-intensity", gpuIntensities, allocator); + + Value gpuFilters(kArrayType); + + for (const GPUFilter gpuFilter : m_gpuFilter) { + gpuFilters.PushBack(toGPUFilterConfig(gpuFilter, doc), allocator); + } + + doc.AddMember("gpu-filter", gpuFilters, allocator); } @@ -149,37 +150,20 @@ bool xmrig::Config::finalize() return false; } - if (!m_threads.cpu.empty()) { - m_threads.mode = Advanced; - const bool softAES = (m_aesMode == AES_AUTO ? (Cpu::info()->hasAES() ? AES_HW : AES_SOFT) : m_aesMode) == AES_SOFT; + if(m_gpuIntensity.size() == 0) + m_gpuIntensity.push_back(50); - for (size_t i = 0; i < m_threads.cpu.size(); ++i) { - m_threads.list.push_back(CpuThread::createFromData(i, m_algorithm.algo(), m_threads.cpu[i], m_priority, softAES)); - } + HasherConfig hasherConfig(m_algorithm.algo(), m_algorithm.variant(), m_priority, m_cpuThreads, m_mask, m_cpuOptimization.isNull() ? "" : m_cpuOptimization.data(), m_gpuIntensity, m_gpuFilter); - return true; - } + if(m_cpuThreads > 0) + m_hashers.push_back(hasherConfig.clone(m_hashers.size(), "CPU")); - const AlgoVariant av = getAlgoVariant(); - m_threads.mode = m_threads.count ? 
Simple : Automatic; + if(m_gpuEngine.size() > 0) + for(String gpuEngine : m_gpuEngine) + m_hashers.push_back(hasherConfig.clone(m_hashers.size(), gpuEngine.data())); - const size_t size = CpuThread::multiway(av) * cn_select_memory(m_algorithm.algo()) / 1024; + m_shouldSave = true; - if (!m_threads.count) { - m_threads.count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); - } - else if (m_safe) { - const size_t count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage); - if (m_threads.count > count) { - m_threads.count = count; - } - } - - for (size_t i = 0; i < m_threads.count; ++i) { - m_threads.list.push_back(CpuThread::createFromAV(i, m_algorithm.algo(), av, m_threads.mask, m_priority, m_assembly)); - } - - m_shouldSave = m_threads.mode == Automatic; return true; } @@ -190,29 +174,6 @@ bool xmrig::Config::parseBoolean(int key, bool enable) return false; } - switch (key) { - case SafeKey: /* --safe */ - m_safe = enable; - break; - - case HugePagesKey: /* --no-huge-pages */ - m_hugePages = enable; - break; - - case HardwareAESKey: /* hw-aes config only */ - m_aesMode = enable ? 
AES_HW : AES_SOFT; - break; - -# ifndef XMRIG_NO_ASM - case AssemblyKey: - m_assembly = Asm::parse(enable); - break; -# endif - - default: - break; - } - return true; } @@ -224,36 +185,92 @@ bool xmrig::Config::parseString(int key, const char *arg) } switch (key) { - case AVKey: /* --av */ - case MaxCPUUsageKey: /* --max-cpu-usage */ - case CPUPriorityKey: /* --cpu-priority */ + case PriorityKey: /* --cpu-priority */ return parseUint64(key, strtol(arg, nullptr, 10)); - case SafeKey: /* --safe */ - return parseBoolean(key, true); - - case HugePagesKey: /* --no-huge-pages */ - return parseBoolean(key, false); - - case ThreadsKey: /* --threads */ + case CPUThreadsKey: /* --threads */ if (strncmp(arg, "all", 3) == 0) { - m_threads.count = Cpu::info()->threads(); + m_cpuThreads = Cpu::info()->threads(); return true; } return parseUint64(key, strtol(arg, nullptr, 10)); + case CPUOptimizationKey: + { + String value = arg; + if(value.isEqual("REF", true)) + value = "REF"; + else if(value.isEqual("SSE2", true)) + value = "SSE2"; + else if(value.isEqual("SSSE3", true)) + value = "SSSE3"; + else if(value.isEqual("AVX", true)) + value = "AVX"; + else if(value.isEqual("AVX2", true)) + value = "AVX2"; + else if(value.isEqual("AVX512F", true)) + value = "AVX512F"; + else if(value.isEqual("NEON", true)) + value = "NEON"; + else { + printf("Invalid CPU optimization %s.\n", arg); + return false; + } + m_cpuOptimization = value; + return true; + } + case CPUAffinityKey: /* --cpu-affinity */ { const char *p = strstr(arg, "0x"); return parseUint64(key, p ? 
strtoull(p, nullptr, 16) : strtoull(arg, nullptr, 10)); } -# ifndef XMRIG_NO_ASM - case AssemblyKey: /* --asm */ - m_assembly = Asm::parse(arg); - break; -# endif + case UseGPUKey: + { + String strArg = arg; + std::vector gpuEngines = strArg.split(','); + m_gpuEngine.clear(); + for(String engine : gpuEngines) { + if(engine.isEqual("OPENCL", true)) + m_gpuEngine.push_back("OPENCL"); + else if(engine.isEqual("CUDA", true)) + m_gpuEngine.push_back("CUDA"); + else { + printf("Invalid GPU hasher %s, ignoring.\n", engine.data()); + } + } + + return m_gpuEngine.size() > 0; + } + + case GPUIntensityKey: + { + String strArg = arg; + std::vector gpuIntensities = strArg.split(','); + for (const String intensity : gpuIntensities) { + double value = strtod(intensity.data(), NULL); + if(value > 100) value = 100; + if(value < 0) value = 0; + m_gpuIntensity.push_back(value); + } + return true; + } + + case GPUFilterKey: + { + String strArg = arg; + std::vector gpuFilters = strArg.split(','); + for (const String filter : gpuFilters) { + std::vector explodedFilter = filter.split(':'); + if(explodedFilter.size() == 1) + m_gpuFilter.push_back(GPUFilter("", explodedFilter[0].data())); + else if(explodedFilter.size() >= 2) + m_gpuFilter.push_back(GPUFilter(explodedFilter[0].data(), explodedFilter[1].data())); + } + return true; + } default: break; @@ -272,7 +289,7 @@ bool xmrig::Config::parseUint64(int key, uint64_t arg) switch (key) { case CPUAffinityKey: /* --cpu-affinity */ if (arg) { - m_threads.mask = arg; + m_mask = arg; } break; @@ -288,20 +305,89 @@ void xmrig::Config::parseJSON(const rapidjson::Document &doc) { CommonConfig::parseJSON(doc); - const rapidjson::Value &threads = doc["threads"]; + const rapidjson::Value &threads = doc["cpu-threads"]; - if (threads.IsArray()) { - for (const rapidjson::Value &value : threads.GetArray()) { - if (!value.IsObject()) { + if (threads.IsUint()) + m_cpuThreads = threads.GetUint(); + else if(threads.IsString() && 
strcasecmp(threads.GetString(), "all") == 0) + m_cpuThreads = Cpu::info()->threads(); + + const rapidjson::Value &cpuOptimization = doc["cpu-optimization"]; + + if (cpuOptimization.IsString()) { + String value = cpuOptimization.GetString(); + if(value.isEqual("REF", true)) + value = "REF"; + else if(value.isEqual("SSE2", true)) + value = "SSE2"; + else if(value.isEqual("SSSE3", true)) + value = "SSSE3"; + else if(value.isEqual("AVX", true)) + value = "AVX"; + else if(value.isEqual("AVX2", true)) + value = "AVX2"; + else if(value.isEqual("AVX512F", true)) + value = "AVX512F"; + else if(value.isEqual("NEON", true)) + value = "NEON"; + else { + printf("Invalid CPU optimization %s, ignoring.\n", value.data()); + value = ""; + } + + if(!value.isEqual("")) + m_cpuOptimization = value; + } + + const rapidjson::Value &gpuEngines = doc["use-gpu"]; + + if(gpuEngines.IsArray()) { + m_gpuEngine.clear(); + + for(const rapidjson::Value &value : gpuEngines.GetArray()) { + if(!value.IsString()) { continue; } - if (value.HasMember("low_power_mode")) { - auto data = CpuThread::parse(value); + String engine = value.GetString(); + if(engine.isEqual("OPENCL", true)) + m_gpuEngine.push_back("OPENCL"); + else if(engine.isEqual("CUDA", true)) + m_gpuEngine.push_back("CUDA"); + else { + printf("Invalid GPU hasher %s, ignoring.\n", engine.data()); + } + } + } - if (data.valid) { - m_threads.cpu.push_back(std::move(data)); - } + const rapidjson::Value &gpuIntensities = doc["gpu-intensity"]; + + if(gpuIntensities.IsArray()) { + for(const rapidjson::Value &value : gpuIntensities.GetArray()) { + if(!value.IsDouble()) { + continue; + } + + double intensity = value.GetDouble(); + if(intensity > 100) intensity = 100; + if(intensity < 0) intensity = 0; + + m_gpuIntensity.push_back(intensity); + } + } + + const rapidjson::Value &gpuFilters = doc["gpu-filter"]; + + if(gpuFilters.IsArray()) { + for(const rapidjson::Value &value : gpuFilters.GetArray()) { + if(!value.IsObject()) { + continue; + } + + 
if(value.HasMember("filter")) { + auto data = parseGPUFilterConfig(value); + + m_gpuFilter.push_back(data); } } } @@ -311,25 +397,13 @@ void xmrig::Config::parseJSON(const rapidjson::Document &doc) bool xmrig::Config::parseInt(int key, int arg) { switch (key) { - case ThreadsKey: /* --threads */ + case CPUThreadsKey: /* --threads */ if (arg >= 0 && arg < 1024) { - m_threads.count = arg; + m_cpuThreads = arg; } break; - case AVKey: /* --av */ - if (arg >= AV_AUTO && arg < AV_MAX) { - m_algoVariant = static_cast(arg); - } - break; - - case MaxCPUUsageKey: /* --max-cpu-usage */ - if (m_maxCpuUsage > 0 && arg <= 100) { - m_maxCpuUsage = arg; - } - break; - - case CPUPriorityKey: /* --cpu-priority */ + case PriorityKey: /* --cpu-priority */ if (arg >= 0 && arg <= 5) { m_priority = arg; } @@ -341,39 +415,3 @@ bool xmrig::Config::parseInt(int key, int arg) return true; } - - -xmrig::AlgoVariant xmrig::Config::getAlgoVariant() const -{ -# ifndef XMRIG_NO_AEON - if (m_algorithm.algo() == xmrig::CRYPTONIGHT_LITE) { - return getAlgoVariantLite(); - } -# endif - - if (m_algoVariant <= AV_AUTO || m_algoVariant >= AV_MAX) { - return Cpu::info()->hasAES() ? AV_SINGLE : AV_SINGLE_SOFT; - } - - if (m_safe && !Cpu::info()->hasAES() && m_algoVariant <= AV_DOUBLE) { - return static_cast(m_algoVariant + 2); - } - - return m_algoVariant; -} - - -#ifndef XMRIG_NO_AEON -xmrig::AlgoVariant xmrig::Config::getAlgoVariantLite() const -{ - if (m_algoVariant <= AV_AUTO || m_algoVariant >= AV_MAX) { - return Cpu::info()->hasAES() ? 
AV_DOUBLE : AV_DOUBLE_SOFT; - } - - if (m_safe && !Cpu::info()->hasAES() && m_algoVariant <= AV_DOUBLE) { - return static_cast(m_algoVariant + 2); - } - - return m_algoVariant; -} -#endif diff --git a/src/core/Config.h b/src/core/Config.h index d2e8c166..f12db222 100644 --- a/src/core/Config.h +++ b/src/core/Config.h @@ -28,18 +28,16 @@ #include #include - #include "common/config/CommonConfig.h" #include "common/xmrig.h" #include "rapidjson/fwd.h" -#include "workers/CpuThread.h" +#include "rapidjson/schema.h" +#include "HasherConfig.h" namespace xmrig { -class ConfigLoader; -class IThread; class IConfigListener; class Process; @@ -58,29 +56,22 @@ class Process; class Config : public CommonConfig { public: - enum ThreadsMode { - Automatic, - Simple, - Advanced - }; - - Config(); bool reload(const char *json); void getJSON(rapidjson::Document &doc) const override; - inline AesMode aesMode() const { return m_aesMode; } - inline AlgoVariant algoVariant() const { return m_algoVariant; } - inline Assembly assembly() const { return m_assembly; } - inline bool isHugePages() const { return m_hugePages; } inline bool isShouldSave() const { return m_shouldSave && isAutoSave(); } - inline const std::vector &threads() const { return m_threads.list; } + inline const std::vector &hasherConfigs() const { return m_hashers; } inline int priority() const { return m_priority; } - inline int threadsCount() const { return m_threads.list.size(); } - inline int64_t affinity() const { return m_threads.mask; } - inline ThreadsMode threadsMode() const { return m_threads.mode; } + inline int hashersCount() const { return m_hashers.size(); } + inline int cpuThreads() const { return m_cpuThreads; } + inline String cpuOptimization() const { return m_cpuOptimization; } + inline int64_t cpuAffinity() const { return m_mask; } + inline std::vector gpuEngine() const { return m_gpuEngine; } + inline std::vector gpuIntensity() const { return m_gpuIntensity; } + inline std::vector gpuFilter() const { 
return m_gpuFilter; } static Config *load(Process *process, IConfigListener *listener); @@ -94,36 +85,42 @@ protected: private: bool parseInt(int key, int arg); - AlgoVariant getAlgoVariant() const; -# ifndef XMRIG_NO_AEON - AlgoVariant getAlgoVariantLite() const; -# endif + static rapidjson::Value toGPUFilterConfig(const GPUFilter &filter, rapidjson::Document &doc) { + using namespace rapidjson; + Value obj(kObjectType); + auto &allocator = doc.GetAllocator(); + if(!filter.engine.empty() && filter.engine != "*") + obj.AddMember("engine", Value(filter.engine.data(), doc.GetAllocator()), allocator); + obj.AddMember("filter", Value(filter.filter.data(), doc.GetAllocator()), allocator); + return obj; + } - struct Threads - { - inline Threads() : mask(-1L), count(0), mode(Automatic) {} + static GPUFilter parseGPUFilterConfig(const rapidjson::Value &object) { + std::string engineInfo; + std::string filterInfo; + const auto &filter = object["filter"]; + if (filter.IsString()) { + filterInfo = filter.GetString(); + } + const auto &engine = object["engine"]; + if (engine.IsString()) { + engineInfo = engine.GetString(); + } - int64_t mask; - size_t count; - std::vector cpu; - std::vector list; - ThreadsMode mode; - }; - - - AesMode m_aesMode; - AlgoVariant m_algoVariant; - Assembly m_assembly; - bool m_hugePages; - bool m_safe; + return GPUFilter(engineInfo, filterInfo); + } bool m_shouldSave; - int m_maxCpuUsage; int m_priority; - Threads m_threads; + int64_t m_mask; + int m_cpuThreads; + String m_cpuOptimization; + std::vector m_gpuEngine; + std::vector m_gpuIntensity; + std::vector m_gpuFilter; + std::vector m_hashers; }; - } /* namespace xmrig */ #endif /* XMRIG_CONFIG_H */ diff --git a/src/core/ConfigLoader_default.h b/src/core/ConfigLoader_default.h index 8fd0502b..a0f098fc 100644 --- a/src/core/ConfigLoader_default.h +++ b/src/core/ConfigLoader_default.h @@ -33,7 +33,7 @@ namespace xmrig { const static char *default_config = R"===( { - "algo": "cryptonight", + 
"algo": "argon2", "api": { "port": 0, "access-token": null, @@ -42,16 +42,13 @@ R"===( "ipv6": false, "restricted": true }, - "asm": true, "autosave": true, - "av": 0, "background": false, "colors": true, "cpu-affinity": null, "cpu-priority": null, "donate-level": 5, "huge-pages": true, - "hw-aes": null, "log-file": null, "max-cpu-usage": 100, "pools": [ diff --git a/src/core/ConfigLoader_platform.h b/src/core/ConfigLoader_platform.h index 0b71c3fd..ecfd9844 100644 --- a/src/core/ConfigLoader_platform.h +++ b/src/core/ConfigLoader_platform.h @@ -40,7 +40,7 @@ namespace xmrig { -static char const short_options[] = "a:c:kBp:Px:r:R:s:t:T:o:u:O:v:l:S"; +static char const short_options[] = "a:c:Bp:Px:r:R:s:t:T:o:u:O:v:l:S"; static struct option const options[] = { @@ -51,28 +51,28 @@ static struct option const options[] = { { "api-id", 1, nullptr, xmrig::IConfig::ApiIdKey }, { "api-ipv6", 0, nullptr, xmrig::IConfig::ApiIPv6Key }, { "api-no-restricted", 0, nullptr, xmrig::IConfig::ApiRestrictedKey }, - { "av", 1, nullptr, xmrig::IConfig::AVKey }, { "background", 0, nullptr, xmrig::IConfig::BackgroundKey }, { "config", 1, nullptr, xmrig::IConfig::ConfigKey }, + { "cpu-threads", 1, nullptr, xmrig::IConfig::CPUThreadsKey }, + { "cpu-optimization", 1, nullptr, xmrig::IConfig::CPUOptimizationKey}, { "cpu-affinity", 1, nullptr, xmrig::IConfig::CPUAffinityKey }, - { "cpu-priority", 1, nullptr, xmrig::IConfig::CPUPriorityKey }, + { "use-gpu", 1, nullptr, xmrig::IConfig::UseGPUKey }, + { "gpu-intensity", 1, nullptr, xmrig::IConfig::GPUIntensityKey }, + { "gpu-filter", 1, nullptr, xmrig::IConfig::GPUFilterKey }, + { "priority", 1, nullptr, xmrig::IConfig::PriorityKey }, { "donate-level", 1, nullptr, xmrig::IConfig::DonateLevelKey }, { "dry-run", 0, nullptr, xmrig::IConfig::DryRunKey }, { "keepalive", 0, nullptr, xmrig::IConfig::KeepAliveKey }, { "log-file", 1, nullptr, xmrig::IConfig::LogFileKey }, - { "max-cpu-usage", 1, nullptr, xmrig::IConfig::MaxCPUUsageKey }, { "nicehash", 0, 
nullptr, xmrig::IConfig::NicehashKey }, { "no-color", 0, nullptr, xmrig::IConfig::ColorKey }, { "no-watch", 0, nullptr, xmrig::IConfig::WatchKey }, - { "no-huge-pages", 0, nullptr, xmrig::IConfig::HugePagesKey }, { "variant", 1, nullptr, xmrig::IConfig::VariantKey }, { "pass", 1, nullptr, xmrig::IConfig::PasswordKey }, { "print-time", 1, nullptr, xmrig::IConfig::PrintTimeKey }, { "retries", 1, nullptr, xmrig::IConfig::RetriesKey }, { "retry-pause", 1, nullptr, xmrig::IConfig::RetryPauseKey }, - { "safe", 0, nullptr, xmrig::IConfig::SafeKey }, { "syslog", 0, nullptr, xmrig::IConfig::SyslogKey }, - { "threads", 1, nullptr, xmrig::IConfig::ThreadsKey }, { "url", 1, nullptr, xmrig::IConfig::UrlKey }, { "user", 1, nullptr, xmrig::IConfig::UserKey }, { "user-agent", 1, nullptr, xmrig::IConfig::UserAgentKey }, @@ -80,33 +80,30 @@ static struct option const options[] = { { "rig-id", 1, nullptr, xmrig::IConfig::RigIdKey }, { "tls", 0, nullptr, xmrig::IConfig::TlsKey }, { "tls-fingerprint", 1, nullptr, xmrig::IConfig::FingerprintKey }, - { "asm", 1, nullptr, xmrig::IConfig::AssemblyKey }, { nullptr, 0, nullptr, 0 } }; static struct option const config_options[] = { { "algo", 1, nullptr, xmrig::IConfig::AlgorithmKey }, - { "av", 1, nullptr, xmrig::IConfig::AVKey }, { "background", 0, nullptr, xmrig::IConfig::BackgroundKey }, { "colors", 0, nullptr, xmrig::IConfig::ColorKey }, + { "cpu-threads", 1, nullptr, xmrig::IConfig::CPUThreadsKey }, + { "cpu-optimization",1, nullptr, xmrig::IConfig::CPUOptimizationKey }, { "cpu-affinity", 1, nullptr, xmrig::IConfig::CPUAffinityKey }, - { "cpu-priority", 1, nullptr, xmrig::IConfig::CPUPriorityKey }, + { "use-gpu", 1, nullptr, xmrig::IConfig::UseGPUKey }, + { "gpu-intensity", 1, nullptr, xmrig::IConfig::GPUIntensityKey}, + { "gpu-filter", 1, nullptr, xmrig::IConfig::GPUFilterKey }, + { "priority", 1, nullptr, xmrig::IConfig::PriorityKey }, { "donate-level", 1, nullptr, xmrig::IConfig::DonateLevelKey }, { "dry-run", 0, nullptr, 
xmrig::IConfig::DryRunKey }, - { "huge-pages", 0, nullptr, xmrig::IConfig::HugePagesKey }, { "log-file", 1, nullptr, xmrig::IConfig::LogFileKey }, - { "max-cpu-usage", 1, nullptr, xmrig::IConfig::MaxCPUUsageKey }, { "print-time", 1, nullptr, xmrig::IConfig::PrintTimeKey }, { "retries", 1, nullptr, xmrig::IConfig::RetriesKey }, { "retry-pause", 1, nullptr, xmrig::IConfig::RetryPauseKey }, - { "safe", 0, nullptr, xmrig::IConfig::SafeKey }, { "syslog", 0, nullptr, xmrig::IConfig::SyslogKey }, - { "threads", 1, nullptr, xmrig::IConfig::ThreadsKey }, { "user-agent", 1, nullptr, xmrig::IConfig::UserAgentKey }, { "watch", 0, nullptr, xmrig::IConfig::WatchKey }, - { "hw-aes", 0, nullptr, xmrig::IConfig::HardwareAESKey }, - { "asm", 1, nullptr, xmrig::IConfig::AssemblyKey }, { "autosave", 0, nullptr, xmrig::IConfig::AutoSaveKey }, { nullptr, 0, nullptr, 0 } }; diff --git a/src/core/HasherConfig.cpp b/src/core/HasherConfig.cpp new file mode 100644 index 00000000..901fa65e --- /dev/null +++ b/src/core/HasherConfig.cpp @@ -0,0 +1,112 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018-2019 SChernykh + * Copyright 2016-2019 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include + +#include "crypto/argon2_hasher/common/DLLExport.h" + +#include "HasherConfig.h" + +int xmrig::HasherConfig::m_gpuCardsCount = 0; + +xmrig::HasherConfig::HasherConfig(xmrig::Algo algorithm, xmrig::Variant variant, int priority, int cpuThreads, + int64_t cpuAffinity, std::string cpuOptimization, + std::vector &gpuIntensity, std::vector &gpuFilter) : + m_index(-1), + m_type(""), + m_algorithm(algorithm), + m_variant(variant), + m_priority(priority), + m_cpuThreads(cpuThreads), + m_cpuAffinity(cpuAffinity), + m_cpuOptimization(cpuOptimization), + m_gpuIntensity(gpuIntensity), + m_gpuFilter(gpuFilter){ + +} + +xmrig::HasherConfig::HasherConfig(int index, std::string type, xmrig::Algo algorithm, xmrig::Variant variant, int priority, int cpuThreads, + int64_t cpuAffinity, std::string cpuOptimization, + std::vector &gpuIntensity, std::vector &gpuFilter) : + m_index(index), + m_type(type), + m_algorithm(algorithm), + m_variant(variant), + m_priority(priority), + m_cpuThreads(cpuThreads), + m_cpuAffinity(cpuAffinity), + m_cpuOptimization(cpuOptimization), + m_gpuIntensity(gpuIntensity) { + for(GPUFilter filter : gpuFilter) { + if(filter.engine.empty() || filter.engine == "*" || filter.engine == type) { + m_gpuFilter.push_back(filter); + } + } +} + +double xmrig::HasherConfig::getGPUIntensity(int cardIndex) { + if(cardIndex < m_gpuIntensity.size()) + return m_gpuIntensity[cardIndex]; + else if(m_gpuIntensity.size() > 0) + return m_gpuIntensity[0]; + else + return 50; +} + +int64_t xmrig::HasherConfig::getCPUAffinity(int cpuIndex) { + int64_t cpuId = -1L; + + if (m_cpuAffinity != -1L) { + size_t idx = 0; + + for (size_t i = 0; i < 64; i++) { + if (!(m_cpuAffinity & (1ULL << i))) { + continue; + } + + if (idx == cpuIndex) { + cpuId = i; + break; + } + + idx++; + } + } + + return cpuId; +} + +xmrig::HasherConfig *xmrig::HasherConfig::clone(int index, std::string hasherType) { + return new HasherConfig(index, hasherType, 
m_algorithm, m_variant, m_priority, m_cpuThreads, m_cpuAffinity, m_cpuOptimization, m_gpuIntensity, m_gpuFilter); +} + +double xmrig::HasherConfig::getAverageGPUIntensity() { + double result = 0; + for(double intensity : m_gpuIntensity) result += intensity; + return result / (m_gpuIntensity.size() > 0 ? m_gpuIntensity.size() : 1); +} + diff --git a/src/core/HasherConfig.h b/src/core/HasherConfig.h new file mode 100644 index 00000000..03fb4073 --- /dev/null +++ b/src/core/HasherConfig.h @@ -0,0 +1,98 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018-2019 SChernykh + * Copyright 2016-2019 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef XMRIG_HASHERCONFIG_H +#define XMRIG_HASHERCONFIG_H + + +#include "common/xmrig.h" +#include "crypto/argon2_hasher/common/common.h" + +namespace xmrig { + +struct GPUFilter { + GPUFilter(std::string engine, std::string filter) : engine(engine), filter(filter) {} + std::string engine; + std::string filter; +}; + +class DLLEXPORT HasherConfig +{ +public: + HasherConfig(Algo algorithm, + Variant variant, + int priority, + int cpuThreads, + int64_t cpuAffinity, + std::string cpuOptimization, + std::vector &gpuIntensity, + std::vector &gpuFilter); + + HasherConfig *clone(int index, std::string hasherType); + + inline size_t index() const { return m_index; } + inline std::string type() const { return m_type; } + inline Algo algorithm() const { return m_algorithm; } + inline Variant variant() const { return m_variant; } + inline int priority() const { return m_priority; } + inline int cpuThreads() const { return m_cpuThreads; } + inline std::string cpuOptimization() const { return m_cpuOptimization; } + inline std::vector &gpuFilter() { return m_gpuFilter; } + + double getAverageGPUIntensity(); + double getGPUIntensity(int cardIndex); + int64_t getCPUAffinity(int cpuIndex); + + inline void addGPUCardsCount(int count) { m_gpuCardsCount += count; } + inline int getGPUCardsCount() { return m_gpuCardsCount; } + +private: + HasherConfig(int index, + std::string type, + Algo algorithm, + Variant variant, + int priority, + int cpuThreads, + int64_t cpuAffinity, + std::string cpuOptimization, + std::vector &gpuIntensity, + std::vector &gpuFilter); + + const size_t m_index; + const std::string m_type; + const Algo m_algorithm; + const Variant m_variant; + const int m_priority; + const int m_cpuThreads; + const int64_t m_cpuAffinity; + const std::string m_cpuOptimization; + std::vector m_gpuIntensity; + std::vector m_gpuFilter; + + static int m_gpuCardsCount; +}; + +} /* namespace xmrig */ + +#endif /*XMRIG_HASHERCONFIG_H*/ diff --git 
a/src/core/cpu/AdvancedCpuInfo.cpp b/src/core/cpu/AdvancedCpuInfo.cpp index df6a385e..d844e798 100644 --- a/src/core/cpu/AdvancedCpuInfo.cpp +++ b/src/core/cpu/AdvancedCpuInfo.cpp @@ -31,7 +31,6 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() : - m_assembly(ASM_NONE), m_aes(false), m_avx2(false), m_L2_exclusive(false), @@ -76,20 +75,13 @@ xmrig::AdvancedCpuInfo::AdvancedCpuInfo() : if (data.flags[CPU_FEATURE_AES]) { m_aes = true; - - if (data.vendor == VENDOR_AMD) { - m_assembly = (data.ext_family >= 23) ? ASM_RYZEN : ASM_BULLDOZER; - } - else if (data.vendor == VENDOR_INTEL) { - m_assembly = ASM_INTEL; - } } m_avx2 = data.flags[CPU_FEATURE_AVX2] && data.flags[CPU_FEATURE_OSXSAVE]; } -size_t xmrig::AdvancedCpuInfo::optimalThreadsCount(size_t memSize, int maxCpuUsage) const +size_t xmrig::AdvancedCpuInfo::optimalThreadsCount(size_t memSize) const { if (threads() == 1) { return 1; @@ -120,9 +112,5 @@ size_t xmrig::AdvancedCpuInfo::optimalThreadsCount(size_t memSize, int maxCpuUsa count = threads(); } - if (((float) count / threads() * 100) > maxCpuUsage) { - count = (int) ceil((float) threads() * (maxCpuUsage / 100.0)); - } - return count < 1 ? 
1 : count; } diff --git a/src/core/cpu/AdvancedCpuInfo.h b/src/core/cpu/AdvancedCpuInfo.h index 0765da33..8377189c 100644 --- a/src/core/cpu/AdvancedCpuInfo.h +++ b/src/core/cpu/AdvancedCpuInfo.h @@ -38,9 +38,8 @@ public: AdvancedCpuInfo(); protected: - size_t optimalThreadsCount(size_t memSize, int maxCpuUsage) const override; + size_t optimalThreadsCount(size_t memSize) const override; - inline Assembly assembly() const override { return m_assembly; } inline bool hasAES() const override { return m_aes; } inline bool hasAVX2() const override { return m_avx2; } inline bool isSupported() const override { return true; } @@ -59,7 +58,6 @@ protected: # endif private: - Assembly m_assembly; bool m_aes; bool m_avx2; bool m_L2_exclusive; diff --git a/src/core/usage.h b/src/core/usage.h index 0d5c4781..f85a04dd 100644 --- a/src/core/usage.h +++ b/src/core/usage.h @@ -36,32 +36,26 @@ static char const usage[] = "\ Usage: " APP_ID " [OPTIONS]\n\ Options:\n\ -a, --algo=ALGO specify the algorithm to use\n\ - cryptonight\n" -#ifndef XMRIG_NO_AEON -"\ - cryptonight-lite\n" -#endif -#ifndef XMRIG_NO_SUMO -"\ - cryptonight-heavy\n" -#endif -"\ + chukwa\n\ + chukwa/wrkz\n\ -o, --url=URL URL of mining server\n\ -O, --userpass=U:P username:password pair for mining server\n\ -u, --user=USERNAME username for mining server\n\ -p, --pass=PASSWORD password for mining server\n\ --rig-id=ID rig identifier for pool-side statistics (needs pool support)\n\ - -t, --threads=N number of miner threads\n\ - -v, --av=N algorithm variation, 0 auto select\n\ + -t, --cpu-threads=N number of cpu miner threads - use 0 to disable\n\ + --cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1\n\ + --cpu-optimization=REF|SSE2|SSSE3|AVX|AVX2|AVX512F|NEON force specific optimization for cpu mining\n\ + --use-gpu=CUDA,OPENCL gpu engine to use, ignore this param to disable gpu support\n\ + --gpu-intensity=v1,v2... 
percent of gpu memory to use - you can have different values for each card (default 50)\n\ + --gpu-filter=,CUDA:,OPENCL: gpu filters to select cards\n\ -k, --keepalive send keepalived packet for prevent timeout (needs pool support)\n\ --nicehash enable nicehash.com support\n\ --tls enable SSL/TLS support (needs pool support)\n\ --tls-fingerprint=F pool TLS certificate fingerprint, if set enable strict certificate pinning\n\ -r, --retries=N number of times to retry before switch to backup server (default: 5)\n\ -R, --retry-pause=N time to pause between retries (default: 5)\n\ - --cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1\n\ - --cpu-priority set process priority (0 idle, 2 normal to 5 highest)\n\ - --no-huge-pages disable huge pages support\n\ + --priority set process priority (0 idle, 2 normal to 5 highest)\n\ --no-color disable colored output\n\ --variant algorithm PoW variant\n\ --donate-level=N donate level, default 5%% (5 minutes in 100 minutes)\n\ @@ -74,9 +68,6 @@ Options:\n\ -S, --syslog use system log for output messages\n" # endif "\ - --max-cpu-usage=N maximum CPU usage for automatic threads mode (default 75)\n\ - --safe safe adjust threads and av settings for current CPU\n\ - --asm=ASM ASM code for cn/2, possible values: auto, none, intel, ryzen, bulldozer.\n\ --print-time=N print hashrate report every N seconds\n\ --api-port=N port for the miner API\n\ --api-access-token=T access token for API\n\ diff --git a/src/crypto/Argon2_constants.h b/src/crypto/Argon2_constants.h new file mode 100644 index 00000000..fc1982f4 --- /dev/null +++ b/src/crypto/Argon2_constants.h @@ -0,0 +1,85 @@ +#ifndef XMRIG_ARGON2_CONSTANTS_H +#define XMRIG_ARGON2_CONSTANTS_H + + +#include +#include + + +#include "common/xmrig.h" + +namespace xmrig +{ + enum Argon2Algo { + I = 0, + D = 1, + ID = 2 + }; + + constexpr const size_t ARGON2_SALTLEN = 16; + constexpr const size_t ARGON2_HASHLEN = 32; + + constexpr const size_t ARGON2_MEMORY_CHUKWA = 512; 
+ constexpr const size_t ARGON2_ITERS_CHUKWA = 3; + constexpr const size_t ARGON2_PARALLELISM_CHUKWA = 1; + + constexpr const size_t ARGON2_MEMORY_CHUKWA_LITE = 256; + constexpr const size_t ARGON2_ITERS_CHUKWA_LITE = 4; + constexpr const size_t ARGON2_PARALLELISM_CHUKWA_LITE = 1; + + constexpr const int ARGON2_ALGO_CHUKWA = Argon2Algo::ID; + + inline int argon2_select_algo(Variant variant) + { + switch (variant) + { + case VARIANT_CHUKWA: + return ARGON2_ALGO_CHUKWA; + case VARIANT_CHUKWA_LITE: + return ARGON2_ALGO_CHUKWA; + } + + return 0; + } + + inline uint64_t argon2_select_memory(Variant variant) + { + switch (variant) + { + case VARIANT_CHUKWA: + return ARGON2_MEMORY_CHUKWA; + case VARIANT_CHUKWA_LITE: + return ARGON2_MEMORY_CHUKWA_LITE; + } + + return 0; + } + + inline uint32_t argon2_select_iters(Variant variant) + { + switch (variant) + { + case VARIANT_CHUKWA: + return ARGON2_ITERS_CHUKWA; + case VARIANT_CHUKWA_LITE: + return ARGON2_ITERS_CHUKWA_LITE; + } + + return 0; + } + + inline uint32_t argon2_select_parallelism(Variant variant) + { + switch (variant) + { + case VARIANT_CHUKWA: + return ARGON2_PARALLELISM_CHUKWA; + case VARIANT_CHUKWA_LITE: + return ARGON2_PARALLELISM_CHUKWA_LITE; + } + + return 0; + } +} + +#endif \ No newline at end of file diff --git a/src/crypto/Argon2_test.h b/src/crypto/Argon2_test.h new file mode 100644 index 00000000..e69de29b diff --git a/src/crypto/Asm.cpp b/src/crypto/Asm.cpp deleted file mode 100644 index 88812c6c..00000000 --- a/src/crypto/Asm.cpp +++ /dev/null @@ -1,102 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 SChernykh - * Copyright 2016-2018 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, 
either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - - -#include -#include - - -#ifdef _MSC_VER -# define strncasecmp _strnicmp -# define strcasecmp _stricmp -#endif - - -#include "crypto/Asm.h" -#include "rapidjson/document.h" - - -static const char *asmNames[] = { - "none", - "auto", - "intel", - "ryzen", - "bulldozer" -}; - - -xmrig::Assembly xmrig::Asm::parse(const char *assembly, Assembly defaultValue) -{ - constexpr size_t const size = sizeof(asmNames) / sizeof((asmNames)[0]); - assert(assembly != nullptr); - assert(ASM_MAX == size); - - if (assembly == nullptr) { - return defaultValue; - } - - for (size_t i = 0; i < size; i++) { - if (strcasecmp(assembly, asmNames[i]) == 0) { - return static_cast(i); - } - } - - return defaultValue; -} - - -xmrig::Assembly xmrig::Asm::parse(const rapidjson::Value &value, Assembly defaultValue) -{ - if (value.IsBool()) { - return parse(value.GetBool()); - } - - if (value.IsString()) { - return parse(value.GetString(), defaultValue); - } - - return defaultValue; -} - - -const char *xmrig::Asm::toString(Assembly assembly) -{ - return asmNames[assembly]; -} - - -rapidjson::Value xmrig::Asm::toJSON(Assembly assembly) -{ - using namespace rapidjson; - - if (assembly == ASM_NONE) { - return Value(false); - } - - if (assembly == ASM_AUTO) { - return Value(true); - } - - return Value(StringRef(toString(assembly))); -} diff --git a/src/crypto/Asm.h b/src/crypto/Asm.h deleted file mode 100644 index 3b755fd6..00000000 --- a/src/crypto/Asm.h +++ /dev/null @@ -1,50 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - 
* Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2016-2018 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef XMRIG_ASM_H -#define XMRIG_ASM_H - - -#include "common/xmrig.h" -#include "rapidjson/fwd.h" - - -namespace xmrig { - - -class Asm -{ -public: - static Assembly parse(const char *assembly, Assembly defaultValue = ASM_AUTO); - static Assembly parse(const rapidjson::Value &value, Assembly defaultValue = ASM_AUTO); - static const char *toString(Assembly assembly); - static rapidjson::Value toJSON(Assembly assembly); - - inline static Assembly parse(bool enable) { return enable ? 
ASM_AUTO : ASM_NONE; } -}; - - -} /* namespace xmrig */ - - -#endif /* XMRIG_ASM_H */ diff --git a/src/crypto/CryptoNight.h b/src/crypto/CryptoNight.h deleted file mode 100644 index b1ec2371..00000000 --- a/src/crypto/CryptoNight.h +++ /dev/null @@ -1,62 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2016-2018 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef XMRIG_CRYPTONIGHT_H -#define XMRIG_CRYPTONIGHT_H - - -#include -#include - -#if defined _MSC_VER || defined XMRIG_ARM -#define ABI_ATTRIBUTE -#else -#define ABI_ATTRIBUTE __attribute__((ms_abi)) -#endif - -struct cryptonight_ctx; -typedef void(*cn_mainloop_fun_ms_abi)(cryptonight_ctx**) ABI_ATTRIBUTE; - -struct cryptonight_r_data { - int variant; - uint64_t height; - - bool match(const int v, const uint64_t h) const { return (v == variant) && (h == height); } -}; - -struct cryptonight_ctx { - alignas(16) uint8_t state[224]; - alignas(16) uint8_t *memory; - - uint8_t unused[40]; - const uint32_t* saes_table; - - cn_mainloop_fun_ms_abi generated_code; - cn_mainloop_fun_ms_abi generated_code_double; - cryptonight_r_data generated_code_data; - cryptonight_r_data generated_code_double_data; -}; - - -#endif /* XMRIG_CRYPTONIGHT_H */ diff --git a/src/crypto/CryptoNight_arm.h b/src/crypto/CryptoNight_arm.h deleted file mode 100644 index d762929c..00000000 --- a/src/crypto/CryptoNight_arm.h +++ /dev/null @@ -1,844 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2016 Imran Yusuff - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. 
If not, see . - */ - -#ifndef XMRIG_CRYPTONIGHT_ARM_H -#define XMRIG_CRYPTONIGHT_ARM_H - - -#include "common/crypto/keccak.h" -#include "common/utils/mm_malloc.h" -#include "crypto/CryptoNight.h" -#include "crypto/CryptoNight_constants.h" -#include "crypto/CryptoNight_monero.h" -#include "crypto/soft_aes.h" - - -extern "C" -{ -#include "crypto/c_groestl.h" -#include "crypto/c_blake256.h" -#include "crypto/c_jh.h" -#include "crypto/c_skein.h" -} - - -static inline void do_blake_hash(const uint8_t *input, size_t len, uint8_t *output) { - blake256_hash(output, input, len); -} - - -static inline void do_groestl_hash(const uint8_t *input, size_t len, uint8_t *output) { - groestl(input, len * 8, output); -} - - -static inline void do_jh_hash(const uint8_t *input, size_t len, uint8_t *output) { - jh_hash(32 * 8, input, 8 * len, output); -} - - -static inline void do_skein_hash(const uint8_t *input, size_t len, uint8_t *output) { - xmr_skein(input, output); -} - - -void (* const extra_hashes[4])(const uint8_t *, size_t, uint8_t *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash}; - - -static inline __attribute__((always_inline)) __m128i _mm_set_epi64x(const uint64_t a, const uint64_t b) -{ - return vcombine_u64(vcreate_u64(b), vcreate_u64(a)); -} - - -#if __ARM_FEATURE_CRYPTO -static inline __attribute__((always_inline)) __m128i _mm_aesenc_si128(__m128i v, __m128i rkey) -{ - alignas(16) const __m128i zero = { 0 }; - return veorq_u8(vaesmcq_u8(vaeseq_u8(v, zero)), rkey ); -} -#else -static inline __attribute__((always_inline)) __m128i _mm_aesenc_si128(__m128i v, __m128i rkey) -{ - alignas(16) const __m128i zero = { 0 }; - return zero; -} -#endif - - -/* this one was not implemented yet so here it is */ -static inline __attribute__((always_inline)) uint64_t _mm_cvtsi128_si64(__m128i a) -{ - return vgetq_lane_u64(a, 0); -} - - -#if defined (__arm64__) || defined (__aarch64__) -static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t* hi) -{ - unsigned 
__int128 r = (unsigned __int128) a * (unsigned __int128) b; - *hi = r >> 64; - return (uint64_t) r; -} -#else -static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) { - // multiplier = ab = a * 2^32 + b - // multiplicand = cd = c * 2^32 + d - // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d - uint64_t a = multiplier >> 32; - uint64_t b = multiplier & 0xFFFFFFFF; - uint64_t c = multiplicand >> 32; - uint64_t d = multiplicand & 0xFFFFFFFF; - - //uint64_t ac = a * c; - uint64_t ad = a * d; - //uint64_t bc = b * c; - uint64_t bd = b * d; - - uint64_t adbc = ad + (b * c); - uint64_t adbc_carry = adbc < ad ? 1 : 0; - - // multiplier * multiplicand = product_hi * 2^64 + product_lo - uint64_t product_lo = bd + (adbc << 32); - uint64_t product_lo_carry = product_lo < bd ? 1 : 0; - *product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry; - - return product_lo; -} -#endif - - -// This will shift and xor tmp1 into itself as 4 32-bit vals such as -// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1) -static inline __m128i sl_xor(__m128i tmp1) -{ - __m128i tmp4; - tmp4 = _mm_slli_si128(tmp1, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - return tmp1; -} - - -template -static inline void soft_aes_genkey_sub(__m128i* xout0, __m128i* xout2) -{ - __m128i xout1 = soft_aeskeygenassist(*xout2); - xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem - *xout0 = sl_xor(*xout0); - *xout0 = _mm_xor_si128(*xout0, xout1); - xout1 = soft_aeskeygenassist<0x00>(*xout0); - xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem - *xout2 = sl_xor(*xout2); - *xout2 = _mm_xor_si128(*xout2, xout1); -} - - -template -static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, 
__m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9) -{ - __m128i xout0 = _mm_load_si128(memory); - __m128i xout2 = _mm_load_si128(memory + 1); - *k0 = xout0; - *k1 = xout2; - - soft_aes_genkey_sub<0x01>(&xout0, &xout2); - *k2 = xout0; - *k3 = xout2; - - soft_aes_genkey_sub<0x02>(&xout0, &xout2); - *k4 = xout0; - *k5 = xout2; - - soft_aes_genkey_sub<0x04>(&xout0, &xout2); - *k6 = xout0; - *k7 = xout2; - - soft_aes_genkey_sub<0x08>(&xout0, &xout2); - *k8 = xout0; - *k9 = xout2; -} - - -template -static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7) -{ - if (SOFT_AES) { - *x0 = soft_aesenc((uint32_t*)x0, key); - *x1 = soft_aesenc((uint32_t*)x1, key); - *x2 = soft_aesenc((uint32_t*)x2, key); - *x3 = soft_aesenc((uint32_t*)x3, key); - *x4 = soft_aesenc((uint32_t*)x4, key); - *x5 = soft_aesenc((uint32_t*)x5, key); - *x6 = soft_aesenc((uint32_t*)x6, key); - *x7 = soft_aesenc((uint32_t*)x7, key); - } - else { - *x0 = _mm_aesenc_si128(*x0, key); - *x1 = _mm_aesenc_si128(*x1, key); - *x2 = _mm_aesenc_si128(*x2, key); - *x3 = _mm_aesenc_si128(*x3, key); - *x4 = _mm_aesenc_si128(*x4, key); - *x5 = _mm_aesenc_si128(*x5, key); - *x6 = _mm_aesenc_si128(*x6, key); - *x7 = _mm_aesenc_si128(*x7, key); - } -} - - -inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3, __m128i& x4, __m128i& x5, __m128i& x6, __m128i& x7) -{ - __m128i tmp0 = x0; - x0 = _mm_xor_si128(x0, x1); - x1 = _mm_xor_si128(x1, x2); - x2 = _mm_xor_si128(x2, x3); - x3 = _mm_xor_si128(x3, x4); - x4 = _mm_xor_si128(x4, x5); - x5 = _mm_xor_si128(x5, x6); - x6 = _mm_xor_si128(x6, x7); - x7 = _mm_xor_si128(x7, tmp0); -} - - -template -static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output) -{ - __m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7; - __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - - aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, 
&k7, &k8, &k9); - - xin0 = _mm_load_si128(input + 4); - xin1 = _mm_load_si128(input + 5); - xin2 = _mm_load_si128(input + 6); - xin3 = _mm_load_si128(input + 7); - xin4 = _mm_load_si128(input + 8); - xin5 = _mm_load_si128(input + 9); - xin6 = _mm_load_si128(input + 10); - xin7 = _mm_load_si128(input + 11); - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - for (size_t i = 0; i < 16; i++) { - aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - - mix_and_propagate(xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7); - } - } - - for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) { - aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - - 
_mm_store_si128(output + i + 0, xin0); - _mm_store_si128(output + i + 1, xin1); - _mm_store_si128(output + i + 2, xin2); - _mm_store_si128(output + i + 3, xin3); - _mm_store_si128(output + i + 4, xin4); - _mm_store_si128(output + i + 5, xin5); - _mm_store_si128(output + i + 6, xin6); - _mm_store_si128(output + i + 7, xin7); - } -} - - -#ifndef XMRIG_NO_CN_GPU -template -void cn_explode_scratchpad_gpu(const uint8_t *input, uint8_t *output) -{ - constexpr size_t hash_size = 200; // 25x8 bytes - alignas(16) uint64_t hash[25]; - - for (uint64_t i = 0; i < MEM / 512; i++) - { - memcpy(hash, input, hash_size); - hash[0] ^= i; - - xmrig::keccakf(hash, 24); - memcpy(output, hash, 160); - output += 160; - - xmrig::keccakf(hash, 24); - memcpy(output, hash, 176); - output += 176; - - xmrig::keccakf(hash, 24); - memcpy(output, hash, 176); - output += 176; - } -} -#endif - - -template -static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output) -{ - __m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7; - __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - - aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9); - - xout0 = _mm_load_si128(output + 4); - xout1 = _mm_load_si128(output + 5); - xout2 = _mm_load_si128(output + 6); - xout3 = _mm_load_si128(output + 7); - xout4 = _mm_load_si128(output + 8); - xout5 = _mm_load_si128(output + 9); - xout6 = _mm_load_si128(output + 10); - xout7 = _mm_load_si128(output + 11); - - for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) - { - xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0); - xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1); - xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2); - xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3); - xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4); - xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5); - xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6); - xout7 = 
_mm_xor_si128(_mm_load_si128(input + i + 7), xout7); - - aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7); - } - } - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) { - xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0); - xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1); - xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2); - xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3); - xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4); - xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5); - xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6); - xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7); - - aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, 
&xout7); - aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - - mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7); - } - - for (size_t i = 0; i < 16; i++) { - aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - - mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7); - } - } - - _mm_store_si128(output + 4, xout0); - _mm_store_si128(output + 5, xout1); - _mm_store_si128(output + 6, xout2); - _mm_store_si128(output + 7, xout3); - _mm_store_si128(output + 8, xout4); - _mm_store_si128(output + 9, xout5); - _mm_store_si128(output + 10, xout6); - _mm_store_si128(output + 11, xout7); -} - - -static inline __m128i aes_round_tweak_div(const __m128i &in, const __m128i &key) -{ - alignas(16) uint32_t k[4]; - alignas(16) uint32_t x[4]; - - _mm_store_si128((__m128i*) k, key); - _mm_store_si128((__m128i*) x, _mm_xor_si128(in, 
_mm_set_epi64x(0xffffffffffffffff, 0xffffffffffffffff))); - - #define BYTE(p, i) ((unsigned char*)&x[p])[i] - k[0] ^= saes_table[0][BYTE(0, 0)] ^ saes_table[1][BYTE(1, 1)] ^ saes_table[2][BYTE(2, 2)] ^ saes_table[3][BYTE(3, 3)]; - x[0] ^= k[0]; - k[1] ^= saes_table[0][BYTE(1, 0)] ^ saes_table[1][BYTE(2, 1)] ^ saes_table[2][BYTE(3, 2)] ^ saes_table[3][BYTE(0, 3)]; - x[1] ^= k[1]; - k[2] ^= saes_table[0][BYTE(2, 0)] ^ saes_table[1][BYTE(3, 1)] ^ saes_table[2][BYTE(0, 2)] ^ saes_table[3][BYTE(1, 3)]; - x[2] ^= k[2]; - k[3] ^= saes_table[0][BYTE(3, 0)] ^ saes_table[1][BYTE(0, 1)] ^ saes_table[2][BYTE(1, 2)] ^ saes_table[3][BYTE(2, 3)]; - #undef BYTE - - return _mm_load_si128((__m128i*)k); -} - - -template -static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i bx1, __m128i& cx) -{ - uint64_t* mem_out = (uint64_t*)&l[idx]; - - if (BASE == xmrig::VARIANT_2) { - VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0)); - _mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx)); - } else { - __m128i tmp = _mm_xor_si128(bx0, cx); - mem_out[0] = _mm_cvtsi128_si64(tmp); - - uint64_t vh = vgetq_lane_u64(tmp, 1); - - uint8_t x = vh >> 24; - static const uint16_t table = 0x7531; - const uint8_t index = (((x >> (VARIANT == xmrig::VARIANT_XTL ? 
4 : 3)) & 6) | (x & 1)) << 1; - vh ^= ((table >> index) & 0x3) << 28; - - mem_out[1] = vh; - } -} - - -template -inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MASK = xmrig::cn_select_mask(); - constexpr size_t ITERATIONS = xmrig::cn_select_iter(); - constexpr size_t MEM = xmrig::cn_select_memory(); - constexpr xmrig::Variant BASE = xmrig::cn_base_variant(); - - if (BASE == xmrig::VARIANT_1 && size < 43) { - memset(output, 0, 32); - return; - } - - xmrig::keccak(input, size, ctx[0]->state); - - cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); - - const uint8_t* l0 = ctx[0]->memory; - uint64_t* h0 = reinterpret_cast(ctx[0]->state); - - VARIANT1_INIT(0); - VARIANT2_INIT(0); - VARIANT4_RANDOM_MATH_INIT(0); - - uint64_t al0 = h0[0] ^ h0[4]; - uint64_t ah0 = h0[1] ^ h0[5]; - __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - __m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]); - - uint64_t idx0 = al0; - - for (size_t i = 0; i < ITERATIONS; i++) { - __m128i cx; - if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) { - cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]); - } - - const __m128i ax0 = _mm_set_epi64x(ah0, al0); - if (VARIANT == xmrig::VARIANT_TUBE) { - cx = aes_round_tweak_div(cx, ax0); - } - else if (SOFT_AES) { - cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0); - } - else { - cx = _mm_aesenc_si128(cx, ax0); - } - - if (BASE == xmrig::VARIANT_1 || BASE == xmrig::VARIANT_2) { - cryptonight_monero_tweak(l0, idx0 & MASK, ax0, bx0, bx1, cx); - } else { - _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); - } - - idx0 = _mm_cvtsi128_si64(cx); - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*) &l0[idx0 & MASK])[0]; - ch = ((uint64_t*) &l0[idx0 & MASK])[1]; - - if (BASE == xmrig::VARIANT_2) { - if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { 
- VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1); - if (VARIANT == xmrig::VARIANT_4) { - al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32); - ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32); - } - } else { - VARIANT2_INTEGER_MATH(0, cl, cx); - } - } - - lo = __umul128(idx0, cl, &hi); - - if (BASE == xmrig::VARIANT_2) { - if (VARIANT == xmrig::VARIANT_4) { - VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0); - } else { - VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0)); - } - } - - al0 += hi; - ah0 += lo; - - ((uint64_t*)&l0[idx0 & MASK])[0] = al0; - - if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) { - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0; - } else if (BASE == xmrig::VARIANT_1) { - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0; - } else { - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0; - } - - al0 ^= cl; - ah0 ^= ch; - idx0 = al0; - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - const int64x2_t x = vld1q_s64(reinterpret_cast(&l0[idx0 & MASK])); - const int64_t n = vgetq_lane_s64(x, 0); - const int32_t d = vgetq_lane_s32(x, 2); - const int64_t q = n / (d | 0x5); - - ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; - - if (VARIANT == xmrig::VARIANT_XHV) { - idx0 = (~d) ^ q; - } - else { - idx0 = d ^ q; - } - } - - if (BASE == xmrig::VARIANT_2) { - bx1 = bx0; - } - - bx0 = cx; - } - - cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); - - xmrig::keccakf(h0, 24); - extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); -} - - -#ifndef XMRIG_NO_CN_GPU -template -void cn_gpu_inner_arm(const uint8_t *spad, uint8_t *lpad); - - -template -inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MASK = xmrig::CRYPTONIGHT_GPU_MASK; - constexpr size_t ITERATIONS = xmrig::cn_select_iter(); - constexpr size_t 
MEM = xmrig::cn_select_memory(); - - static_assert(MASK > 0 && ITERATIONS > 0 && MEM > 0, "unsupported algorithm/variant"); - - xmrig::keccak(input, size, ctx[0]->state); - cn_explode_scratchpad_gpu(ctx[0]->state, ctx[0]->memory); - - fesetround(FE_TONEAREST); - - cn_gpu_inner_arm(ctx[0]->state, ctx[0]->memory); - - cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); - - xmrig::keccakf((uint64_t*) ctx[0]->state, 24); - memcpy(output, ctx[0]->state, 32); -} -#endif - - -template -inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MASK = xmrig::cn_select_mask(); - constexpr size_t ITERATIONS = xmrig::cn_select_iter(); - constexpr size_t MEM = xmrig::cn_select_memory(); - constexpr xmrig::Variant BASE = xmrig::cn_base_variant(); - - if (BASE == xmrig::VARIANT_1 && size < 43) { - memset(output, 0, 64); - return; - } - - xmrig::keccak(input, size, ctx[0]->state); - xmrig::keccak(input + size, size, ctx[1]->state); - - const uint8_t* l0 = ctx[0]->memory; - const uint8_t* l1 = ctx[1]->memory; - uint64_t* h0 = reinterpret_cast(ctx[0]->state); - uint64_t* h1 = reinterpret_cast(ctx[1]->state); - - VARIANT1_INIT(0); - VARIANT1_INIT(1); - VARIANT2_INIT(0); - VARIANT2_INIT(1); - VARIANT4_RANDOM_MATH_INIT(0); - VARIANT4_RANDOM_MATH_INIT(1); - - cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0); - cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1); - - uint64_t al0 = h0[0] ^ h0[4]; - uint64_t al1 = h1[0] ^ h1[4]; - uint64_t ah0 = h0[1] ^ h0[5]; - uint64_t ah1 = h1[1] ^ h1[5]; - - __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - __m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]); - __m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); - __m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]); - - uint64_t idx0 = al0; - uint64_t idx1 = al1; - - for (size_t i = 0; i < 
ITERATIONS; i++) { - __m128i cx0, cx1; - if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) { - cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]); - cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]); - } - - const __m128i ax0 = _mm_set_epi64x(ah0, al0); - const __m128i ax1 = _mm_set_epi64x(ah1, al1); - if (VARIANT == xmrig::VARIANT_TUBE) { - cx0 = aes_round_tweak_div(cx0, ax0); - cx1 = aes_round_tweak_div(cx1, ax1); - } - else if (SOFT_AES) { - cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0); - cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], ax1); - } - else { - cx0 = _mm_aesenc_si128(cx0, ax0); - cx1 = _mm_aesenc_si128(cx1, ax1); - } - - if (BASE == xmrig::VARIANT_1 || (BASE == xmrig::VARIANT_2)) { - cryptonight_monero_tweak(l0, idx0 & MASK, ax0, bx00, bx01, cx0); - cryptonight_monero_tweak(l1, idx1 & MASK, ax1, bx10, bx11, cx1); - } else { - _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0)); - _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx10, cx1)); - } - - idx0 = _mm_cvtsi128_si64(cx0); - idx1 = _mm_cvtsi128_si64(cx1); - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*) &l0[idx0 & MASK])[0]; - ch = ((uint64_t*) &l0[idx0 & MASK])[1]; - - if (BASE == xmrig::VARIANT_2) { - if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { - VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01); - if (VARIANT == xmrig::VARIANT_4) { - al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32); - ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32); - } - } else { - VARIANT2_INTEGER_MATH(0, cl, cx0); - } - } - - lo = __umul128(idx0, cl, &hi); - - if (BASE == xmrig::VARIANT_2) { - if (VARIANT == xmrig::VARIANT_4) { - VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0); - } else { - VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 
1 : 0)); - } - } - - al0 += hi; - ah0 += lo; - - ((uint64_t*)&l0[idx0 & MASK])[0] = al0; - - if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) { - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0; - } else if (BASE == xmrig::VARIANT_1) { - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0; - } else { - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0; - } - - al0 ^= cl; - ah0 ^= ch; - idx0 = al0; - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - const int64x2_t x = vld1q_s64(reinterpret_cast(&l0[idx0 & MASK])); - const int64_t n = vgetq_lane_s64(x, 0); - const int32_t d = vgetq_lane_s32(x, 2); - const int64_t q = n / (d | 0x5); - - ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; - - if (VARIANT == xmrig::VARIANT_XHV) { - idx0 = (~d) ^ q; - } - else { - idx0 = d ^ q; - } - } - - cl = ((uint64_t*) &l1[idx1 & MASK])[0]; - ch = ((uint64_t*) &l1[idx1 & MASK])[1]; - - if (BASE == xmrig::VARIANT_2) { - if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { - VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11); - if (VARIANT == xmrig::VARIANT_4) { - al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32); - ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32); - } - } else { - VARIANT2_INTEGER_MATH(1, cl, cx1); - } - } - - lo = __umul128(idx1, cl, &hi); - - if (BASE == xmrig::VARIANT_2) { - if (VARIANT == xmrig::VARIANT_4) { - VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0); - } else { - VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 
1 : 0)); - } - } - - al1 += hi; - ah1 += lo; - - ((uint64_t*)&l1[idx1 & MASK])[0] = al1; - - if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) { - ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1; - } else if (BASE == xmrig::VARIANT_1) { - ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1; - } else { - ((uint64_t*)&l1[idx1 & MASK])[1] = ah1; - } - - al1 ^= cl; - ah1 ^= ch; - idx1 = al1; - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - const int64x2_t x = vld1q_s64(reinterpret_cast(&l1[idx1 & MASK])); - const int64_t n = vgetq_lane_s64(x, 0); - const int32_t d = vgetq_lane_s32(x, 2); - const int64_t q = n / (d | 0x5); - - ((int64_t*)&l1[idx1 & MASK])[0] = n ^ q; - - if (VARIANT == xmrig::VARIANT_XHV) { - idx1 = (~d) ^ q; - } - else { - idx1 = d ^ q; - } - } - if (BASE == xmrig::VARIANT_2) { - bx01 = bx00; - bx11 = bx10; - } - bx00 = cx0; - bx10 = cx1; - } - - cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0); - cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1); - - xmrig::keccakf(h0, 24); - xmrig::keccakf(h1, 24); - - extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); - extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32); -} - - -template -inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ -} - - -template -inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ -} - - -template -inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, struct cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ -} - -#endif /* __CRYPTONIGHT_ARM_H__ */ diff --git a/src/crypto/CryptoNight_constants.h b/src/crypto/CryptoNight_constants.h deleted file mode 100644 index 58a3915f..00000000 --- 
a/src/crypto/CryptoNight_constants.h +++ /dev/null @@ -1,225 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef XMRIG_CRYPTONIGHT_CONSTANTS_H -#define XMRIG_CRYPTONIGHT_CONSTANTS_H - - -#include -#include - - -#include "common/xmrig.h" - - -namespace xmrig -{ - -constexpr const size_t CRYPTONIGHT_MEMORY = 2 * 1024 * 1024; -constexpr const uint32_t CRYPTONIGHT_MASK = 0x1FFFF0; -constexpr const uint32_t CRYPTONIGHT_ITER = 0x80000; -constexpr const uint32_t CRYPTONIGHT_HALF_ITER = 0x40000; -constexpr const uint32_t CRYPTONIGHT_XAO_ITER = 0x100000; -constexpr const uint32_t CRYPTONIGHT_DOUBLE_ITER = 0x100000; -constexpr const uint32_t CRYPTONIGHT_WALTZ_ITER = 0x60000; -constexpr const uint32_t CRYPTONIGHT_ZLS_ITER = 0x60000; - -constexpr const uint32_t CRYPTONIGHT_GPU_ITER = 0xC000; -constexpr const uint32_t CRYPTONIGHT_GPU_MASK = 0x1FFFC0; - -constexpr const size_t CRYPTONIGHT_LITE_MEMORY = 1 * 1024 * 1024; -constexpr const uint32_t CRYPTONIGHT_LITE_MASK = 0xFFFF0; -constexpr const uint32_t CRYPTONIGHT_LITE_ITER = 0x40000; - -constexpr const size_t CRYPTONIGHT_HEAVY_MEMORY = 4 * 1024 * 
1024; -constexpr const uint32_t CRYPTONIGHT_HEAVY_MASK = 0x3FFFF0; -constexpr const uint32_t CRYPTONIGHT_HEAVY_ITER = 0x40000; - -constexpr const size_t CRYPTONIGHT_PICO_MEMORY = 256 * 1024; -constexpr const uint32_t CRYPTONIGHT_PICO_MASK = 0x1FFF0; -constexpr const uint32_t CRYPTONIGHT_PICO_ITER = 0x40000; -constexpr const uint32_t CRYPTONIGHT_TRTL_ITER = 0x10000; - - -template inline constexpr size_t cn_select_memory() { return 0; } -template<> inline constexpr size_t cn_select_memory() { return CRYPTONIGHT_MEMORY; } -template<> inline constexpr size_t cn_select_memory() { return CRYPTONIGHT_LITE_MEMORY; } -template<> inline constexpr size_t cn_select_memory() { return CRYPTONIGHT_HEAVY_MEMORY; } -template<> inline constexpr size_t cn_select_memory() { return CRYPTONIGHT_PICO_MEMORY; } - - -inline size_t cn_select_memory(Algo algorithm) -{ - switch(algorithm) - { - case CRYPTONIGHT: - return CRYPTONIGHT_MEMORY; - - case CRYPTONIGHT_LITE: - return CRYPTONIGHT_LITE_MEMORY; - - case CRYPTONIGHT_HEAVY: - return CRYPTONIGHT_HEAVY_MEMORY; - - case CRYPTONIGHT_PICO: - return CRYPTONIGHT_PICO_MEMORY; - - default: - break; - } - - return 0; -} - - -template inline constexpr uint32_t cn_select_mask() { return 0; } -template<> inline constexpr uint32_t cn_select_mask() { return CRYPTONIGHT_MASK; } -template<> inline constexpr uint32_t cn_select_mask() { return CRYPTONIGHT_LITE_MASK; } -template<> inline constexpr uint32_t cn_select_mask() { return CRYPTONIGHT_HEAVY_MASK; } -template<> inline constexpr uint32_t cn_select_mask() { return CRYPTONIGHT_PICO_MASK; } - - -inline uint32_t cn_select_mask(Algo algorithm) -{ - switch(algorithm) - { - case CRYPTONIGHT: - return CRYPTONIGHT_MASK; - - case CRYPTONIGHT_LITE: - return CRYPTONIGHT_LITE_MASK; - - case CRYPTONIGHT_HEAVY: - return CRYPTONIGHT_HEAVY_MASK; - - case CRYPTONIGHT_PICO: - return CRYPTONIGHT_PICO_MASK; - - default: - break; - } - - return 0; -} - - -template inline constexpr uint32_t cn_select_iter() { return 0; } 
-template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_HALF_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_HALF_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_XAO_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_GPU_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_WALTZ_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ZLS_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_DOUBLE_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_LITE_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_LITE_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_HEAVY_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_HEAVY_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_HEAVY_ITER; } -template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_TRTL_ITER; } - - -inline uint32_t cn_select_iter(Algo algorithm, Variant variant) -{ - switch (variant) { - case VARIANT_MSR: - case VARIANT_HALF: - return CRYPTONIGHT_HALF_ITER; - - case VARIANT_GPU: - return CRYPTONIGHT_GPU_ITER; - - case VARIANT_RTO: - case 
VARIANT_DOUBLE: - return CRYPTONIGHT_XAO_ITER; - - case VARIANT_TRTL: - return CRYPTONIGHT_TRTL_ITER; - - case VARIANT_RWZ: - case VARIANT_ZLS: - return CRYPTONIGHT_WALTZ_ITER; - - default: - break; - } - - switch(algorithm) - { - case CRYPTONIGHT: - return CRYPTONIGHT_ITER; - - case CRYPTONIGHT_LITE: - return CRYPTONIGHT_LITE_ITER; - - case CRYPTONIGHT_HEAVY: - return CRYPTONIGHT_HEAVY_ITER; - - case CRYPTONIGHT_PICO: - return CRYPTONIGHT_TRTL_ITER; - - default: - break; - } - - return 0; -} - - -template inline constexpr Variant cn_base_variant() { return VARIANT_0; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_0; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_0; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_0; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_GPU; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } - - -template inline constexpr bool cn_is_cryptonight_r() { return false; } -template<> inline constexpr bool 
cn_is_cryptonight_r() { return true; } -template<> inline constexpr bool cn_is_cryptonight_r() { return true; } - -} /* namespace xmrig */ - - -#endif /* XMRIG_CRYPTONIGHT_CONSTANTS_H */ diff --git a/src/crypto/CryptoNight_monero.h b/src/crypto/CryptoNight_monero.h deleted file mode 100644 index 4e84ac5d..00000000 --- a/src/crypto/CryptoNight_monero.h +++ /dev/null @@ -1,206 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef XMRIG_CRYPTONIGHT_MONERO_H -#define XMRIG_CRYPTONIGHT_MONERO_H - -#include -#include - -// VARIANT ALTERATIONS -#ifndef XMRIG_ARM -# define VARIANT1_INIT(part) \ - uint64_t tweak1_2_##part = 0; \ - if (BASE == xmrig::VARIANT_1) { \ - tweak1_2_##part = (*reinterpret_cast(input + 35 + part * size) ^ \ - *(reinterpret_cast(ctx[part]->state) + 24)); \ - } -#else -# define VARIANT1_INIT(part) \ - uint64_t tweak1_2_##part = 0; \ - if (BASE == xmrig::VARIANT_1) { \ - memcpy(&tweak1_2_##part, input + 35 + part * size, sizeof tweak1_2_##part); \ - tweak1_2_##part ^= *(reinterpret_cast(ctx[part]->state) + 24); \ - } -#endif - -#define VARIANT1_1(p) \ - if (BASE == xmrig::VARIANT_1) { \ - const uint8_t tmp = reinterpret_cast(p)[11]; \ - static const uint32_t table = 0x75310; \ - const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \ - ((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \ - } - -#define VARIANT1_2(p, part) \ - if (BASE == xmrig::VARIANT_1) { \ - (p) ^= tweak1_2_##part; \ - } - - -#ifndef XMRIG_ARM -# define VARIANT2_INIT(part) \ - __m128i division_result_xmm_##part = _mm_cvtsi64_si128(h##part[12]); \ - __m128i sqrt_result_xmm_##part = _mm_cvtsi64_si128(h##part[13]); - -#ifdef _MSC_VER -# define VARIANT2_SET_ROUNDING_MODE() if (BASE == xmrig::VARIANT_2) { _control87(RC_DOWN, MCW_RC); } -#else -# define VARIANT2_SET_ROUNDING_MODE() if (BASE == xmrig::VARIANT_2) { fesetround(FE_DOWNWARD); } -#endif - -# define VARIANT2_INTEGER_MATH(part, cl, cx) \ - do { \ - const uint64_t sqrt_result = static_cast(_mm_cvtsi128_si64(sqrt_result_xmm_##part)); \ - const uint64_t cx_0 = _mm_cvtsi128_si64(cx); \ - cl ^= static_cast(_mm_cvtsi128_si64(division_result_xmm_##part)) ^ (sqrt_result << 32); \ - const uint32_t d = static_cast(cx_0 + (sqrt_result << 1)) | 0x80000001UL; \ - const uint64_t cx_1 = _mm_cvtsi128_si64(_mm_srli_si128(cx, 8)); \ - const uint64_t division_result = static_cast(cx_1 / d) + ((cx_1 % d) << 32); \ - division_result_xmm_##part = 
_mm_cvtsi64_si128(static_cast(division_result)); \ - sqrt_result_xmm_##part = int_sqrt_v2(cx_0 + division_result); \ - } while (0) - -# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c, reverse) \ - do { \ - const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ (reverse ? 0x30 : 0x10)))); \ - const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \ - const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ (reverse ? 0x10 : 0x30)))); \ - _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \ - _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \ - _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \ - if (VARIANT == xmrig::VARIANT_4) { \ - _c = _mm_xor_si128(_mm_xor_si128(_c, chunk3), _mm_xor_si128(chunk1, chunk2)); \ - } \ - } while (0) - -# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo, reverse) \ - do { \ - const __m128i chunk1 = _mm_xor_si128(_mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))), _mm_set_epi64x(lo, hi)); \ - const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \ - hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \ - lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \ - const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \ - if (reverse) { \ - _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk1, _b1)); \ - _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk3, _b)); \ - } else { \ - _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \ - _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \ - } \ - _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \ - } while (0) - -#else -# define 
VARIANT2_INIT(part) \ - uint64_t division_result_##part = h##part[12]; \ - uint64_t sqrt_result_##part = h##part[13]; - -# define VARIANT2_INTEGER_MATH(part, cl, cx) \ - do { \ - const uint64_t cx_0 = _mm_cvtsi128_si64(cx); \ - cl ^= division_result_##part ^ (sqrt_result_##part << 32); \ - const uint32_t d = static_cast(cx_0 + (sqrt_result_##part << 1)) | 0x80000001UL; \ - const uint64_t cx_1 = _mm_cvtsi128_si64(_mm_srli_si128(cx, 8)); \ - division_result_##part = static_cast(cx_1 / d) + ((cx_1 % d) << 32); \ - const uint64_t sqrt_input = cx_0 + division_result_##part; \ - sqrt_result_##part = sqrt(sqrt_input + 18446744073709551616.0) * 2.0 - 8589934592.0; \ - const uint64_t s = sqrt_result_##part >> 1; \ - const uint64_t b = sqrt_result_##part & 1; \ - const uint64_t r2 = (uint64_t)(s) * (s + b) + (sqrt_result_##part << 32); \ - sqrt_result_##part += ((r2 + b > sqrt_input) ? -1 : 0) + ((r2 + (1ULL << 32) < sqrt_input - s) ? 1 : 0); \ - } while (0) - -# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c, reverse) \ - do { \ - const uint64x2_t chunk1 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ (reverse ? 0x30 : 0x10)))); \ - const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \ - const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ (reverse ? 
0x10 : 0x30)))); \ - vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \ - vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \ - vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \ - if (VARIANT == xmrig::VARIANT_4) { \ - _c = veorq_u64(veorq_u64(_c, chunk3), veorq_u64(chunk1, chunk2)); \ - } \ - } while (0) - -# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo, reverse) \ - do { \ - const uint64x2_t chunk1 = veorq_u64(vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10))), vcombine_u64(vcreate_u64(hi), vcreate_u64(lo))); \ - const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \ - hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \ - lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \ - const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30))); \ - if (reverse) { \ - vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b1))); \ - vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b))); \ - } else { \ - vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \ - vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \ - } \ - vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \ - } while (0) -#endif - -#define SWAP32LE(x) x -#define SWAP64LE(x) x -#define hash_extra_blake(data, length, hash) blake256_hash((uint8_t*)(hash), (uint8_t*)(data), (length)) - -#ifndef NOINLINE -#ifdef __GNUC__ -#define NOINLINE __attribute__ ((noinline)) -#elif _MSC_VER -#define NOINLINE __declspec(noinline) -#else -#define NOINLINE -#endif -#endif - -#include "common/xmrig.h" -#include "variant4_random_math.h" - -#define 
VARIANT4_RANDOM_MATH_INIT(part) \ - uint32_t r##part[9]; \ - struct V4_Instruction code##part[256]; \ - if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { \ - r##part[0] = (uint32_t)(h##part[12]); \ - r##part[1] = (uint32_t)(h##part[12] >> 32); \ - r##part[2] = (uint32_t)(h##part[13]); \ - r##part[3] = (uint32_t)(h##part[13] >> 32); \ - } \ - v4_random_math_init(code##part, height); - -#define VARIANT4_RANDOM_MATH(part, al, ah, cl, bx0, bx1) \ - if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { \ - cl ^= (r##part[0] + r##part[1]) | ((uint64_t)(r##part[2] + r##part[3]) << 32); \ - r##part[4] = static_cast(al); \ - r##part[5] = static_cast(ah); \ - r##part[6] = static_cast(_mm_cvtsi128_si32(bx0)); \ - r##part[7] = static_cast(_mm_cvtsi128_si32(bx1)); \ - r##part[8] = static_cast(_mm_cvtsi128_si32(_mm_srli_si128(bx1, 8))); \ - v4_random_math(code##part, r##part); \ - } - -#endif /* XMRIG_CRYPTONIGHT_MONERO_H */ diff --git a/src/crypto/CryptoNight_test.h b/src/crypto/CryptoNight_test.h deleted file mode 100644 index 6fa9dd28..00000000 --- a/src/crypto/CryptoNight_test.h +++ /dev/null @@ -1,388 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef XMRIG_CRYPTONIGHT_TEST_H -#define XMRIG_CRYPTONIGHT_TEST_H - - -#include - - -const static uint8_t test_input[380] = { - 0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00, - 0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B, - 0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62, - 0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92, - 0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01, - 0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19, - 0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9, - 0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F, - 0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46, - 0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02, - 0x07, 0x07, 0xB4, 0x87, 0xD0, 0xD6, 0x05, 0x26, 0xE0, 0xC6, 0xDD, 0x9B, 0xC7, 0x18, 0xC3, 0xCF, - 0x52, 0x04, 0xBD, 0x4F, 0x9B, 0x27, 0xF6, 0x73, 0xB9, 0x3F, 0xEF, 0x7B, 0xB2, 0xF7, 0x2B, 0xBB, - 0x3F, 0x3E, 0x9C, 0x3E, 0x9D, 0x33, 0x1E, 0xDE, 0xAD, 0xBE, 0xEF, 0x4E, 0x00, 0x91, 0x81, 0x29, - 0x74, 0xB2, 0x70, 0xE7, 0x6D, 0xD2, 0x2A, 0x5F, 0x52, 0x04, 0x93, 0xE6, 0x18, 0x89, 0x40, 0xD8, - 0xC6, 0xE3, 0x90, 0x6E, 0xAA, 0x6A, 0xB7, 0xE2, 0x08, 0x7E, 0x78, 0x0E, - 0x01, 0x00, 0xEE, 0xB2, 0xD1, 0xD6, 0x05, 0xFF, 0x27, 0x7F, 0x26, 0xDB, 0xAA, 0xB2, 0xC9, 0x26, - 0x30, 0xC6, 0xCF, 0x11, 0x64, 0xEA, 0x6C, 0x8A, 0xE0, 0x98, 0x01, 0xF8, 0x75, 0x4B, 0x49, 0xAF, - 0x79, 0x70, 0xAE, 0xEE, 0xA7, 0x62, 0x2C, 0x00, 0x00, 0x00, 0x00, 0x47, 0x8C, 0x63, 0xE7, 0xD8, - 0x40, 0x02, 0x3C, 0xDA, 0xEA, 0x92, 0x52, 0x53, 0xAC, 
0xFD, 0xC7, 0x8A, 0x4C, 0x31, 0xB2, 0xF2, - 0xEC, 0x72, 0x7B, 0xFF, 0xCE, 0xC0, 0xE7, 0x12, 0xD4, 0xE9, 0x2A, 0x01, - 0x07, 0x07, 0xA9, 0xB7, 0xD1, 0xD6, 0x05, 0x3F, 0x0D, 0x5E, 0xFD, 0xC7, 0x03, 0xFC, 0xFC, 0xD2, - 0xCE, 0xBC, 0x44, 0xD8, 0xAB, 0x44, 0xA6, 0xA0, 0x3A, 0xE4, 0x4D, 0x8F, 0x15, 0xAF, 0x62, 0x17, - 0xD1, 0xE0, 0x92, 0x85, 0xE4, 0x73, 0xF9, 0x00, 0x00, 0x00, 0xA0, 0xFC, 0x09, 0xDE, 0xAB, 0xF5, - 0x8B, 0x6F, 0x1D, 0xCA, 0xA8, 0xBA, 0xAC, 0x74, 0xDD, 0x74, 0x19, 0xD5, 0xD6, 0x10, 0xEC, 0x38, - 0xCF, 0x50, 0x29, 0x6A, 0x07, 0x0B, 0x93, 0x8F, 0x8F, 0xA8, 0x10, 0x04 -}; - - -struct cn_r_test_input_data -{ - uint64_t height; - size_t size; - uint8_t data[64]; -}; - - -const static cn_r_test_input_data cn_r_test_input[] = { - { 1806260, 44, { 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74 } }, - { 1806261, 50, { 0x4c, 0x6f, 0x72, 0x65, 0x6d, 0x20, 0x69, 0x70, 0x73, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x73, 0x69, 0x74, 0x20, 0x61, 0x6d, 0x65, 0x74, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x63, 0x74, 0x65, 0x74, 0x75, 0x72, 0x20, 0x61, 0x64, 0x69, 0x70, 0x69, 0x73, 0x63, 0x69, 0x6e, 0x67 } }, - { 1806262, 48, { 0x65, 0x6c, 0x69, 0x74, 0x2c, 0x20, 0x73, 0x65, 0x64, 0x20, 0x64, 0x6f, 0x20, 0x65, 0x69, 0x75, 0x73, 0x6d, 0x6f, 0x64, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x63, 0x69, 0x64, 0x69, 0x64, 0x75, 0x6e, 0x74, 0x20, 0x75, 0x74, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x65 } }, - { 1806263, 48, { 0x65, 0x74, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x65, 0x20, 0x6d, 0x61, 0x67, 0x6e, 0x61, 0x20, 0x61, 0x6c, 0x69, 0x71, 0x75, 0x61, 0x2e, 0x20, 0x55, 0x74, 0x20, 0x65, 0x6e, 0x69, 0x6d, 0x20, 0x61, 0x64, 0x20, 0x6d, 0x69, 0x6e, 0x69, 0x6d, 0x20, 0x76, 0x65, 0x6e, 0x69, 0x61, 0x6d, 0x2c } }, - { 1806264, 46, { 0x71, 
0x75, 0x69, 0x73, 0x20, 0x6e, 0x6f, 0x73, 0x74, 0x72, 0x75, 0x64, 0x20, 0x65, 0x78, 0x65, 0x72, 0x63, 0x69, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x75, 0x6c, 0x6c, 0x61, 0x6d, 0x63, 0x6f, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x69, 0x73, 0x20, 0x6e, 0x69, 0x73, 0x69 } }, - { 1806265, 45, { 0x75, 0x74, 0x20, 0x61, 0x6c, 0x69, 0x71, 0x75, 0x69, 0x70, 0x20, 0x65, 0x78, 0x20, 0x65, 0x61, 0x20, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x64, 0x6f, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x71, 0x75, 0x61, 0x74, 0x2e, 0x20, 0x44, 0x75, 0x69, 0x73, 0x20, 0x61, 0x75, 0x74, 0x65 } }, - { 1806266, 47, { 0x69, 0x72, 0x75, 0x72, 0x65, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x20, 0x72, 0x65, 0x70, 0x72, 0x65, 0x68, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x69, 0x74, 0x20, 0x69, 0x6e, 0x20, 0x76, 0x6f, 0x6c, 0x75, 0x70, 0x74, 0x61, 0x74, 0x65, 0x20, 0x76, 0x65, 0x6c, 0x69, 0x74 } }, - { 1806267, 44, { 0x65, 0x73, 0x73, 0x65, 0x20, 0x63, 0x69, 0x6c, 0x6c, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x65, 0x20, 0x65, 0x75, 0x20, 0x66, 0x75, 0x67, 0x69, 0x61, 0x74, 0x20, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x20, 0x70, 0x61, 0x72, 0x69, 0x61, 0x74, 0x75, 0x72, 0x2e } }, - { 1806268, 47, { 0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x65, 0x75, 0x72, 0x20, 0x73, 0x69, 0x6e, 0x74, 0x20, 0x6f, 0x63, 0x63, 0x61, 0x65, 0x63, 0x61, 0x74, 0x20, 0x63, 0x75, 0x70, 0x69, 0x64, 0x61, 0x74, 0x61, 0x74, 0x20, 0x6e, 0x6f, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x2c } }, - { 1806269, 62, { 0x73, 0x75, 0x6e, 0x74, 0x20, 0x69, 0x6e, 0x20, 0x63, 0x75, 0x6c, 0x70, 0x61, 0x20, 0x71, 0x75, 0x69, 0x20, 0x6f, 0x66, 0x66, 0x69, 0x63, 0x69, 0x61, 0x20, 0x64, 0x65, 0x73, 0x65, 0x72, 0x75, 0x6e, 0x74, 0x20, 0x6d, 0x6f, 0x6c, 0x6c, 0x69, 0x74, 0x20, 0x61, 0x6e, 0x69, 0x6d, 0x20, 0x69, 0x64, 0x20, 0x65, 0x73, 0x74, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x75, 0x6d, 0x2e } }, -}; - - -// "cn/wow" -const static uint8_t test_output_wow[] = { - 0x9d, 0x47, 0xbf, 0x4c, 0x41, 0xb7, 0xe8, 0xe7, 0x27, 0xe6, 0x81, 0x71, 
0x5a, 0xcb, 0x47, 0xfa, 0x16, 0x77, 0xcd, 0xba, 0x9c, 0xa7, 0xbc, 0xb0, 0x5a, 0xd8, 0xcc, 0x8a, 0xbd, 0x5d, 0xaa, 0x66, - 0x0d, 0x4a, 0x49, 0x5c, 0xb8, 0x44, 0xa3, 0xca, 0x8b, 0xa4, 0xed, 0xb8, 0xe6, 0xbc, 0xf8, 0x29, 0xef, 0x1c, 0x06, 0xd9, 0xcd, 0xea, 0x2b, 0x62, 0xca, 0x46, 0xc2, 0xa2, 0x1b, 0x8b, 0x0a, 0x79, - 0xa1, 0xd6, 0xd8, 0x48, 0xb5, 0xc5, 0x91, 0x5f, 0xcc, 0xd2, 0xf6, 0x4c, 0xf2, 0x16, 0xc6, 0xb1, 0xa0, 0x2c, 0xf7, 0xc7, 0x7b, 0xc8, 0x0d, 0x8d, 0x4e, 0x51, 0xb4, 0x19, 0xe8, 0x8f, 0xf0, 0xdd, - 0xaf, 0x3a, 0x85, 0x44, 0xa0, 0x22, 0x1a, 0x14, 0x8c, 0x2a, 0xc9, 0x04, 0x84, 0xb1, 0x98, 0x61, 0xe3, 0xaf, 0xca, 0x33, 0xfe, 0x17, 0x02, 0x1e, 0xfb, 0x8a, 0xd6, 0x49, 0x6b, 0x56, 0x79, 0x15, - 0x31, 0x33, 0x99, 0xe0, 0x96, 0x3a, 0xe8, 0xa9, 0x9d, 0xab, 0x8a, 0xf6, 0x6d, 0x34, 0x3e, 0x09, 0x7d, 0xae, 0x0c, 0x0f, 0xeb, 0x08, 0xdb, 0xc4, 0x3c, 0xcd, 0xaf, 0xef, 0x55, 0x15, 0xf4, 0x13, - 0x60, 0x21, 0xc6, 0xef, 0x90, 0xbf, 0xf9, 0xae, 0x94, 0xa7, 0x50, 0x6d, 0x62, 0x3d, 0x3a, 0x7a, 0x86, 0xc1, 0x75, 0x6d, 0x65, 0x5f, 0x50, 0xdd, 0x55, 0x8f, 0x71, 0x6d, 0x64, 0x62, 0x2a, 0x34, - 0x2b, 0x13, 0x00, 0x05, 0x35, 0xf3, 0xdb, 0x5f, 0x9b, 0x9b, 0x84, 0xa6, 0x5c, 0x43, 0x51, 0xf3, 0x86, 0xcd, 0x2c, 0xde, 0xde, 0xbb, 0x8c, 0x3a, 0xd2, 0xea, 0xb0, 0x86, 0xe6, 0xa3, 0xfe, 0xe5, - 0xfc, 0x0e, 0x1d, 0xad, 0x8e, 0x89, 0x57, 0x49, 0xdc, 0x90, 0xeb, 0x69, 0x0b, 0xc1, 0xba, 0x05, 0x9a, 0x1c, 0xd7, 0x72, 0xaf, 0xaa, 0xf6, 0x5a, 0x10, 0x6b, 0xf9, 0xe5, 0xe6, 0xb8, 0x05, 0x03, - 0xb6, 0x0b, 0x0a, 0xfe, 0x14, 0x4d, 0xef, 0xf7, 0xd9, 0x03, 0xed, 0x2d, 0x55, 0x45, 0xe7, 0x7e, 0xbe, 0x66, 0xa3, 0xc5, 0x1f, 0xee, 0x70, 0x16, 0xee, 0xb8, 0xfe, 0xe9, 0xeb, 0x63, 0x0c, 0x0f, - 0x64, 0x77, 0x4b, 0x27, 0xe7, 0xd5, 0xfe, 0xc8, 0x62, 0xfc, 0x4c, 0x0c, 0x13, 0xac, 0x6b, 0xf0, 0x91, 0x23, 0xb6, 0xf0, 0x5b, 0xb0, 0xe4, 0xb7, 0x5c, 0x97, 0xf3, 0x79, 0xa2, 0xb3, 0xa6, 0x79, -}; - - -// "cn/r" -const static uint8_t test_output_r[] = { - 0xf7, 0x59, 0x58, 0x8a, 0xd5, 0x7e, 0x75, 0x84, 0x67, 0x29, 0x54, 
0x43, 0xa9, 0xbd, 0x71, 0x49, 0x0a, 0xbf, 0xf8, 0xe9, 0xda, 0xd1, 0xb9, 0x5b, 0x6b, 0xf2, 0xf5, 0xd0, 0xd7, 0x83, 0x87, 0xbc, - 0x5b, 0xb8, 0x33, 0xde, 0xca, 0x2b, 0xdd, 0x72, 0x52, 0xa9, 0xcc, 0xd7, 0xb4, 0xce, 0x0b, 0x6a, 0x48, 0x54, 0x51, 0x57, 0x94, 0xb5, 0x6c, 0x20, 0x72, 0x62, 0xf7, 0xa5, 0xb9, 0xbd, 0xb5, 0x66, - 0x1e, 0xe6, 0x72, 0x8d, 0xa6, 0x0f, 0xbd, 0x8d, 0x7d, 0x55, 0xb2, 0xb1, 0xad, 0xe4, 0x87, 0xa3, 0xcf, 0x52, 0xa2, 0xc3, 0xac, 0x6f, 0x52, 0x0d, 0xb1, 0x2c, 0x27, 0xd8, 0x92, 0x1f, 0x6c, 0xab, - 0x69, 0x69, 0xfe, 0x2d, 0xdf, 0xb7, 0x58, 0x43, 0x8d, 0x48, 0x04, 0x9f, 0x30, 0x2f, 0xc2, 0x10, 0x8a, 0x4f, 0xcc, 0x93, 0xe3, 0x76, 0x69, 0x17, 0x0e, 0x6d, 0xb4, 0xb0, 0xb9, 0xb4, 0xc4, 0xcb, - 0x7f, 0x30, 0x48, 0xb4, 0xe9, 0x0d, 0x0c, 0xbe, 0x7a, 0x57, 0xc0, 0x39, 0x4f, 0x37, 0x33, 0x8a, 0x01, 0xfa, 0xe3, 0xad, 0xfd, 0xc0, 0xe5, 0x12, 0x6d, 0x86, 0x3a, 0x89, 0x5e, 0xb0, 0x4e, 0x02, - 0x1d, 0x29, 0x04, 0x43, 0xa4, 0xb5, 0x42, 0xaf, 0x04, 0xa8, 0x2f, 0x6b, 0x24, 0x94, 0xa6, 0xee, 0x7f, 0x20, 0xf2, 0x75, 0x4c, 0x58, 0xe0, 0x84, 0x90, 0x32, 0x48, 0x3a, 0x56, 0xe8, 0xe2, 0xef, - 0xc4, 0x3c, 0xc6, 0x56, 0x74, 0x36, 0xa8, 0x6a, 0xfb, 0xd6, 0xaa, 0x9e, 0xaa, 0x7c, 0x27, 0x6e, 0x98, 0x06, 0x83, 0x03, 0x34, 0xb6, 0x14, 0xb2, 0xbe, 0xe2, 0x3c, 0xc7, 0x66, 0x34, 0xf6, 0xfd, - 0x87, 0xbe, 0x24, 0x79, 0xc0, 0xc4, 0xe8, 0xed, 0xfd, 0xfa, 0xa5, 0x60, 0x3e, 0x93, 0xf4, 0x26, 0x5b, 0x3f, 0x82, 0x24, 0xc1, 0xc5, 0x94, 0x6f, 0xeb, 0x42, 0x48, 0x19, 0xd1, 0x89, 0x90, 0xa4, - 0xdd, 0x9d, 0x6a, 0x6d, 0x8e, 0x47, 0x46, 0x5c, 0xce, 0xac, 0x08, 0x77, 0xef, 0x88, 0x9b, 0x93, 0xe7, 0xeb, 0xa9, 0x79, 0x55, 0x7e, 0x39, 0x35, 0xd7, 0xf8, 0x6d, 0xce, 0x11, 0xb0, 0x70, 0xf3, - 0x75, 0xc6, 0xf2, 0xae, 0x49, 0xa2, 0x05, 0x21, 0xde, 0x97, 0x28, 0x5b, 0x43, 0x1e, 0x71, 0x71, 0x25, 0x84, 0x7f, 0xb8, 0x93, 0x5e, 0xd8, 0x4a, 0x61, 0xe7, 0xf8, 0xd3, 0x6a, 0x2c, 0x3d, 0x8e, -}; - - -// "cn/0" -const static uint8_t test_output_v0[160] = { - 0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 
0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7, - 0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00, - 0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66, - 0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F, - 0xA1, 0xB4, 0xFA, 0xE3, 0xE5, 0x76, 0xCE, 0xCF, 0xB7, 0x9C, 0xAF, 0x3E, 0x29, 0x92, 0xE4, 0xE0, - 0x31, 0x24, 0x05, 0x48, 0xBF, 0x8D, 0x5F, 0x7B, 0x11, 0x03, 0x60, 0xAA, 0xD7, 0x50, 0x3F, 0x0C, - 0x2D, 0x30, 0xF3, 0x87, 0x4F, 0x86, 0xA1, 0x4A, 0xB5, 0xA2, 0x1A, 0x08, 0xD0, 0x44, 0x2C, 0x9D, - 0x16, 0xE9, 0x28, 0x49, 0xA1, 0xFF, 0x85, 0x6F, 0x12, 0xBB, 0x7D, 0xAB, 0x11, 0x1C, 0xE7, 0xF7, - 0x2D, 0x9D, 0x19, 0xE4, 0xD2, 0x26, 0x44, 0x1E, 0xCD, 0x22, 0x08, 0x24, 0xA8, 0x97, 0x46, 0x62, - 0x04, 0x84, 0x90, 0x4A, 0xEE, 0x99, 0x14, 0xED, 0xB8, 0xC6, 0x0D, 0x37, 0xA1, 0x66, 0x17, 0xB0 -}; - - -// "cn/1" Cryptonight variant 1 (Monero v7) -const static uint8_t test_output_v1[160] = { - 0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9, - 0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9, - 0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D, - 0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22, - 0xE7, 0x8C, 0x5A, 0x6E, 0x38, 0x30, 0x68, 0x4A, 0x73, 0xFC, 0x1B, 0xC6, 0x6D, 0xFC, 0x8D, 0x98, - 0xB4, 0xC2, 0x23, 0x39, 0xAD, 0xE0, 0x9D, 0xF6, 0x6D, 0x8C, 0x6A, 0xAA, 0xF9, 0xB2, 0xE3, 0x4C, - 0xB6, 0x90, 0x6C, 0xE6, 0x15, 0x5E, 0x46, 0x07, 0x9C, 0xB2, 0x6B, 0xAC, 0x3B, 0xAC, 0x1A, 0xDE, - 0x92, 0x2C, 0xD6, 0x0C, 0x46, 0x9D, 0x9B, 0xC2, 0x84, 0x52, 0x65, 0xF6, 0xBD, 0xFA, 0x0D, 0x74, - 0x00, 0x66, 0x10, 0x07, 0xF1, 0x19, 0x06, 0x3A, 0x6C, 0xFF, 0xEE, 0xB2, 0x40, 0xE5, 0x88, 0x2B, - 0x6C, 0xAB, 0x6B, 0x1D, 0x88, 0xB8, 0x44, 0x25, 0xF4, 0xEA, 0xB7, 0xEC, 0xBA, 0x12, 0x8A, 0x24 -}; 
- - -// "cn/2" Cryptonight variant 2 (Monero v8) -const static uint8_t test_output_v2[160] = { - 0x97, 0x37, 0x82, 0x82, 0xCF, 0x10, 0xE7, 0xAD, 0x03, 0x3F, 0x7B, 0x80, 0x74, 0xC4, 0x0E, 0x14, - 0xD0, 0x6E, 0x7F, 0x60, 0x9D, 0xDD, 0xDA, 0x78, 0x76, 0x80, 0xB5, 0x8C, 0x05, 0xF4, 0x3D, 0x21, - 0x87, 0x1F, 0xCD, 0x68, 0x23, 0xF6, 0xA8, 0x79, 0xBB, 0x3F, 0x33, 0x95, 0x1C, 0x8E, 0x8E, 0x89, - 0x1D, 0x40, 0x43, 0x88, 0x0B, 0x02, 0xDF, 0xA1, 0xBB, 0x3B, 0xE4, 0x98, 0xB5, 0x0E, 0x75, 0x78, - 0xE6, 0x0D, 0x24, 0x0F, 0x65, 0x85, 0x60, 0x3A, 0x4A, 0xE5, 0x5F, 0x54, 0x9B, 0xC8, 0x79, 0x93, - 0xEB, 0x3D, 0x98, 0x2C, 0xFE, 0x9B, 0xFB, 0x15, 0xB6, 0x88, 0x21, 0x94, 0xB0, 0x05, 0x86, 0x5C, - 0x59, 0x8B, 0x93, 0x7A, 0xDA, 0xD2, 0xA2, 0x14, 0xED, 0xB7, 0xC4, 0x5D, 0xA1, 0xEF, 0x26, 0xF3, - 0xC7, 0x73, 0x29, 0x4D, 0xF1, 0xC8, 0x2C, 0xE0, 0xD0, 0xE9, 0xED, 0x0C, 0x70, 0x75, 0x05, 0x3E, - 0x5B, 0xF6, 0xA0, 0x6E, 0xEA, 0xDE, 0x87, 0x0B, 0x06, 0x29, 0x03, 0xBF, 0xB4, 0x85, 0x9D, 0x04, - 0x75, 0x1A, 0xCD, 0x1E, 0xD6, 0xAA, 0x1B, 0x05, 0x24, 0x6A, 0x2C, 0x80, 0x69, 0x68, 0xDC, 0x97 -}; - - -// "cn/xtl" Stellite (XTL) -const static uint8_t test_output_xtl[160] = { - 0x8F, 0xE5, 0xF0, 0x5F, 0x02, 0x2A, 0x61, 0x7D, 0xE5, 0x3F, 0x79, 0x36, 0x4B, 0x25, 0xCB, 0xC3, - 0xC0, 0x8E, 0x0E, 0x1F, 0xE3, 0xBE, 0x48, 0x57, 0x07, 0x03, 0xFE, 0xE1, 0xEC, 0x0E, 0xB0, 0xB1, - 0x21, 0x26, 0xFF, 0x98, 0xE6, 0x86, 0x08, 0x5B, 0xC9, 0x96, 0x44, 0xA3, 0xB8, 0x4E, 0x28, 0x90, - 0x76, 0xED, 0xAD, 0xB9, 0xAA, 0xAC, 0x01, 0x94, 0x1D, 0xBE, 0x3E, 0xEA, 0xAD, 0xEE, 0xB2, 0xCF, - 0xB0, 0x43, 0x4B, 0x88, 0xFC, 0xB2, 0xF3, 0x82, 0x9D, 0xD7, 0xDF, 0x51, 0x97, 0x2C, 0x5A, 0xE3, - 0xC7, 0x16, 0x0B, 0xC8, 0x7C, 0xB7, 0x2F, 0x1C, 0x55, 0x33, 0xCA, 0xE1, 0xEE, 0x08, 0xA4, 0x86, - 0x60, 0xED, 0x6E, 0x9D, 0x2D, 0x05, 0x0D, 0x7D, 0x02, 0x49, 0x23, 0x39, 0x7C, 0xC3, 0x6D, 0x3D, - 0x05, 0x51, 0x28, 0xF1, 0x9B, 0x3C, 0xDF, 0xC4, 0xEA, 0x8A, 0xA6, 0x6A, 0x3C, 0x8B, 0xE2, 0xAF, - 0x47, 0x00, 0xFC, 0x36, 0xED, 0x50, 0xBB, 0xD2, 0x2E, 
0x63, 0x4B, 0x93, 0x11, 0x0C, 0xA7, 0xBA, - 0x32, 0x6E, 0x47, 0x4D, 0xCE, 0xCC, 0x82, 0x54, 0x1D, 0x06, 0xF8, 0x06, 0x86, 0xBD, 0x22, 0x48 -}; - - -// "cn/half" -const static uint8_t test_output_half[160] = { - 0x5D, 0x4F, 0xBC, 0x35, 0x60, 0x97, 0xEA, 0x64, 0x40, 0xB0, 0x88, 0x8E, 0xDE, 0xB6, 0x35, 0xDD, - 0xC8, 0x4A, 0x0E, 0x39, 0x7C, 0x86, 0x84, 0x56, 0x89, 0x5C, 0x3F, 0x29, 0xBE, 0x73, 0x12, 0xA7, - 0x02, 0xE6, 0x1D, 0x2B, 0xBC, 0x84, 0xB6, 0x71, 0x96, 0x71, 0xD5, 0x0C, 0xAC, 0x76, 0x0E, 0x6B, - 0xF1, 0xF0, 0x55, 0x34, 0x15, 0x29, 0x93, 0x04, 0x2D, 0xED, 0xD2, 0x33, 0x50, 0x6E, 0xBE, 0x25, - 0xD0, 0xFD, 0x8E, 0xC6, 0x15, 0xD5, 0x12, 0x53, 0x7B, 0x26, 0xF6, 0x01, 0xA5, 0xA8, 0xBE, 0x7C, - 0xCF, 0x5E, 0x19, 0xB7, 0x63, 0x0D, 0x0F, 0x02, 0x2B, 0xD7, 0xC4, 0x8C, 0x12, 0x24, 0x80, 0x02, - 0xE7, 0xB7, 0xA0, 0x4F, 0x94, 0xF9, 0x46, 0xB5, 0x18, 0x64, 0x7E, 0x4E, 0x9C, 0x81, 0x6C, 0x60, - 0x7D, 0x2E, 0xEA, 0xCF, 0x90, 0xCB, 0x68, 0x09, 0xC9, 0x53, 0xF6, 0xA9, 0xCA, 0x0C, 0xAC, 0xDC, - 0xFD, 0x07, 0xDA, 0x24, 0x1D, 0xD1, 0x35, 0x32, 0x3C, 0xE8, 0x64, 0x44, 0x5E, 0xCB, 0xB5, 0x00, - 0x69, 0xF4, 0x6F, 0xBB, 0x62, 0x0D, 0x25, 0xD8, 0xAC, 0x20, 0x90, 0xC5, 0x1B, 0xD3, 0x5F, 0xCA -}; - - -// "cn/msr" Masari (MSR) -const static uint8_t test_output_msr[160] = { - 0x3C, 0x7A, 0x61, 0x08, 0x4C, 0x5E, 0xB8, 0x65, 0xB4, 0x98, 0xAB, 0x2F, 0x5A, 0x1A, 0xC5, 0x2C, - 0x49, 0xC1, 0x77, 0xC2, 0xD0, 0x13, 0x34, 0x42, 0xD6, 0x5E, 0xD5, 0x14, 0x33, 0x5C, 0x82, 0xC5, - 0x69, 0xDF, 0x38, 0x51, 0x1B, 0xB3, 0xEB, 0x7D, 0xE7, 0x6B, 0x08, 0x8E, 0xB6, 0x7E, 0xB7, 0x1C, - 0x5F, 0x3C, 0x81, 0xC9, 0xF7, 0xCE, 0xAE, 0x28, 0xC0, 0xFE, 0xEB, 0xBA, 0x0B, 0x40, 0x38, 0x1D, - 0x44, 0xD0, 0xD5, 0xD3, 0x98, 0x1F, 0xA3, 0x0E, 0xE9, 0x89, 0x1A, 0xD7, 0x88, 0xCC, 0x25, 0x76, - 0x9C, 0xFF, 0x4D, 0x7F, 0x9C, 0xCF, 0x48, 0x07, 0x91, 0xF9, 0x82, 0xF5, 0x4C, 0xE9, 0xBD, 0x82, - 0x36, 0x36, 0x64, 0x14, 0xED, 0xB8, 0x54, 0xEE, 0x22, 0xA1, 0x66, 0xA3, 0x87, 0x10, 0x76, 0x1F, - 0x5A, 0xCD, 0x4C, 0x31, 0x4C, 0xBA, 0x41, 
0xD2, 0xDB, 0x6C, 0x31, 0x2E, 0x7A, 0x64, 0x15, 0xFF, - 0xA6, 0xD9, 0xB9, 0x7D, 0x1C, 0x3C, 0x98, 0xDD, 0x16, 0xE6, 0xD3, 0xAA, 0xEF, 0xB6, 0xB3, 0x53, - 0x74, 0xD1, 0xAC, 0x5C, 0x04, 0x26, 0x7D, 0x71, 0xDE, 0xAB, 0x66, 0x28, 0x91, 0x3A, 0x6F, 0x4F -}; - - -// "cn/xao" Alloy (XAO) -const static uint8_t test_output_xao[160] = { - 0x9A, 0x29, 0xD0, 0xC4, 0xAF, 0xDC, 0x63, 0x9B, 0x65, 0x53, 0xB1, 0xC8, 0x37, 0x35, 0x11, 0x4C, - 0x5D, 0x77, 0x16, 0x21, 0x42, 0x97, 0x5C, 0xB8, 0x50, 0xC0, 0xA5, 0x1F, 0x64, 0x07, 0xBD, 0x33, - 0xF1, 0xC9, 0x98, 0x40, 0x42, 0xDE, 0x39, 0xD1, 0xBA, 0x2D, 0xAD, 0xEC, 0xFE, 0xEA, 0xD8, 0x46, - 0x56, 0x1C, 0x32, 0x90, 0x42, 0x63, 0x10, 0x80, 0xD7, 0x01, 0xE4, 0xE6, 0x20, 0xB3, 0x60, 0x45, - 0x05, 0xE5, 0xC2, 0x18, 0xCD, 0x07, 0xA4, 0x40, 0x42, 0x91, 0xE2, 0xA4, 0x52, 0x54, 0x79, 0xBA, - 0xCD, 0x7E, 0x61, 0x2D, 0x7F, 0x7E, 0x69, 0x5E, 0xD7, 0xC0, 0x06, 0x65, 0xD7, 0xA1, 0xB8, 0xB8, - 0x1E, 0x31, 0x1C, 0xD3, 0xB7, 0xBC, 0x78, 0x3C, 0x01, 0xAF, 0x77, 0xAA, 0xF3, 0x0F, 0x4C, 0xF2, - 0xD1, 0x8B, 0x58, 0xC7, 0xEB, 0x99, 0x91, 0x53, 0x43, 0x71, 0x47, 0x99, 0x9E, 0x04, 0xA4, 0xEA, - 0xB8, 0xA3, 0xB0, 0x9E, 0x09, 0xF5, 0x57, 0x5C, 0xCF, 0x8A, 0xC6, 0xCA, 0x88, 0x51, 0x9A, 0x01, - 0x31, 0xCC, 0x0C, 0xA6, 0x53, 0xB5, 0x5F, 0xFD, 0x7D, 0x29, 0x3A, 0x35, 0xE9, 0x0E, 0x25, 0x6C -}; - - -// "cn/rto" Arto (RTO) -const static uint8_t test_output_rto[160] = { - 0x82, 0x66, 0x1E, 0x1C, 0x6E, 0x64, 0x36, 0x66, 0x84, 0x06, 0x32, 0x7A, 0x9B, 0xB1, 0x13, 0x19, - 0xA5, 0x56, 0x16, 0x15, 0xDF, 0xEC, 0x1C, 0x9E, 0xE3, 0x88, 0x4A, 0x6C, 0x1C, 0xEB, 0x76, 0xA5, - 0xB3, 0xFB, 0xF4, 0x3F, 0x2B, 0x6A, 0x3A, 0x39, 0xA3, 0x6E, 0x08, 0x33, 0x67, 0x90, 0x31, 0xB9, - 0x3F, 0x27, 0xE4, 0x79, 0x32, 0x61, 0x6B, 0x5C, 0x8A, 0xF8, 0xAF, 0xC0, 0x60, 0xFD, 0x83, 0xB7, - 0x11, 0x11, 0x89, 0xB4, 0xDC, 0xAE, 0x40, 0xC8, 0x64, 0xAA, 0x4D, 0x19, 0x23, 0x7B, 0xD3, 0x27, - 0xB2, 0x0F, 0xA7, 0x50, 0x7D, 0xCA, 0xF5, 0x03, 0x06, 0xB2, 0x26, 0x62, 0xF3, 0x68, 0x2D, 0x30, - 0x6F, 0x93, 0x1E, 
0xFF, 0xCD, 0x85, 0x40, 0x28, 0x5F, 0xC3, 0x8C, 0x76, 0x51, 0x9E, 0xD5, 0x06, - 0x32, 0xD6, 0x35, 0x83, 0xF6, 0x3B, 0x54, 0x4F, 0xA1, 0x9C, 0x13, 0xD8, 0xC4, 0x0E, 0x01, 0x2F, - 0x29, 0xDB, 0x8C, 0x1C, 0xB7, 0x06, 0x86, 0x79, 0x6D, 0xFF, 0x9F, 0x89, 0x3B, 0x3A, 0xA5, 0x79, - 0xE7, 0x81, 0x4E, 0x2A, 0xBD, 0x62, 0xC1, 0x1B, 0x7C, 0xB9, 0x33, 0x7B, 0xEE, 0x95, 0x80, 0xB3 -}; - -// "cn/rwz" -const static uint8_t test_output_rwz[160] = { - 0x5f, 0x56, 0xc6, 0xb0, 0x99, 0x6b, 0xa2, 0x3e, 0x0b, 0xba, 0x07, 0x29, 0xc9, 0x90, 0x74, 0x85, - 0x5a, 0x10, 0xe3, 0x08, 0x7f, 0xdb, 0xfe, 0x94, 0x75, 0x33, 0x54, 0x73, 0x76, 0xf0, 0x75, 0xb8, - 0x8b, 0x70, 0x43, 0x9a, 0xfc, 0xf5, 0xeb, 0x15, 0xbb, 0xf9, 0xad, 0x9d, 0x2a, 0xbd, 0x72, 0x52, - 0x49, 0x54, 0x0b, 0x91, 0xea, 0x61, 0x7f, 0x98, 0x7d, 0x39, 0x17, 0xb7, 0xd7, 0x65, 0xff, 0x75, - 0x13, 0x21, 0x1d, 0xce, 0x61, 0x5a, 0xdc, 0x5f, 0x8c, 0xcb, 0x1f, 0x6f, 0xbb, 0x92, 0x88, 0xc3, - 0xe3, 0xe2, 0xfc, 0x4f, 0x62, 0xfb, 0xf0, 0x48, 0x02, 0x01, 0xd3, 0xbe, 0x77, 0x6a, 0x40, 0xca, - 0x9a, 0xe9, 0xba, 0x0c, 0xc0, 0x2b, 0x11, 0xf6, 0x9b, 0xee, 0x24, 0x3a, 0xd8, 0x86, 0x18, 0xd0, - 0xe8, 0xeb, 0xcb, 0x38, 0x2c, 0xf5, 0x99, 0x83, 0x14, 0x7b, 0x0c, 0x20, 0xbe, 0x50, 0xf4, 0x87, - 0x83, 0x41, 0x75, 0xd8, 0xd1, 0xdd, 0x4b, 0x73, 0xb3, 0x92, 0x8f, 0xe6, 0x1c, 0x72, 0x70, 0xf5, - 0x7c, 0xf6, 0x23, 0x3a, 0xb4, 0x5f, 0xdf, 0xde, 0xa6, 0x5a, 0x58, 0xec, 0x13, 0x5a, 0x23, 0x2f -}; - -// "cn/zls" -const static uint8_t test_output_zls[160] = { - 0x51, 0x6E, 0x33, 0xC6, 0xE4, 0x46, 0xAB, 0xBC, 0xCD, 0xAD, 0x18, 0xC0, 0x4C, 0xD9, 0xA2, 0x5E, - 0x64, 0x10, 0x28, 0x53, 0xB2, 0x0A, 0x42, 0xDF, 0xDE, 0xAA, 0x8B, 0x59, 0x9E, 0xCF, 0x40, 0xE2, - 0x0D, 0x62, 0x5B, 0x42, 0x18, 0xE2, 0x76, 0xAD, 0xD0, 0x74, 0x90, 0x60, 0x8D, 0xC4, 0xC7, 0x80, - 0x17, 0xB5, 0x1B, 0x25, 0x31, 0x39, 0x87, 0xD2, 0x2D, 0x6A, 0x9D, 0x1C, 0x74, 0xF4, 0x43, 0x22, - 0x4B, 0x97, 0x1F, 0x6A, 0xD0, 0xBE, 0x00, 0x74, 0xEC, 0xC5, 0xD8, 0x3B, 0xE6, 0xF4, 0x03, 0x8A, - 0x7B, 0xBA, 0x80, 0xCC, 
0x9F, 0x00, 0xCB, 0xC2, 0x14, 0x8F, 0xF3, 0xD8, 0x92, 0x73, 0xBF, 0x17, - 0x3D, 0x9B, 0x22, 0xA3, 0x61, 0x94, 0x41, 0x9E, 0xF9, 0x68, 0x1D, 0x42, 0x48, 0x3B, 0x39, 0x45, - 0xE2, 0xE6, 0x16, 0x84, 0xFC, 0x21, 0xE6, 0xDA, 0x38, 0x7F, 0x17, 0xAB, 0xD3, 0xF2, 0xCE, 0x1A, - 0x2F, 0x35, 0xD5, 0x74, 0xFA, 0x45, 0x3B, 0x06, 0xD1, 0x4E, 0x84, 0x3A, 0x5D, 0xE3, 0x0E, 0xA5, - 0x00, 0x08, 0x64, 0xF0, 0xA6, 0xC8, 0x94, 0x45, 0x08, 0xED, 0x03, 0x95, 0x52, 0xE9, 0xBC, 0x5F -}; - -// "cn/double" -const static uint8_t test_output_double[160] = { - 0xAE, 0xFB, 0xB3, 0xF0, 0xCC, 0x88, 0x04, 0x6D, 0x11, 0x9F, 0x6C, 0x54, 0xB9, 0x6D, 0x90, 0xC9, - 0xE8, 0x84, 0xEA, 0x3B, 0x59, 0x83, 0xA6, 0x0D, 0x50, 0xA4, 0x2D, 0x7D, 0x3E, 0xBE, 0x48, 0x21, - 0x49, 0xCE, 0x8E, 0xF3, 0xBC, 0x8A, 0x36, 0xBF, 0x86, 0x37, 0x89, 0x55, 0x09, 0xBA, 0x22, 0xF8, - 0xEB, 0x3A, 0xE1, 0xDC, 0x91, 0xF7, 0x62, 0x4B, 0x9F, 0x48, 0xE6, 0x92, 0xBD, 0xE4, 0x5D, 0xC1, - 0xF1, 0x3C, 0x63, 0x1D, 0xEB, 0x0B, 0x04, 0xA3, 0x30, 0xD5, 0x11, 0x15, 0x4C, 0xCE, 0xEF, 0x4F, - 0xDF, 0x69, 0xE3, 0x9E, 0xD2, 0x68, 0xFC, 0x1B, 0x6F, 0xE8, 0x08, 0x9C, 0xBB, 0xA5, 0x2B, 0x60, - 0x52, 0x0F, 0xE5, 0xD2, 0xF3, 0x8A, 0xB3, 0xE1, 0x76, 0x7F, 0x44, 0x25, 0x76, 0xEC, 0xFF, 0xA2, - 0x0C, 0x64, 0xD0, 0x0E, 0x32, 0x33, 0x28, 0x20, 0x73, 0xE0, 0x31, 0x66, 0x4E, 0x54, 0x83, 0x49, - 0x51, 0x55, 0x4D, 0x2E, 0x22, 0xB7, 0x51, 0x09, 0x73, 0x61, 0x7E, 0x6A, 0x57, 0x0B, 0x28, 0x3C, - 0x5E, 0x2E, 0xC1, 0x80, 0x89, 0x39, 0xB3, 0x54, 0x39, 0x52, 0x0E, 0x69, 0x3D, 0xF6, 0xC5, 0x4A -}; - -#ifndef XMRIG_NO_AEON -// "cn-lite/0" -const static uint8_t test_output_v0_lite[160] = { - 0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E, - 0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88, - 0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE, - 0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 
0xDD, - 0x38, 0x08, 0xE1, 0x17, 0x0B, 0x99, 0x8D, 0x1A, 0x3C, 0xCE, 0x35, 0xC5, 0xC7, 0x3A, 0x00, 0x2E, - 0xCB, 0x54, 0xF0, 0x78, 0x2E, 0x9E, 0xDB, 0xC7, 0xDF, 0x2E, 0x71, 0x9A, 0x16, 0x97, 0xC4, 0x18, - 0x4B, 0x97, 0x07, 0xFE, 0x5D, 0x98, 0x9A, 0xD6, 0xD8, 0xE5, 0x92, 0x66, 0x87, 0x7F, 0x19, 0x37, - 0xA2, 0x5E, 0xE6, 0x96, 0xB5, 0x97, 0x33, 0x89, 0xE0, 0xA7, 0xC9, 0xDD, 0x4A, 0x7E, 0x9E, 0x53, - 0xBE, 0x91, 0x2B, 0xF5, 0xF5, 0xAF, 0xDD, 0x09, 0xA2, 0xF4, 0xA4, 0x56, 0xEB, 0x96, 0x22, 0xC9, - 0x94, 0xFB, 0x7B, 0x28, 0xC9, 0x97, 0x65, 0x04, 0xAC, 0x4F, 0x84, 0x71, 0xDA, 0x6E, 0xD8, 0xC5 -}; - - -// "cn-lite/1" AEON v7 -const static uint8_t test_output_v1_lite[160] = { - 0x6D, 0x8C, 0xDC, 0x44, 0x4E, 0x9B, 0xBB, 0xFD, 0x68, 0xFC, 0x43, 0xFC, 0xD4, 0x85, 0x5B, 0x22, - 0x8C, 0x8A, 0x1B, 0xD9, 0x1D, 0x9D, 0x00, 0x28, 0x5B, 0xEC, 0x02, 0xB7, 0xCA, 0x2D, 0x67, 0x41, - 0x87, 0xC4, 0xE5, 0x70, 0x65, 0x3E, 0xB4, 0xC2, 0xB4, 0x2B, 0x7A, 0x0D, 0x54, 0x65, 0x59, 0x45, - 0x2D, 0xFA, 0xB5, 0x73, 0xB8, 0x2E, 0xC5, 0x2F, 0x15, 0x2B, 0x7F, 0xF9, 0x8E, 0x79, 0x44, 0x6F, - 0x16, 0x08, 0x74, 0xC7, 0xA2, 0xD2, 0xA3, 0x97, 0x95, 0x76, 0xCA, 0x4D, 0x06, 0x39, 0x7A, 0xAB, - 0x6C, 0x87, 0x58, 0x33, 0x4D, 0xC8, 0x5A, 0xAB, 0x04, 0x27, 0xFE, 0x8B, 0x1C, 0x23, 0x2F, 0x32, - 0xC0, 0x44, 0xFF, 0x0D, 0xB5, 0x3B, 0x27, 0x96, 0x06, 0x89, 0x7B, 0xA3, 0x0B, 0xD0, 0xCE, 0x9E, - 0x90, 0x22, 0x77, 0x5A, 0xAD, 0xA1, 0xE5, 0xB6, 0xFC, 0xCB, 0x39, 0x7E, 0x2B, 0x10, 0xEE, 0xB4, - 0x8C, 0x2B, 0xA4, 0x1F, 0x60, 0x76, 0x39, 0xD7, 0xF6, 0x46, 0x77, 0x18, 0x20, 0xAD, 0xD4, 0xC9, - 0x87, 0xF7, 0x37, 0xDA, 0xFD, 0xBA, 0xBA, 0xD2, 0xF2, 0x68, 0xDC, 0x26, 0x8D, 0x1B, 0x08, 0xC6 -}; -#endif - - -#ifndef XMRIG_NO_SUMO -// "cn-heavy/0" -const static uint8_t test_output_v0_heavy[160] = { - 0x99, 0x83, 0xF2, 0x1B, 0xDF, 0x20, 0x10, 0xA8, 0xD7, 0x07, 0xBB, 0x2F, 0x14, 0xD7, 0x86, 0x64, - 0xBB, 0xE1, 0x18, 0x7F, 0x55, 0x01, 0x4B, 0x39, 0xE5, 0xF3, 0xD6, 0x93, 0x28, 0xE4, 0x8F, 0xC2, - 0x4D, 0x94, 0x7D, 0xD6, 0xDB, 0x6E, 
0x07, 0x48, 0x26, 0x4A, 0x51, 0x2E, 0xAC, 0xF3, 0x25, 0x4A, - 0x1F, 0x1A, 0xA2, 0x5B, 0xFC, 0x0A, 0xAD, 0x82, 0xDE, 0xA8, 0x99, 0x96, 0x88, 0x52, 0xD2, 0x7D, - 0x3E, 0xE1, 0x23, 0x03, 0x5A, 0x63, 0x7B, 0x66, 0xF6, 0xD7, 0xC2, 0x2A, 0x34, 0x5E, 0x88, 0xE7, - 0xFA, 0xC4, 0x25, 0x36, 0x54, 0xCB, 0xD2, 0x5C, 0x2F, 0x80, 0x2A, 0xF9, 0xCC, 0x43, 0xF7, 0xCD, - 0xE5, 0x18, 0xA8, 0x05, 0x60, 0x18, 0xA5, 0x73, 0x72, 0x9B, 0x32, 0xDC, 0x69, 0x83, 0xC1, 0xE1, - 0x1F, 0xDB, 0xDA, 0x6B, 0xAC, 0xEC, 0x9F, 0x67, 0xF8, 0x27, 0x1D, 0xC7, 0xE6, 0x46, 0x42, 0xF9, - 0x53, 0x62, 0x0A, 0x54, 0x7D, 0x43, 0xEA, 0x18, 0x94, 0xED, 0xD8, 0x92, 0x06, 0x6A, 0xA1, 0x51, - 0xAD, 0xB1, 0xFD, 0x89, 0xFB, 0x5C, 0xB4, 0x25, 0x6A, 0xDD, 0xB0, 0x09, 0xC5, 0x72, 0x87, 0xEB -}; - - -// "cn-heavy/xhv" -const static uint8_t test_output_xhv_heavy[160] = { - 0x5A, 0xC3, 0xF7, 0x85, 0xC4, 0x90, 0xC5, 0x85, 0x50, 0xEC, 0x95, 0xD2, 0x72, 0x65, 0x63, 0x57, - 0x7E, 0x7C, 0x1C, 0x21, 0x2D, 0x0C, 0xDE, 0x59, 0x12, 0x73, 0x20, 0x1E, 0x44, 0xFD, 0xD5, 0xB6, - 0x1F, 0x4E, 0xB2, 0x0A, 0x36, 0x51, 0x4B, 0xF5, 0x4D, 0xC9, 0xE0, 0x90, 0x2C, 0x16, 0x47, 0x3F, - 0xDE, 0x18, 0x29, 0x8E, 0xBB, 0x34, 0x2B, 0xEF, 0x7A, 0x04, 0x22, 0xD1, 0xB1, 0xF2, 0x48, 0xDA, - 0xE3, 0x7F, 0x4B, 0x4C, 0xB4, 0xDF, 0xE8, 0xD3, 0x70, 0xE2, 0xE7, 0x44, 0x25, 0x87, 0x12, 0xF9, - 0x8F, 0x28, 0x0B, 0xCE, 0x2C, 0xEE, 0xDD, 0x88, 0x94, 0x35, 0x48, 0x51, 0xAE, 0xC8, 0x9C, 0x0B, - 0xED, 0x2F, 0xE6, 0x0F, 0x39, 0x05, 0xB4, 0x4A, 0x8F, 0x38, 0x44, 0x2D, 0x4B, 0xE9, 0x7B, 0x81, - 0xC6, 0xB0, 0xE0, 0x0A, 0x39, 0x8C, 0x38, 0xFE, 0x63, 0x31, 0x47, 0x65, 0x0D, 0x2B, 0xF4, 0x96, - 0x13, 0x91, 0x89, 0xB4, 0x5B, 0xA9, 0x2A, 0x7A, 0x09, 0x65, 0x14, 0x20, 0x76, 0x24, 0x6C, 0x80, - 0x1D, 0x3F, 0x9F, 0xCD, 0x68, 0x39, 0xA9, 0x42, 0x27, 0xC1, 0x0C, 0x53, 0x98, 0x35, 0x60, 0x7A -}; - - -// "cn-heavy/tube" -const static uint8_t test_output_tube_heavy[160] = { - 0xFE, 0x53, 0x35, 0x20, 0x76, 0xEA, 0xE6, 0x89, 0xFA, 0x3B, 0x4F, 0xDA, 0x61, 0x46, 0x34, 0xCF, - 0xC3, 0x12, 
0xEE, 0x0C, 0x38, 0x7D, 0xF2, 0xB8, 0xB7, 0x4D, 0xA2, 0xA1, 0x59, 0x74, 0x12, 0x35, - 0xCD, 0x3F, 0x29, 0xDF, 0x07, 0x4A, 0x14, 0xAD, 0x0B, 0x98, 0x99, 0x37, 0xCA, 0x14, 0x68, 0xA3, - 0x8D, 0xAE, 0x86, 0xC1, 0xA3, 0x54, 0x05, 0xBE, 0xEA, 0x6D, 0x29, 0x24, 0x0C, 0x82, 0x97, 0x74, - 0xA0, 0x64, 0x77, 0xCD, 0x8D, 0x8A, 0xC3, 0x10, 0xB4, 0x89, 0x0E, 0xBB, 0x7D, 0xE6, 0x32, 0x8F, - 0xF4, 0x2D, 0xB6, 0x9E, 0x8A, 0xF9, 0xF8, 0xEE, 0x2C, 0xD0, 0x74, 0xED, 0xA9, 0xAA, 0xA1, 0xFB, - 0xE2, 0xC9, 0x89, 0x66, 0xD6, 0x66, 0x52, 0xA2, 0x16, 0xDA, 0x36, 0xA0, 0x10, 0x62, 0xD2, 0xB1, - 0x76, 0xD1, 0x31, 0xE9, 0x1C, 0x08, 0xB6, 0xCA, 0xAF, 0x89, 0xB9, 0x3D, 0x2C, 0xFA, 0x9A, 0x30, - 0x74, 0x6A, 0x96, 0xA1, 0x95, 0x6C, 0xBB, 0x46, 0x4D, 0xE0, 0xEB, 0x28, 0xBE, 0x2A, 0x8C, 0x34, - 0x57, 0x79, 0xBE, 0x52, 0xFB, 0xBC, 0x68, 0x43, 0x45, 0xF4, 0xDF, 0xA5, 0xA8, 0xFD, 0x55, 0xA6 -}; -#endif - - -#ifndef XMRIG_NO_CN_PICO -// "cn-pico/trtl" -const static uint8_t test_output_pico_trtl[160] = { - 0x08, 0xF4, 0x21, 0xD7, 0x83, 0x31, 0x17, 0x30, 0x0E, 0xDA, 0x66, 0xE9, 0x8F, 0x4A, 0x25, 0x69, - 0x09, 0x3D, 0xF3, 0x00, 0x50, 0x01, 0x73, 0x94, 0x4E, 0xFC, 0x40, 0x1E, 0x9A, 0x4A, 0x17, 0xAF, - 0xB2, 0x17, 0x2E, 0xC9, 0x46, 0x6E, 0x1A, 0xEE, 0x70, 0xEC, 0x85, 0x72, 0xA1, 0x4C, 0x23, 0x3E, - 0xE3, 0x54, 0x58, 0x2B, 0xCB, 0x93, 0xF8, 0x69, 0xD4, 0x29, 0x74, 0x4D, 0xE5, 0x72, 0x6A, 0x26, - 0x4E, 0xFD, 0x28, 0xFC, 0xD3, 0x74, 0x8A, 0x83, 0xF3, 0xCA, 0x92, 0x84, 0xE7, 0x4E, 0x10, 0xC2, - 0x05, 0x62, 0xC7, 0xBE, 0x99, 0x73, 0xED, 0x90, 0xB5, 0x6F, 0xDA, 0x64, 0x71, 0x2D, 0x99, 0x39, - 0x29, 0xDB, 0x22, 0x2B, 0x97, 0xB6, 0x37, 0x0E, 0x9A, 0x03, 0x65, 0xCC, 0xF7, 0xD0, 0x9A, 0xB7, - 0x68, 0xCE, 0x07, 0x3E, 0x15, 0x40, 0x3C, 0xCE, 0x8C, 0x63, 0x16, 0x72, 0xB5, 0x74, 0x84, 0xF4, - 0xA1, 0xE7, 0x53, 0x85, 0xFB, 0x72, 0xDD, 0x75, 0x90, 0x39, 0xB2, 0x3D, 0xC3, 0x08, 0x2C, 0xD5, - 0x01, 0x08, 0x27, 0x75, 0x86, 0xB9, 0xBB, 0x9B, 0xDF, 0xEA, 0x49, 0xDE, 0x46, 0xCB, 0x83, 0x45 -}; -#endif - - -#ifndef 
XMRIG_NO_CN_GPU -// "cn/gpu" -const static uint8_t test_output_gpu[160] = { - 0xE5, 0x5C, 0xB2, 0x3E, 0x51, 0x64, 0x9A, 0x59, 0xB1, 0x27, 0xB9, 0x6B, 0x51, 0x5F, 0x2B, 0xF7, - 0xBF, 0xEA, 0x19, 0x97, 0x41, 0xA0, 0x21, 0x6C, 0xF8, 0x38, 0xDE, 0xD0, 0x6E, 0xFF, 0x82, 0xDF, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -}; -#endif - - -#endif /* XMRIG_CRYPTONIGHT_TEST_H */ diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h deleted file mode 100644 index 202b662a..00000000 --- a/src/crypto/CryptoNight_x86.h +++ /dev/null @@ -1,1481 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef XMRIG_CRYPTONIGHT_X86_H -#define XMRIG_CRYPTONIGHT_X86_H - - -#ifdef __GNUC__ -# include -#else -# include -# define __restrict__ __restrict -#endif - - -#include "common/cpu/Cpu.h" -#include "common/crypto/keccak.h" -#include "crypto/CryptoNight.h" -#include "crypto/CryptoNight_constants.h" -#include "crypto/CryptoNight_monero.h" -#include "crypto/soft_aes.h" - - -extern "C" -{ -#include "crypto/c_groestl.h" -#include "crypto/c_blake256.h" -#include "crypto/c_jh.h" -#include "crypto/c_skein.h" -} - - -static inline void do_blake_hash(const uint8_t *input, size_t len, uint8_t *output) { - blake256_hash(output, input, len); -} - - -static inline void do_groestl_hash(const uint8_t *input, size_t len, uint8_t *output) { - groestl(input, len * 8, output); -} - - -static inline void do_jh_hash(const uint8_t *input, size_t len, uint8_t *output) { - jh_hash(32 * 8, input, 8 * len, output); -} - - -static inline void do_skein_hash(const uint8_t *input, size_t len, uint8_t *output) { - xmr_skein(input, output); -} - - -void (* const extra_hashes[4])(const uint8_t *, size_t, uint8_t *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash}; - - -#if defined(__x86_64__) || defined(_M_AMD64) -# ifdef __GNUC__ -static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t* hi) -{ - unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b; - *hi = r >> 64; - return (uint64_t) r; -} -# else - #define __umul128 _umul128 -# endif -#elif defined(__i386__) || defined(_M_IX86) -static inline int64_t _mm_cvtsi128_si64(__m128i a) -{ - return ((uint64_t)(uint32_t)_mm_cvtsi128_si32(a) | 
((uint64_t)(uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(a, 4)) << 32)); -} - -static inline __m128i _mm_cvtsi64_si128(int64_t a) { - return _mm_set_epi64x(0, a); -} - -static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) { - // multiplier = ab = a * 2^32 + b - // multiplicand = cd = c * 2^32 + d - // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d - uint64_t a = multiplier >> 32; - uint64_t b = multiplier & 0xFFFFFFFF; - uint64_t c = multiplicand >> 32; - uint64_t d = multiplicand & 0xFFFFFFFF; - - //uint64_t ac = a * c; - uint64_t ad = a * d; - //uint64_t bc = b * c; - uint64_t bd = b * d; - - uint64_t adbc = ad + (b * c); - uint64_t adbc_carry = adbc < ad ? 1 : 0; - - // multiplier * multiplicand = product_hi * 2^64 + product_lo - uint64_t product_lo = bd + (adbc << 32); - uint64_t product_lo_carry = product_lo < bd ? 1 : 0; - *product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry; - - return product_lo; -} -#endif - - -// This will shift and xor tmp1 into itself as 4 32-bit vals such as -// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1) -static inline __m128i sl_xor(__m128i tmp1) -{ - __m128i tmp4; - tmp4 = _mm_slli_si128(tmp1, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - tmp4 = _mm_slli_si128(tmp4, 0x04); - tmp1 = _mm_xor_si128(tmp1, tmp4); - return tmp1; -} - - -template -static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2) -{ - __m128i xout1 = _mm_aeskeygenassist_si128(*xout2, rcon); - xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem - *xout0 = sl_xor(*xout0); - *xout0 = _mm_xor_si128(*xout0, xout1); - xout1 = _mm_aeskeygenassist_si128(*xout0, 0x00); - xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem - *xout2 = sl_xor(*xout2); - *xout2 = _mm_xor_si128(*xout2, xout1); -} - - -template -static inline void 
soft_aes_genkey_sub(__m128i* xout0, __m128i* xout2) -{ - __m128i xout1 = soft_aeskeygenassist(*xout2); - xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem - *xout0 = sl_xor(*xout0); - *xout0 = _mm_xor_si128(*xout0, xout1); - xout1 = soft_aeskeygenassist<0x00>(*xout0); - xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem - *xout2 = sl_xor(*xout2); - *xout2 = _mm_xor_si128(*xout2, xout1); -} - - -template -static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9) -{ - __m128i xout0 = _mm_load_si128(memory); - __m128i xout2 = _mm_load_si128(memory + 1); - *k0 = xout0; - *k1 = xout2; - - SOFT_AES ? soft_aes_genkey_sub<0x01>(&xout0, &xout2) : aes_genkey_sub<0x01>(&xout0, &xout2); - *k2 = xout0; - *k3 = xout2; - - SOFT_AES ? soft_aes_genkey_sub<0x02>(&xout0, &xout2) : aes_genkey_sub<0x02>(&xout0, &xout2); - *k4 = xout0; - *k5 = xout2; - - SOFT_AES ? soft_aes_genkey_sub<0x04>(&xout0, &xout2) : aes_genkey_sub<0x04>(&xout0, &xout2); - *k6 = xout0; - *k7 = xout2; - - SOFT_AES ? 
soft_aes_genkey_sub<0x08>(&xout0, &xout2) : aes_genkey_sub<0x08>(&xout0, &xout2); - *k8 = xout0; - *k9 = xout2; -} - - -static FORCEINLINE void soft_aesenc(void* __restrict ptr, const void* __restrict key, const uint32_t* __restrict t) -{ - uint32_t x0 = ((const uint32_t*)(ptr))[0]; - uint32_t x1 = ((const uint32_t*)(ptr))[1]; - uint32_t x2 = ((const uint32_t*)(ptr))[2]; - uint32_t x3 = ((const uint32_t*)(ptr))[3]; - - uint32_t y0 = t[x0 & 0xff]; x0 >>= 8; - uint32_t y1 = t[x1 & 0xff]; x1 >>= 8; - uint32_t y2 = t[x2 & 0xff]; x2 >>= 8; - uint32_t y3 = t[x3 & 0xff]; x3 >>= 8; - t += 256; - - y0 ^= t[x1 & 0xff]; x1 >>= 8; - y1 ^= t[x2 & 0xff]; x2 >>= 8; - y2 ^= t[x3 & 0xff]; x3 >>= 8; - y3 ^= t[x0 & 0xff]; x0 >>= 8; - t += 256; - - y0 ^= t[x2 & 0xff]; x2 >>= 8; - y1 ^= t[x3 & 0xff]; x3 >>= 8; - y2 ^= t[x0 & 0xff]; x0 >>= 8; - y3 ^= t[x1 & 0xff]; x1 >>= 8; - t += 256; - - y0 ^= t[x3]; - y1 ^= t[x0]; - y2 ^= t[x1]; - y3 ^= t[x2]; - - ((uint32_t*)ptr)[0] = y0 ^ ((uint32_t*)key)[0]; - ((uint32_t*)ptr)[1] = y1 ^ ((uint32_t*)key)[1]; - ((uint32_t*)ptr)[2] = y2 ^ ((uint32_t*)key)[2]; - ((uint32_t*)ptr)[3] = y3 ^ ((uint32_t*)key)[3]; -} - -static FORCEINLINE __m128i soft_aesenc(const void* __restrict ptr, const __m128i key, const uint32_t* __restrict t) -{ - uint32_t x0 = ((const uint32_t*)(ptr))[0]; - uint32_t x1 = ((const uint32_t*)(ptr))[1]; - uint32_t x2 = ((const uint32_t*)(ptr))[2]; - uint32_t x3 = ((const uint32_t*)(ptr))[3]; - - uint32_t y0 = t[x0 & 0xff]; x0 >>= 8; - uint32_t y1 = t[x1 & 0xff]; x1 >>= 8; - uint32_t y2 = t[x2 & 0xff]; x2 >>= 8; - uint32_t y3 = t[x3 & 0xff]; x3 >>= 8; - t += 256; - - y0 ^= t[x1 & 0xff]; x1 >>= 8; - y1 ^= t[x2 & 0xff]; x2 >>= 8; - y2 ^= t[x3 & 0xff]; x3 >>= 8; - y3 ^= t[x0 & 0xff]; x0 >>= 8; - t += 256; - - y0 ^= t[x2 & 0xff]; x2 >>= 8; - y1 ^= t[x3 & 0xff]; x3 >>= 8; - y2 ^= t[x0 & 0xff]; x0 >>= 8; - y3 ^= t[x1 & 0xff]; x1 >>= 8; - - y0 ^= t[x3 + 256]; - y1 ^= t[x0 + 256]; - y2 ^= t[x1 + 256]; - y3 ^= t[x2 + 256]; - - return 
_mm_xor_si128(_mm_set_epi32(y3, y2, y1, y0), key); -} - -template -void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7); - -template<> -NOINLINE void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7) -{ - *x0 = soft_aesenc((uint32_t*)x0, key, (const uint32_t*)saes_table); - *x1 = soft_aesenc((uint32_t*)x1, key, (const uint32_t*)saes_table); - *x2 = soft_aesenc((uint32_t*)x2, key, (const uint32_t*)saes_table); - *x3 = soft_aesenc((uint32_t*)x3, key, (const uint32_t*)saes_table); - *x4 = soft_aesenc((uint32_t*)x4, key, (const uint32_t*)saes_table); - *x5 = soft_aesenc((uint32_t*)x5, key, (const uint32_t*)saes_table); - *x6 = soft_aesenc((uint32_t*)x6, key, (const uint32_t*)saes_table); - *x7 = soft_aesenc((uint32_t*)x7, key, (const uint32_t*)saes_table); -} - -template<> -FORCEINLINE void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7) -{ - *x0 = _mm_aesenc_si128(*x0, key); - *x1 = _mm_aesenc_si128(*x1, key); - *x2 = _mm_aesenc_si128(*x2, key); - *x3 = _mm_aesenc_si128(*x3, key); - *x4 = _mm_aesenc_si128(*x4, key); - *x5 = _mm_aesenc_si128(*x5, key); - *x6 = _mm_aesenc_si128(*x6, key); - *x7 = _mm_aesenc_si128(*x7, key); -} - -inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3, __m128i& x4, __m128i& x5, __m128i& x6, __m128i& x7) -{ - __m128i tmp0 = x0; - x0 = _mm_xor_si128(x0, x1); - x1 = _mm_xor_si128(x1, x2); - x2 = _mm_xor_si128(x2, x3); - x3 = _mm_xor_si128(x3, x4); - x4 = _mm_xor_si128(x4, x5); - x5 = _mm_xor_si128(x5, x6); - x6 = _mm_xor_si128(x6, x7); - x7 = _mm_xor_si128(x7, tmp0); -} - - -template -static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output) -{ - __m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7; - __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - - 
aes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9); - - xin0 = _mm_load_si128(input + 4); - xin1 = _mm_load_si128(input + 5); - xin2 = _mm_load_si128(input + 6); - xin3 = _mm_load_si128(input + 7); - xin4 = _mm_load_si128(input + 8); - xin5 = _mm_load_si128(input + 9); - xin6 = _mm_load_si128(input + 10); - xin7 = _mm_load_si128(input + 11); - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - for (size_t i = 0; i < 16; i++) { - aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - - mix_and_propagate(xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7); - } - } - - for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) { - aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - aes_round(k9, &xin0, &xin1, 
&xin2, &xin3, &xin4, &xin5, &xin6, &xin7); - - _mm_store_si128(output + i + 0, xin0); - _mm_store_si128(output + i + 1, xin1); - _mm_store_si128(output + i + 2, xin2); - _mm_store_si128(output + i + 3, xin3); - _mm_store_si128(output + i + 4, xin4); - _mm_store_si128(output + i + 5, xin5); - _mm_store_si128(output + i + 6, xin6); - _mm_store_si128(output + i + 7, xin7); - } -} - - -#ifndef XMRIG_NO_CN_GPU -template -void cn_explode_scratchpad_gpu(const uint8_t *input, uint8_t *output) -{ - constexpr size_t hash_size = 200; // 25x8 bytes - alignas(16) uint64_t hash[25]; - - for (uint64_t i = 0; i < MEM / 512; i++) - { - memcpy(hash, input, hash_size); - hash[0] ^= i; - - xmrig::keccakf(hash, 24); - memcpy(output, hash, 160); - output += 160; - - xmrig::keccakf(hash, 24); - memcpy(output, hash, 176); - output += 176; - - xmrig::keccakf(hash, 24); - memcpy(output, hash, 176); - output += 176; - } -} -#endif - - -template -static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output) -{ - __m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7; - __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9; - - aes_genkey(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9); - - xout0 = _mm_load_si128(output + 4); - xout1 = _mm_load_si128(output + 5); - xout2 = _mm_load_si128(output + 6); - xout3 = _mm_load_si128(output + 7); - xout4 = _mm_load_si128(output + 8); - xout5 = _mm_load_si128(output + 9); - xout6 = _mm_load_si128(output + 10); - xout7 = _mm_load_si128(output + 11); - - for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) - { - xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0); - xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1); - xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2); - xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3); - xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4); - xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5); - xout6 = 
_mm_xor_si128(_mm_load_si128(input + i + 6), xout6); - xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7); - - aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7); - } - } - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) { - xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0); - xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1); - xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2); - xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3); - xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4); - xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5); - xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6); - xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7); - - aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - 
aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - - mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7); - } - - for (size_t i = 0; i < 16; i++) { - aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); - - mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7); - } - } - - _mm_store_si128(output + 4, xout0); - _mm_store_si128(output + 5, xout1); - _mm_store_si128(output + 6, xout2); - _mm_store_si128(output + 7, xout3); - _mm_store_si128(output + 8, xout4); - _mm_store_si128(output + 9, xout5); - _mm_store_si128(output + 10, xout6); - _mm_store_si128(output + 11, xout7); -} - - -static inline __m128i aes_round_tweak_div(const __m128i &in, const __m128i &key) -{ - alignas(16) uint32_t k[4]; - alignas(16) uint32_t x[4]; - - _mm_store_si128((__m128i*) k, key); 
- _mm_store_si128((__m128i*) x, _mm_xor_si128(in, _mm_set_epi64x(0xffffffffffffffff, 0xffffffffffffffff))); - - #define BYTE(p, i) ((unsigned char*)&x[p])[i] - k[0] ^= saes_table[0][BYTE(0, 0)] ^ saes_table[1][BYTE(1, 1)] ^ saes_table[2][BYTE(2, 2)] ^ saes_table[3][BYTE(3, 3)]; - x[0] ^= k[0]; - k[1] ^= saes_table[0][BYTE(1, 0)] ^ saes_table[1][BYTE(2, 1)] ^ saes_table[2][BYTE(3, 2)] ^ saes_table[3][BYTE(0, 3)]; - x[1] ^= k[1]; - k[2] ^= saes_table[0][BYTE(2, 0)] ^ saes_table[1][BYTE(3, 1)] ^ saes_table[2][BYTE(0, 2)] ^ saes_table[3][BYTE(1, 3)]; - x[2] ^= k[2]; - k[3] ^= saes_table[0][BYTE(3, 0)] ^ saes_table[1][BYTE(0, 1)] ^ saes_table[2][BYTE(1, 2)] ^ saes_table[3][BYTE(2, 3)]; - #undef BYTE - - return _mm_load_si128((__m128i*)k); -} - - -static inline __m128i int_sqrt_v2(const uint64_t n0) -{ - __m128d x = _mm_castsi128_pd(_mm_add_epi64(_mm_cvtsi64_si128(n0 >> 12), _mm_set_epi64x(0, 1023ULL << 52))); - x = _mm_sqrt_sd(_mm_setzero_pd(), x); - uint64_t r = static_cast(_mm_cvtsi128_si64(_mm_castpd_si128(x))); - - const uint64_t s = r >> 20; - r >>= 19; - - uint64_t x2 = (s - (1022ULL << 32)) * (r - s - (1022ULL << 32) + 1); -# if (defined(_MSC_VER) || __GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ > 1)) && (defined(__x86_64__) || defined(_M_AMD64)) - _addcarry_u64(_subborrow_u64(0, x2, n0, (unsigned long long int*)&x2), r, 0, (unsigned long long int*)&r); -# else - if (x2 < n0) ++r; -# endif - - return _mm_cvtsi64_si128(r); -} - - -template -static inline void cryptonight_monero_tweak(uint64_t* mem_out, const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i bx1, __m128i& cx) -{ - if (BASE == xmrig::VARIANT_2) { - VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (VARIANT == xmrig::VARIANT_RWZ ? 
1 : 0)); - _mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx)); - } else { - __m128i tmp = _mm_xor_si128(bx0, cx); - mem_out[0] = _mm_cvtsi128_si64(tmp); - - tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp))); - uint64_t vh = _mm_cvtsi128_si64(tmp); - - uint8_t x = static_cast(vh >> 24); - static const uint16_t table = 0x7531; - const uint8_t index = (((x >> (VARIANT == xmrig::VARIANT_XTL ? 4 : 3)) & 6) | (x & 1)) << 1; - vh ^= ((table >> index) & 0x3) << 28; - - mem_out[1] = vh; - } -} - -void wow_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); -void v4_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); - -template -inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MASK = xmrig::cn_select_mask(); - constexpr size_t ITERATIONS = xmrig::cn_select_iter(); - constexpr size_t MEM = xmrig::cn_select_memory(); - constexpr xmrig::Variant BASE = xmrig::cn_base_variant(); - - static_assert(MASK > 0 && ITERATIONS > 0 && MEM > 0, "unsupported algorithm/variant"); - - if (BASE == xmrig::VARIANT_1 && size < 43) { - memset(output, 0, 32); - return; - } - - xmrig::keccak(input, size, ctx[0]->state); - - cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); - - uint64_t* h0 = reinterpret_cast(ctx[0]->state); - -#ifndef XMRIG_NO_ASM - if (SOFT_AES && xmrig::cn_is_cryptonight_r()) - { - if (!ctx[0]->generated_code_data.match(VARIANT, height)) { - V4_Instruction code[256]; - const int code_size = v4_random_math_init(code, height); - - if (VARIANT == xmrig::VARIANT_WOW) - wow_soft_aes_compile_code(code, code_size, reinterpret_cast(ctx[0]->generated_code), xmrig::ASM_NONE); - else if (VARIANT == xmrig::VARIANT_4) - v4_soft_aes_compile_code(code, code_size, 
reinterpret_cast(ctx[0]->generated_code), xmrig::ASM_NONE); - - ctx[0]->generated_code_data.variant = VARIANT; - ctx[0]->generated_code_data.height = height; - } - - ctx[0]->saes_table = (const uint32_t*)saes_table; - ctx[0]->generated_code(ctx); - } else { -#endif - - const uint8_t* l0 = ctx[0]->memory; - - VARIANT1_INIT(0); - VARIANT2_INIT(0); - VARIANT2_SET_ROUNDING_MODE(); - VARIANT4_RANDOM_MATH_INIT(0); - - uint64_t al0 = h0[0] ^ h0[4]; - uint64_t ah0 = h0[1] ^ h0[5]; - __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - __m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]); - - uint64_t idx0 = al0; - - for (size_t i = 0; i < ITERATIONS; i++) { - __m128i cx; - if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) { - cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]); - } - - const __m128i ax0 = _mm_set_epi64x(ah0, al0); - if (VARIANT == xmrig::VARIANT_TUBE) { - cx = aes_round_tweak_div(cx, ax0); - } - else if (SOFT_AES) { - cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0, (const uint32_t*)saes_table); - } - else { - cx = _mm_aesenc_si128(cx, ax0); - } - - if (BASE == xmrig::VARIANT_1 || BASE == xmrig::VARIANT_2) { - cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx0, bx1, cx); - } else { - _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); - } - - idx0 = _mm_cvtsi128_si64(cx); - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*) &l0[idx0 & MASK])[0]; - ch = ((uint64_t*) &l0[idx0 & MASK])[1]; - - if (BASE == xmrig::VARIANT_2) { - if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { - VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1); - if (VARIANT == xmrig::VARIANT_4) { - al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32); - ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32); - } - } else { - VARIANT2_INTEGER_MATH(0, cl, cx); - } - } - - lo = __umul128(idx0, cl, &hi); - - if (BASE == xmrig::VARIANT_2) { - if (VARIANT == xmrig::VARIANT_4) { - VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0); - } else 
{ - VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0)); - } - } - - al0 += hi; - ah0 += lo; - - ((uint64_t*)&l0[idx0 & MASK])[0] = al0; - - if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) { - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0; - } else if (BASE == xmrig::VARIANT_1) { - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0; - } else { - ((uint64_t*)&l0[idx0 & MASK])[1] = ah0; - } - - al0 ^= cl; - ah0 ^= ch; - idx0 = al0; - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - int64_t n = ((int64_t*)&l0[idx0 & MASK])[0]; - int32_t d = ((int32_t*)&l0[idx0 & MASK])[2]; - int64_t q = n / (d | 0x5); - - ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; - - if (VARIANT == xmrig::VARIANT_XHV) { - d = ~d; - } - - idx0 = d ^ q; - } - - if (BASE == xmrig::VARIANT_2) { - bx1 = bx0; - } - - bx0 = cx; - } - -#ifndef XMRIG_NO_ASM - } -#endif - - cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); - - xmrig::keccakf(h0, 24); - extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); -} - - -#ifndef XMRIG_NO_CN_GPU -template -void cn_gpu_inner_avx(const uint8_t *spad, uint8_t *lpad); - - -template -void cn_gpu_inner_ssse3(const uint8_t *spad, uint8_t *lpad); - - -template -inline void cryptonight_single_hash_gpu(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MASK = xmrig::CRYPTONIGHT_GPU_MASK; - constexpr size_t ITERATIONS = xmrig::cn_select_iter(); - constexpr size_t MEM = xmrig::cn_select_memory(); - - static_assert(MASK > 0 && ITERATIONS > 0 && MEM > 0, "unsupported algorithm/variant"); - - xmrig::keccak(input, size, ctx[0]->state); - cn_explode_scratchpad_gpu(ctx[0]->state, ctx[0]->memory); - -# ifdef _MSC_VER - _control87(RC_NEAR, MCW_RC); -# else - fesetround(FE_TONEAREST); -# endif - - if (xmrig::Cpu::info()->hasAVX2()) { - 
cn_gpu_inner_avx(ctx[0]->state, ctx[0]->memory); - } else { - cn_gpu_inner_ssse3(ctx[0]->state, ctx[0]->memory); - } - - cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); - - xmrig::keccakf((uint64_t*) ctx[0]->state, 24); - memcpy(output, ctx[0]->state, 32); -} -#endif - - -#ifndef XMRIG_NO_ASM -extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx); -extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx); -extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx); -extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx); -extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx **ctx); -extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx **ctx); - -extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm; - -extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm; - -extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ivybridge_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ryzen_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm; - -extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ivybridge_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ryzen_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_bulldozer_asm; -extern xmrig::CpuThread::cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm; - -void wow_compile_code(const V4_Instruction* 
code, int code_size, void* machine_code, xmrig::Assembly ASM); -void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); -void wow_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); -void v4_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); - -template -void cn_r_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - v4_compile_code(code, code_size, machine_code, ASM); -} - -template -void cn_r_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - v4_compile_code_double(code, code_size, machine_code, ASM); -} - -template<> -void cn_r_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - wow_compile_code(code, code_size, machine_code, ASM); -} - -template<> -void cn_r_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - wow_compile_code_double(code, code_size, machine_code, ASM); -} - -template -inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MEM = xmrig::cn_select_memory(); - - if (xmrig::cn_is_cryptonight_r() && !ctx[0]->generated_code_data.match(VARIANT, height)) { - V4_Instruction code[256]; - const int code_size = v4_random_math_init(code, height); - cn_r_compile_code(code, code_size, reinterpret_cast(ctx[0]->generated_code), ASM); - ctx[0]->generated_code_data.variant = VARIANT; - ctx[0]->generated_code_data.height = height; - } - - xmrig::keccak(input, size, ctx[0]->state); - cn_explode_scratchpad(reinterpret_cast<__m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory)); - - if (VARIANT == xmrig::VARIANT_2) { - if (ASM == xmrig::ASM_INTEL) { - 
cnv2_mainloop_ivybridge_asm(ctx); - } - else if (ASM == xmrig::ASM_RYZEN) { - cnv2_mainloop_ryzen_asm(ctx); - } - else { - cnv2_mainloop_bulldozer_asm(ctx); - } - } - else if (VARIANT == xmrig::VARIANT_HALF) { - if (ASM == xmrig::ASM_INTEL) { - cn_half_mainloop_ivybridge_asm(ctx); - } - else if (ASM == xmrig::ASM_RYZEN) { - cn_half_mainloop_ryzen_asm(ctx); - } - else { - cn_half_mainloop_bulldozer_asm(ctx); - } - } - else if (VARIANT == xmrig::VARIANT_TRTL) { - if (ASM == xmrig::ASM_INTEL) { - cn_trtl_mainloop_ivybridge_asm(ctx); - } - else if (ASM == xmrig::ASM_RYZEN) { - cn_trtl_mainloop_ryzen_asm(ctx); - } - else { - cn_trtl_mainloop_bulldozer_asm(ctx); - } - } - else if (VARIANT == xmrig::VARIANT_RWZ) { - cnv2_rwz_mainloop_asm(ctx); - } - else if (VARIANT == xmrig::VARIANT_ZLS) { - if (ASM == xmrig::ASM_INTEL) { - cn_zls_mainloop_ivybridge_asm(ctx); - } - else if (ASM == xmrig::ASM_RYZEN) { - cn_zls_mainloop_ryzen_asm(ctx); - } - else { - cn_zls_mainloop_bulldozer_asm(ctx); - } - } - else if (VARIANT == xmrig::VARIANT_DOUBLE) { - if (ASM == xmrig::ASM_INTEL) { - cn_double_mainloop_ivybridge_asm(ctx); - } - else if (ASM == xmrig::ASM_RYZEN) { - cn_double_mainloop_ryzen_asm(ctx); - } - else { - cn_double_mainloop_bulldozer_asm(ctx); - } - } - else if (xmrig::cn_is_cryptonight_r()) { - ctx[0]->generated_code(ctx); - } - - cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state)); - xmrig::keccakf(reinterpret_cast(ctx[0]->state), 24); - extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); -} - - -template -inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MEM = xmrig::cn_select_memory(); - - if (xmrig::cn_is_cryptonight_r() && !ctx[0]->generated_code_double_data.match(VARIANT, height)) { - V4_Instruction code[256]; - const int code_size = v4_random_math_init(code, 
height); - cn_r_compile_code_double(code, code_size, reinterpret_cast(ctx[0]->generated_code_double), ASM); - ctx[0]->generated_code_double_data.variant = VARIANT; - ctx[0]->generated_code_double_data.height = height; - } - - xmrig::keccak(input, size, ctx[0]->state); - xmrig::keccak(input + size, size, ctx[1]->state); - - cn_explode_scratchpad(reinterpret_cast<__m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory)); - cn_explode_scratchpad(reinterpret_cast<__m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory)); - - if (VARIANT == xmrig::VARIANT_2) { - cnv2_double_mainloop_sandybridge_asm(ctx); - } - else if (VARIANT == xmrig::VARIANT_HALF) { - cn_half_double_mainloop_sandybridge_asm(ctx); - } - else if (VARIANT == xmrig::VARIANT_TRTL) { - cn_trtl_double_mainloop_sandybridge_asm(ctx); - } - else if (VARIANT == xmrig::VARIANT_RWZ) { - cnv2_rwz_double_mainloop_asm(ctx); - } - else if (VARIANT == xmrig::VARIANT_ZLS) { - cn_zls_double_mainloop_sandybridge_asm(ctx); - } - else if (VARIANT == xmrig::VARIANT_DOUBLE) { - cn_double_double_mainloop_sandybridge_asm(ctx); - } - else if (xmrig::cn_is_cryptonight_r()) { - ctx[0]->generated_code_double(ctx); - } - - cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state)); - cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state)); - - xmrig::keccakf(reinterpret_cast(ctx[0]->state), 24); - xmrig::keccakf(reinterpret_cast(ctx[1]->state), 24); - - extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); - extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32); -} -#endif - - -template -inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MASK = xmrig::cn_select_mask(); - constexpr size_t ITERATIONS = xmrig::cn_select_iter(); - constexpr size_t MEM 
= xmrig::cn_select_memory(); - constexpr xmrig::Variant BASE = xmrig::cn_base_variant(); - - if (BASE == xmrig::VARIANT_1 && size < 43) { - memset(output, 0, 64); - return; - } - - xmrig::keccak(input, size, ctx[0]->state); - xmrig::keccak(input + size, size, ctx[1]->state); - - const uint8_t* l0 = ctx[0]->memory; - const uint8_t* l1 = ctx[1]->memory; - uint64_t* h0 = reinterpret_cast(ctx[0]->state); - uint64_t* h1 = reinterpret_cast(ctx[1]->state); - - VARIANT1_INIT(0); - VARIANT1_INIT(1); - VARIANT2_INIT(0); - VARIANT2_INIT(1); - VARIANT2_SET_ROUNDING_MODE(); - VARIANT4_RANDOM_MATH_INIT(0); - VARIANT4_RANDOM_MATH_INIT(1); - - cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0); - cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1); - - uint64_t al0 = h0[0] ^ h0[4]; - uint64_t al1 = h1[0] ^ h1[4]; - uint64_t ah0 = h0[1] ^ h0[5]; - uint64_t ah1 = h1[1] ^ h1[5]; - - __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); - __m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]); - __m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); - __m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]); - - uint64_t idx0 = al0; - uint64_t idx1 = al1; - - for (size_t i = 0; i < ITERATIONS; i++) { - __m128i cx0, cx1; - if (VARIANT == xmrig::VARIANT_TUBE || !SOFT_AES) { - cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]); - cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]); - } - - const __m128i ax0 = _mm_set_epi64x(ah0, al0); - const __m128i ax1 = _mm_set_epi64x(ah1, al1); - if (VARIANT == xmrig::VARIANT_TUBE) { - cx0 = aes_round_tweak_div(cx0, ax0); - cx1 = aes_round_tweak_div(cx1, ax1); - } - else if (SOFT_AES) { - cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0, (const uint32_t*)saes_table); - cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], ax1, (const uint32_t*)saes_table); - } - else { - cx0 = _mm_aesenc_si128(cx0, ax0); - cx1 = _mm_aesenc_si128(cx1, ax1); - } - - if (BASE == xmrig::VARIANT_1 || (BASE == xmrig::VARIANT_2)) { - 
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx00, bx01, cx0); - cryptonight_monero_tweak((uint64_t*)&l1[idx1 & MASK], l1, idx1 & MASK, ax1, bx10, bx11, cx1); - } else { - _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0)); - _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx10, cx1)); - } - - idx0 = _mm_cvtsi128_si64(cx0); - idx1 = _mm_cvtsi128_si64(cx1); - - uint64_t hi, lo, cl, ch; - cl = ((uint64_t*) &l0[idx0 & MASK])[0]; - ch = ((uint64_t*) &l0[idx0 & MASK])[1]; - - if (BASE == xmrig::VARIANT_2) { - if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { - VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01); - if (VARIANT == xmrig::VARIANT_4) { - al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32); - ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32); - } - } else { - VARIANT2_INTEGER_MATH(0, cl, cx0); - } - } - - lo = __umul128(idx0, cl, &hi); - - if (BASE == xmrig::VARIANT_2) { - if (VARIANT == xmrig::VARIANT_4) { - VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0); - } else { - VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 
1 : 0)); - } - } - - al0 += hi; - ah0 += lo; - - ((uint64_t*)&l0[idx0 & MASK])[0] = al0; - - if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) { - ((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0; - } else if (BASE == xmrig::VARIANT_1) { - ((uint64_t*) &l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0; - } else { - ((uint64_t*) &l0[idx0 & MASK])[1] = ah0; - } - - al0 ^= cl; - ah0 ^= ch; - idx0 = al0; - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - int64_t n = ((int64_t*)&l0[idx0 & MASK])[0]; - int32_t d = ((int32_t*)&l0[idx0 & MASK])[2]; - int64_t q = n / (d | 0x5); - - ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; - - if (VARIANT == xmrig::VARIANT_XHV) { - d = ~d; - } - - idx0 = d ^ q; - } - - cl = ((uint64_t*) &l1[idx1 & MASK])[0]; - ch = ((uint64_t*) &l1[idx1 & MASK])[1]; - - if (BASE == xmrig::VARIANT_2) { - if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { - VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11); - if (VARIANT == xmrig::VARIANT_4) { - al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32); - ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32); - } - } else { - VARIANT2_INTEGER_MATH(1, cl, cx1); - } - } - - lo = __umul128(idx1, cl, &hi); - - if (BASE == xmrig::VARIANT_2) { - if (VARIANT == xmrig::VARIANT_4) { - VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0); - } else { - VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 
1 : 0)); - } - } - - al1 += hi; - ah1 += lo; - - ((uint64_t*)&l1[idx1 & MASK])[0] = al1; - - if (BASE == xmrig::VARIANT_1 && (VARIANT == xmrig::VARIANT_TUBE || VARIANT == xmrig::VARIANT_RTO)) { - ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1 ^ al1; - } else if (BASE == xmrig::VARIANT_1) { - ((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1; - } else { - ((uint64_t*)&l1[idx1 & MASK])[1] = ah1; - } - - al1 ^= cl; - ah1 ^= ch; - idx1 = al1; - - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { - int64_t n = ((int64_t*)&l1[idx1 & MASK])[0]; - int32_t d = ((int32_t*)&l1[idx1 & MASK])[2]; - int64_t q = n / (d | 0x5); - - ((int64_t*)&l1[idx1 & MASK])[0] = n ^ q; - - if (VARIANT == xmrig::VARIANT_XHV) { - d = ~d; - } - - idx1 = d ^ q; - } - - if (BASE == xmrig::VARIANT_2) { - bx01 = bx00; - bx11 = bx10; - } - - bx00 = cx0; - bx10 = cx1; - } - - cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0); - cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1); - - xmrig::keccakf(h0, 24); - xmrig::keccakf(h1, 24); - - extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); - extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32); -} - - -#define CN_STEP1(a, b0, b1, c, l, ptr, idx) \ - ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \ - c = _mm_load_si128(ptr); - - -#define CN_STEP2(a, b0, b1, c, l, ptr, idx) \ - if (VARIANT == xmrig::VARIANT_TUBE) { \ - c = aes_round_tweak_div(c, a); \ - } \ - else if (SOFT_AES) { \ - c = soft_aesenc(&c, a, (const uint32_t*)saes_table); \ - } else { \ - c = _mm_aesenc_si128(c, a); \ - } \ - \ - if (BASE == xmrig::VARIANT_1 || BASE == xmrig::VARIANT_2) { \ - cryptonight_monero_tweak((uint64_t*)ptr, l, idx & MASK, a, b0, b1, c); \ - } else { \ - _mm_store_si128(ptr, _mm_xor_si128(b0, c)); \ - } - - -#define CN_STEP3(part, a, b0, b1, c, l, ptr, idx) \ - idx = _mm_cvtsi128_si64(c); \ - ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \ - uint64_t cl##part = ((uint64_t*)ptr)[0]; \ - uint64_t ch##part = ((uint64_t*)ptr)[1]; - - 
-#define CN_STEP4(part, a, b0, b1, c, l, mc, ptr, idx) \ - uint64_t al##part, ah##part; \ - if (BASE == xmrig::VARIANT_2) { \ - if ((VARIANT == xmrig::VARIANT_WOW) || (VARIANT == xmrig::VARIANT_4)) { \ - al##part = _mm_cvtsi128_si64(a); \ - ah##part = _mm_cvtsi128_si64(_mm_srli_si128(a, 8)); \ - VARIANT4_RANDOM_MATH(part, al##part, ah##part, cl##part, b0, b1); \ - if (VARIANT == xmrig::VARIANT_4) { \ - al##part ^= r##part[2] | ((uint64_t)(r##part[3]) << 32); \ - ah##part ^= r##part[0] | ((uint64_t)(r##part[1]) << 32); \ - } \ - } else { \ - VARIANT2_INTEGER_MATH(part, cl##part, c); \ - } \ - } \ - lo = __umul128(idx, cl##part, &hi); \ - if (BASE == xmrig::VARIANT_2) { \ - if (VARIANT == xmrig::VARIANT_4) { \ - VARIANT2_SHUFFLE(l, idx & MASK, a, b0, b1, c, 0); \ - } else { \ - VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0)); \ - } \ - } \ - if (VARIANT == xmrig::VARIANT_4) { \ - a = _mm_set_epi64x(ah##part, al##part); \ - } \ - a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \ - \ - if (BASE == xmrig::VARIANT_1) { \ - _mm_store_si128(ptr, _mm_xor_si128(a, mc)); \ - \ - if (VARIANT == xmrig::VARIANT_TUBE || \ - VARIANT == xmrig::VARIANT_RTO) { \ - ((uint64_t*)ptr)[1] ^= ((uint64_t*)ptr)[0]; \ - } \ - } else { \ - _mm_store_si128(ptr, a); \ - } \ - \ - a = _mm_xor_si128(a, _mm_set_epi64x(ch##part, cl##part)); \ - idx = _mm_cvtsi128_si64(a); \ - \ - if (ALGO == xmrig::CRYPTONIGHT_HEAVY) { \ - int64_t n = ((int64_t*)&l[idx & MASK])[0]; \ - int32_t d = ((int32_t*)&l[idx & MASK])[2]; \ - int64_t q = n / (d | 0x5); \ - ((int64_t*)&l[idx & MASK])[0] = n ^ q; \ - if (VARIANT == xmrig::VARIANT_XHV) { \ - d = ~d; \ - } \ - \ - idx = d ^ q; \ - } \ - if (BASE == xmrig::VARIANT_2) { \ - b1 = b0; \ - } \ - b0 = c; - - -#define CONST_INIT(ctx, n) \ - __m128i mc##n; \ - __m128i division_result_xmm_##n; \ - __m128i sqrt_result_xmm_##n; \ - if (BASE == xmrig::VARIANT_1) { \ - mc##n = _mm_set_epi64x(*reinterpret_cast(input + n * size + 35) ^ 
\ - *(reinterpret_cast((ctx)->state) + 24), 0); \ - } \ - if (BASE == xmrig::VARIANT_2) { \ - division_result_xmm_##n = _mm_cvtsi64_si128(h##n[12]); \ - sqrt_result_xmm_##n = _mm_cvtsi64_si128(h##n[13]); \ - } \ - __m128i ax##n = _mm_set_epi64x(h##n[1] ^ h##n[5], h##n[0] ^ h##n[4]); \ - __m128i bx##n##0 = _mm_set_epi64x(h##n[3] ^ h##n[7], h##n[2] ^ h##n[6]); \ - __m128i bx##n##1 = _mm_set_epi64x(h##n[9] ^ h##n[11], h##n[8] ^ h##n[10]); \ - __m128i cx##n = _mm_setzero_si128(); \ - VARIANT4_RANDOM_MATH_INIT(n); - - -template -inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MASK = xmrig::cn_select_mask(); - constexpr size_t ITERATIONS = xmrig::cn_select_iter(); - constexpr size_t MEM = xmrig::cn_select_memory(); - constexpr xmrig::Variant BASE = xmrig::cn_base_variant(); - - if (BASE == xmrig::VARIANT_1 && size < 43) { - memset(output, 0, 32 * 3); - return; - } - - for (size_t i = 0; i < 3; i++) { - xmrig::keccak(input + size * i, size, ctx[i]->state); - cn_explode_scratchpad(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory)); - } - - uint8_t* l0 = ctx[0]->memory; - uint8_t* l1 = ctx[1]->memory; - uint8_t* l2 = ctx[2]->memory; - uint64_t* h0 = reinterpret_cast(ctx[0]->state); - uint64_t* h1 = reinterpret_cast(ctx[1]->state); - uint64_t* h2 = reinterpret_cast(ctx[2]->state); - - CONST_INIT(ctx[0], 0); - CONST_INIT(ctx[1], 1); - CONST_INIT(ctx[2], 2); - VARIANT2_SET_ROUNDING_MODE(); - - uint64_t idx0, idx1, idx2; - idx0 = _mm_cvtsi128_si64(ax0); - idx1 = _mm_cvtsi128_si64(ax1); - idx2 = _mm_cvtsi128_si64(ax2); - - for (size_t i = 0; i < ITERATIONS; i++) { - uint64_t hi, lo; - __m128i *ptr0, *ptr1, *ptr2; - - CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0); - CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1); - CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2); - - CN_STEP2(ax0, bx00, bx01, cx0, l0, 
ptr0, idx0); - CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1); - CN_STEP2(ax2, bx20, bx21, cx2, l2, ptr2, idx2); - - CN_STEP3(0, ax0, bx00, bx01, cx0, l0, ptr0, idx0); - CN_STEP3(1, ax1, bx10, bx11, cx1, l1, ptr1, idx1); - CN_STEP3(2, ax2, bx20, bx21, cx2, l2, ptr2, idx2); - - CN_STEP4(0, ax0, bx00, bx01, cx0, l0, mc0, ptr0, idx0); - CN_STEP4(1, ax1, bx10, bx11, cx1, l1, mc1, ptr1, idx1); - CN_STEP4(2, ax2, bx20, bx21, cx2, l2, mc2, ptr2, idx2); - } - - for (size_t i = 0; i < 3; i++) { - cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state)); - xmrig::keccakf(reinterpret_cast(ctx[i]->state), 24); - extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i); - } -} - - -template -inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MASK = xmrig::cn_select_mask(); - constexpr size_t ITERATIONS = xmrig::cn_select_iter(); - constexpr size_t MEM = xmrig::cn_select_memory(); - constexpr xmrig::Variant BASE = xmrig::cn_base_variant(); - - if (BASE == xmrig::VARIANT_1 && size < 43) { - memset(output, 0, 32 * 4); - return; - } - - for (size_t i = 0; i < 4; i++) { - xmrig::keccak(input + size * i, size, ctx[i]->state); - cn_explode_scratchpad(reinterpret_cast<__m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory)); - } - - uint8_t* l0 = ctx[0]->memory; - uint8_t* l1 = ctx[1]->memory; - uint8_t* l2 = ctx[2]->memory; - uint8_t* l3 = ctx[3]->memory; - uint64_t* h0 = reinterpret_cast(ctx[0]->state); - uint64_t* h1 = reinterpret_cast(ctx[1]->state); - uint64_t* h2 = reinterpret_cast(ctx[2]->state); - uint64_t* h3 = reinterpret_cast(ctx[3]->state); - - CONST_INIT(ctx[0], 0); - CONST_INIT(ctx[1], 1); - CONST_INIT(ctx[2], 2); - CONST_INIT(ctx[3], 3); - VARIANT2_SET_ROUNDING_MODE(); - - uint64_t idx0, idx1, idx2, idx3; - idx0 = _mm_cvtsi128_si64(ax0); - idx1 = 
_mm_cvtsi128_si64(ax1); - idx2 = _mm_cvtsi128_si64(ax2); - idx3 = _mm_cvtsi128_si64(ax3); - - for (size_t i = 0; i < ITERATIONS; i++) - { - uint64_t hi, lo; - __m128i *ptr0, *ptr1, *ptr2, *ptr3; - - CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0); - CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1); - CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2); - CN_STEP1(ax3, bx30, bx31, cx3, l3, ptr3, idx3); - - CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0); - CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1); - CN_STEP2(ax2, bx20, bx21, cx2, l2, ptr2, idx2); - CN_STEP2(ax3, bx30, bx31, cx3, l3, ptr3, idx3); - - CN_STEP3(0, ax0, bx00, bx01, cx0, l0, ptr0, idx0); - CN_STEP3(1, ax1, bx10, bx11, cx1, l1, ptr1, idx1); - CN_STEP3(2, ax2, bx20, bx21, cx2, l2, ptr2, idx2); - CN_STEP3(3, ax3, bx30, bx31, cx3, l3, ptr3, idx3); - - CN_STEP4(0, ax0, bx00, bx01, cx0, l0, mc0, ptr0, idx0); - CN_STEP4(1, ax1, bx10, bx11, cx1, l1, mc1, ptr1, idx1); - CN_STEP4(2, ax2, bx20, bx21, cx2, l2, mc2, ptr2, idx2); - CN_STEP4(3, ax3, bx30, bx31, cx3, l3, mc3, ptr3, idx3); - } - - for (size_t i = 0; i < 4; i++) { - cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state)); - xmrig::keccakf(reinterpret_cast(ctx[i]->state), 24); - extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i); - } -} - - -template -inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) -{ - constexpr size_t MASK = xmrig::cn_select_mask(); - constexpr size_t ITERATIONS = xmrig::cn_select_iter(); - constexpr size_t MEM = xmrig::cn_select_memory(); - constexpr xmrig::Variant BASE = xmrig::cn_base_variant(); - - if (BASE == xmrig::VARIANT_1 && size < 43) { - memset(output, 0, 32 * 5); - return; - } - - for (size_t i = 0; i < 5; i++) { - xmrig::keccak(input + size * i, size, ctx[i]->state); - cn_explode_scratchpad(reinterpret_cast<__m128i*>(ctx[i]->state), 
reinterpret_cast<__m128i*>(ctx[i]->memory)); - } - - uint8_t* l0 = ctx[0]->memory; - uint8_t* l1 = ctx[1]->memory; - uint8_t* l2 = ctx[2]->memory; - uint8_t* l3 = ctx[3]->memory; - uint8_t* l4 = ctx[4]->memory; - uint64_t* h0 = reinterpret_cast(ctx[0]->state); - uint64_t* h1 = reinterpret_cast(ctx[1]->state); - uint64_t* h2 = reinterpret_cast(ctx[2]->state); - uint64_t* h3 = reinterpret_cast(ctx[3]->state); - uint64_t* h4 = reinterpret_cast(ctx[4]->state); - - CONST_INIT(ctx[0], 0); - CONST_INIT(ctx[1], 1); - CONST_INIT(ctx[2], 2); - CONST_INIT(ctx[3], 3); - CONST_INIT(ctx[4], 4); - VARIANT2_SET_ROUNDING_MODE(); - - uint64_t idx0, idx1, idx2, idx3, idx4; - idx0 = _mm_cvtsi128_si64(ax0); - idx1 = _mm_cvtsi128_si64(ax1); - idx2 = _mm_cvtsi128_si64(ax2); - idx3 = _mm_cvtsi128_si64(ax3); - idx4 = _mm_cvtsi128_si64(ax4); - - for (size_t i = 0; i < ITERATIONS; i++) - { - uint64_t hi, lo; - __m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4; - - CN_STEP1(ax0, bx00, bx01, cx0, l0, ptr0, idx0); - CN_STEP1(ax1, bx10, bx11, cx1, l1, ptr1, idx1); - CN_STEP1(ax2, bx20, bx21, cx2, l2, ptr2, idx2); - CN_STEP1(ax3, bx30, bx31, cx3, l3, ptr3, idx3); - CN_STEP1(ax4, bx40, bx41, cx4, l4, ptr4, idx4); - - CN_STEP2(ax0, bx00, bx01, cx0, l0, ptr0, idx0); - CN_STEP2(ax1, bx10, bx11, cx1, l1, ptr1, idx1); - CN_STEP2(ax2, bx20, bx21, cx2, l2, ptr2, idx2); - CN_STEP2(ax3, bx30, bx31, cx3, l3, ptr3, idx3); - CN_STEP2(ax4, bx40, bx41, cx4, l4, ptr4, idx4); - - CN_STEP3(0, ax0, bx00, bx01, cx0, l0, ptr0, idx0); - CN_STEP3(1, ax1, bx10, bx11, cx1, l1, ptr1, idx1); - CN_STEP3(2, ax2, bx20, bx21, cx2, l2, ptr2, idx2); - CN_STEP3(3, ax3, bx30, bx31, cx3, l3, ptr3, idx3); - CN_STEP3(4, ax4, bx40, bx41, cx4, l4, ptr4, idx4); - - CN_STEP4(0, ax0, bx00, bx01, cx0, l0, mc0, ptr0, idx0); - CN_STEP4(1, ax1, bx10, bx11, cx1, l1, mc1, ptr1, idx1); - CN_STEP4(2, ax2, bx20, bx21, cx2, l2, mc2, ptr2, idx2); - CN_STEP4(3, ax3, bx30, bx31, cx3, l3, mc3, ptr3, idx3); - CN_STEP4(4, ax4, bx40, bx41, cx4, l4, mc4, ptr4, 
idx4); - } - - for (size_t i = 0; i < 5; i++) { - cn_implode_scratchpad(reinterpret_cast<__m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state)); - xmrig::keccakf(reinterpret_cast(ctx[i]->state), 24); - extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i); - } -} - -#endif /* XMRIG_CRYPTONIGHT_X86_H */ diff --git a/src/crypto/CryptonightR_gen.cpp b/src/crypto/CryptonightR_gen.cpp deleted file mode 100644 index 3fba49cd..00000000 --- a/src/crypto/CryptonightR_gen.cpp +++ /dev/null @@ -1,187 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include -#include "crypto/CryptoNight_monero.h" - -typedef void(*void_func)(); - -#include "crypto/asm/CryptonightR_template.h" -#include "Mem.h" - - -static inline void add_code(uint8_t* &p, void (*p1)(), void (*p2)()) -{ - const ptrdiff_t size = reinterpret_cast(p2) - reinterpret_cast(p1); - if (size > 0) { - memcpy(p, reinterpret_cast(p1), size); - p += size; - } -} - -static inline void add_random_math(uint8_t* &p, const V4_Instruction* code, int code_size, const void_func* instructions, const void_func* instructions_mov, bool is_64_bit, xmrig::Assembly ASM) -{ - uint32_t prev_rot_src = (uint32_t)(-1); - - for (int i = 0;; ++i) { - const V4_Instruction inst = code[i]; - if (inst.opcode == RET) { - break; - } - - uint8_t opcode = (inst.opcode == MUL) ? inst.opcode : (inst.opcode + 2); - uint8_t dst_index = inst.dst_index; - uint8_t src_index = inst.src_index; - - const uint32_t a = inst.dst_index; - const uint32_t b = inst.src_index; - const uint8_t c = opcode | (dst_index << V4_OPCODE_BITS) | (((src_index == 8) ? dst_index : src_index) << (V4_OPCODE_BITS + V4_DST_INDEX_BITS)); - - switch (inst.opcode) { - case ROR: - case ROL: - if (b != prev_rot_src) { - prev_rot_src = b; - add_code(p, instructions_mov[c], instructions_mov[c + 1]); - } - break; - } - - if (a == prev_rot_src) { - prev_rot_src = (uint32_t)(-1); - } - - void_func begin = instructions[c]; - - if ((ASM = xmrig::ASM_BULLDOZER) && (inst.opcode == MUL) && !is_64_bit) { - // AMD Bulldozer has latency 4 for 32-bit IMUL and 6 for 64-bit IMUL - // Always use 32-bit IMUL for AMD Bulldozer in 32-bit mode - skip prefix 0x48 and change 0x49 to 0x41 - uint8_t* prefix = reinterpret_cast(begin); - - if (*prefix == 0x49) { - *(p++) = 0x41; - } - - begin = reinterpret_cast(prefix + 1); - } - - add_code(p, begin, instructions[c + 1]); - - if (inst.opcode == ADD) { - *(uint32_t*)(p - sizeof(uint32_t) - (is_64_bit ? 
3 : 0)) = inst.C; - if (is_64_bit) { - prev_rot_src = (uint32_t)(-1); - } - } - } -} - -void wow_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - uint8_t* p0 = reinterpret_cast(machine_code); - uint8_t* p = p0; - - add_code(p, CryptonightWOW_template_part1, CryptonightWOW_template_part2); - add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM); - add_code(p, CryptonightWOW_template_part2, CryptonightWOW_template_part3); - *(int*)(p - 4) = static_cast((((const uint8_t*)CryptonightWOW_template_mainloop) - ((const uint8_t*)CryptonightWOW_template_part1)) - (p - p0)); - add_code(p, CryptonightWOW_template_part3, CryptonightWOW_template_end); - - Mem::flushInstructionCache(machine_code, p - p0); -} - -void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - uint8_t* p0 = reinterpret_cast(machine_code); - uint8_t* p = p0; - - add_code(p, CryptonightR_template_part1, CryptonightR_template_part2); - add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM); - add_code(p, CryptonightR_template_part2, CryptonightR_template_part3); - *(int*)(p - 4) = static_cast((((const uint8_t*)CryptonightR_template_mainloop) - ((const uint8_t*)CryptonightR_template_part1)) - (p - p0)); - add_code(p, CryptonightR_template_part3, CryptonightR_template_end); - - Mem::flushInstructionCache(machine_code, p - p0); -} - -void wow_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - uint8_t* p0 = reinterpret_cast(machine_code); - uint8_t* p = p0; - - add_code(p, CryptonightWOW_template_double_part1, CryptonightWOW_template_double_part2); - add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM); - add_code(p, CryptonightWOW_template_double_part2, CryptonightWOW_template_double_part3); - add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM); - 
add_code(p, CryptonightWOW_template_double_part3, CryptonightWOW_template_double_part4); - *(int*)(p - 4) = static_cast((((const uint8_t*)CryptonightWOW_template_double_mainloop) - ((const uint8_t*)CryptonightWOW_template_double_part1)) - (p - p0)); - add_code(p, CryptonightWOW_template_double_part4, CryptonightWOW_template_double_end); - - Mem::flushInstructionCache(machine_code, p - p0); -} - -void v4_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - uint8_t* p0 = reinterpret_cast(machine_code); - uint8_t* p = p0; - - add_code(p, CryptonightR_template_double_part1, CryptonightR_template_double_part2); - add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM); - add_code(p, CryptonightR_template_double_part2, CryptonightR_template_double_part3); - add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM); - add_code(p, CryptonightR_template_double_part3, CryptonightR_template_double_part4); - *(int*)(p - 4) = static_cast((((const uint8_t*)CryptonightR_template_double_mainloop) - ((const uint8_t*)CryptonightR_template_double_part1)) - (p - p0)); - add_code(p, CryptonightR_template_double_part4, CryptonightR_template_double_end); - - Mem::flushInstructionCache(machine_code, p - p0); -} - -void wow_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - uint8_t* p0 = reinterpret_cast(machine_code); - uint8_t* p = p0; - - add_code(p, CryptonightWOW_soft_aes_template_part1, CryptonightWOW_soft_aes_template_part2); - add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM); - add_code(p, CryptonightWOW_soft_aes_template_part2, CryptonightWOW_soft_aes_template_part3); - *(int*)(p - 4) = static_cast((((const uint8_t*)CryptonightWOW_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightWOW_soft_aes_template_part1)) - (p - p0)); - add_code(p, CryptonightWOW_soft_aes_template_part3, 
CryptonightWOW_soft_aes_template_end); - - Mem::flushInstructionCache(machine_code, p - p0); -} - -void v4_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) -{ - uint8_t* p0 = reinterpret_cast(machine_code); - uint8_t* p = p0; - - add_code(p, CryptonightR_soft_aes_template_part1, CryptonightR_soft_aes_template_part2); - add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM); - add_code(p, CryptonightR_soft_aes_template_part2, CryptonightR_soft_aes_template_part3); - *(int*)(p - 4) = static_cast((((const uint8_t*)CryptonightR_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightR_soft_aes_template_part1)) - (p - p0)); - add_code(p, CryptonightR_soft_aes_template_part3, CryptonightR_soft_aes_template_end); - - Mem::flushInstructionCache(machine_code, p - p0); -} diff --git a/src/crypto/SSE2NEON.h b/src/crypto/SSE2NEON.h deleted file mode 100644 index 6a00448d..00000000 --- a/src/crypto/SSE2NEON.h +++ /dev/null @@ -1,1497 +0,0 @@ -#ifndef SSE2NEON_H -#define SSE2NEON_H - -// This header file provides a simple API translation layer -// between SSE intrinsics to their corresponding ARM NEON versions -// -// This header file does not (yet) translate *all* of the SSE intrinsics. -// Since this is in support of a specific porting effort, I have only -// included the intrinsics I needed to get my port to work. -// -// Questions/Comments/Feedback send to: jratcliffscarab@gmail.com -// -// If you want to improve or add to this project, send me an -// email and I will probably approve your access to the depot. -// -// Project is located here: -// -// https://github.com/jratcliff63367/sse2neon -// -// Show your appreciation for open source by sending me a bitcoin tip to the following -// address. -// -// TipJar: 1PzgWDSyq4pmdAXRH8SPUtta4SWGrt4B1p : -// https://blockchain.info/address/1PzgWDSyq4pmdAXRH8SPUtta4SWGrt4B1p -// -// -// Contributors to this project are: -// -// John W. 
Ratcliff : jratcliffscarab@gmail.com -// Brandon Rowlett : browlett@nvidia.com -// Ken Fast : kfast@gdeb.com -// Eric van Beurden : evanbeurden@nvidia.com -// Alexander Potylitsin : apotylitsin@nvidia.com -// -// -// ********************************************************************************************************************* -// apoty: March 17, 2017 -// Current version was changed in most to fix issues and potential issues. -// All unit tests were rewritten as a part of forge lib project to cover all implemented functions. -// ********************************************************************************************************************* -// Release notes for January 20, 2017 version: -// -// The unit tests have been refactored. They no longer assert on an error, instead they return a pass/fail condition -// The unit-tests now test 10,000 random float and int values against each intrinsic. -// -// SSE2NEON now supports 95 SSE intrinsics. 39 of them have formal unit tests which have been implemented and -// fully tested on NEON/ARM. The remaining 56 still need unit tests implemented. -// -// A struct is now defined in this header file called 'SIMDVec' which can be used by applications which -// attempt to access the contents of an _m128 struct directly. It is important to note that accessing the __m128 -// struct directly is bad coding practice by Microsoft: @see: https://msdn.microsoft.com/en-us/library/ayeb3ayc.aspx -// -// However, some legacy source code may try to access the contents of an __m128 struct directly so the developer -// can use the SIMDVec as an alias for it. Any casting must be done manually by the developer, as you cannot -// cast or otherwise alias the base NEON data type for intrinsic operations. -// -// A bug was found with the _mm_shuffle_ps intrinsic. 
If the shuffle permutation was not one of the ones with -// a custom/unique implementation causing it to fall through to the default shuffle implementation it was failing -// to return the correct value. This is now fixed. -// -// A bug was found with the _mm_cvtps_epi32 intrinsic. This converts floating point values to integers. -// It was not honoring the correct rounding mode. In SSE the default rounding mode when converting from float to int -// is to use 'round to even' otherwise known as 'bankers rounding'. ARMv7 did not support this feature but ARMv8 does. -// As it stands today, this header file assumes ARMv8. If you are trying to target really old ARM devices, you may get -// a build error. -// -// Support for a number of new intrinsics was added, however, none of them yet have unit-tests to 100% confirm they are -// producing the correct results on NEON. These unit tests will be added as soon as possible. -// -// Here is the list of new instrinsics which have been added: -// -// _mm_cvtss_f32 : extracts the lower order floating point value from the parameter -// _mm_add_ss : adds the scalar single - precision floating point values of a and b -// _mm_div_ps : Divides the four single - precision, floating - point values of a and b. -// _mm_div_ss : Divides the scalar single - precision floating point value of a by b. -// _mm_sqrt_ss : Computes the approximation of the square root of the scalar single - precision floating point value of in. -// _mm_rsqrt_ps : Computes the approximations of the reciprocal square roots of the four single - precision floating point values of in. -// _mm_comilt_ss : Compares the lower single - precision floating point scalar values of a and b using a less than operation -// _mm_comigt_ss : Compares the lower single - precision floating point scalar values of a and b using a greater than operation. 
-// _mm_comile_ss : Compares the lower single - precision floating point scalar values of a and b using a less than or equal operation. -// _mm_comige_ss : Compares the lower single - precision floating point scalar values of a and b using a greater than or equal operation. -// _mm_comieq_ss : Compares the lower single - precision floating point scalar values of a and b using an equality operation. -// _mm_comineq_s : Compares the lower single - precision floating point scalar values of a and b using an inequality operation -// _mm_unpackhi_epi8 : Interleaves the upper 8 signed or unsigned 8 - bit integers in a with the upper 8 signed or unsigned 8 - bit integers in b. -// _mm_unpackhi_epi16: Interleaves the upper 4 signed or unsigned 16 - bit integers in a with the upper 4 signed or unsigned 16 - bit integers in b. -// -// ********************************************************************************************************************* -/* -** The MIT license: -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and associated documentation files (the "Software"), to deal -** in the Software without restriction, including without limitation the rights -** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -** copies of the Software, and to permit persons to whom the Software is furnished -** to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in all -** copies or substantial portions of the Software. - -** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#define ENABLE_CPP_VERSION 0 - -#if defined(__GNUC__) || defined(__clang__) -# pragma push_macro("FORCE_INLINE") -# pragma push_macro("ALIGN_STRUCT") -# define FORCE_INLINE static inline __attribute__((always_inline)) -# define ALIGN_STRUCT(x) __attribute__((aligned(x))) -#else -# error "Macro name collisions may happens with unknown compiler" -# define FORCE_INLINE static inline -# define ALIGN_STRUCT(x) __declspec(align(x)) -#endif - -#include -#include "arm_neon.h" - - -/*******************************************************/ -/* MACRO for shuffle parameter for _mm_shuffle_ps(). */ -/* Argument fp3 is a digit[0123] that represents the fp*/ -/* from argument "b" of mm_shuffle_ps that will be */ -/* placed in fp3 of result. fp2 is the same for fp2 in */ -/* result. fp1 is a digit[0123] that represents the fp */ -/* from argument "a" of mm_shuffle_ps that will be */ -/* places in fp1 of result. 
fp0 is the same for fp0 of */ -/* result */ -/*******************************************************/ -#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \ - (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0))) - -/* indicate immediate constant argument in a given range */ -#define __constrange(a,b) \ - const - -typedef float32x4_t __m128; -typedef int32x4_t __m128i; - - -// ****************************************** -// type-safe casting between types -// ****************************************** - -#define vreinterpretq_m128_f16(x) \ - vreinterpretq_f32_f16(x) - -#define vreinterpretq_m128_f32(x) \ - (x) - -#define vreinterpretq_m128_f64(x) \ - vreinterpretq_f32_f64(x) - - -#define vreinterpretq_m128_u8(x) \ - vreinterpretq_f32_u8(x) - -#define vreinterpretq_m128_u16(x) \ - vreinterpretq_f32_u16(x) - -#define vreinterpretq_m128_u32(x) \ - vreinterpretq_f32_u32(x) - -#define vreinterpretq_m128_u64(x) \ - vreinterpretq_f32_u64(x) - - -#define vreinterpretq_m128_s8(x) \ - vreinterpretq_f32_s8(x) - -#define vreinterpretq_m128_s16(x) \ - vreinterpretq_f32_s16(x) - -#define vreinterpretq_m128_s32(x) \ - vreinterpretq_f32_s32(x) - -#define vreinterpretq_m128_s64(x) \ - vreinterpretq_f32_s64(x) - - -#define vreinterpretq_f16_m128(x) \ - vreinterpretq_f16_f32(x) - -#define vreinterpretq_f32_m128(x) \ - (x) - -#define vreinterpretq_f64_m128(x) \ - vreinterpretq_f64_f32(x) - - -#define vreinterpretq_u8_m128(x) \ - vreinterpretq_u8_f32(x) - -#define vreinterpretq_u16_m128(x) \ - vreinterpretq_u16_f32(x) - -#define vreinterpretq_u32_m128(x) \ - vreinterpretq_u32_f32(x) - -#define vreinterpretq_u64_m128(x) \ - vreinterpretq_u64_f32(x) - - -#define vreinterpretq_s8_m128(x) \ - vreinterpretq_s8_f32(x) - -#define vreinterpretq_s16_m128(x) \ - vreinterpretq_s16_f32(x) - -#define vreinterpretq_s32_m128(x) \ - vreinterpretq_s32_f32(x) - -#define vreinterpretq_s64_m128(x) \ - vreinterpretq_s64_f32(x) - - -#define vreinterpretq_m128i_s8(x) \ - vreinterpretq_s32_s8(x) - -#define 
vreinterpretq_m128i_s16(x) \ - vreinterpretq_s32_s16(x) - -#define vreinterpretq_m128i_s32(x) \ - (x) - -#define vreinterpretq_m128i_s64(x) \ - vreinterpretq_s32_s64(x) - - -#define vreinterpretq_m128i_u8(x) \ - vreinterpretq_s32_u8(x) - -#define vreinterpretq_m128i_u16(x) \ - vreinterpretq_s32_u16(x) - -#define vreinterpretq_m128i_u32(x) \ - vreinterpretq_s32_u32(x) - -#define vreinterpretq_m128i_u64(x) \ - vreinterpretq_s32_u64(x) - - -#define vreinterpretq_s8_m128i(x) \ - vreinterpretq_s8_s32(x) - -#define vreinterpretq_s16_m128i(x) \ - vreinterpretq_s16_s32(x) - -#define vreinterpretq_s32_m128i(x) \ - (x) - -#define vreinterpretq_s64_m128i(x) \ - vreinterpretq_s64_s32(x) - - -#define vreinterpretq_u8_m128i(x) \ - vreinterpretq_u8_s32(x) - -#define vreinterpretq_u16_m128i(x) \ - vreinterpretq_u16_s32(x) - -#define vreinterpretq_u32_m128i(x) \ - vreinterpretq_u32_s32(x) - -#define vreinterpretq_u64_m128i(x) \ - vreinterpretq_u64_s32(x) - - -// union intended to allow direct access to an __m128 variable using the names that the MSVC -// compiler provides. This union should really only be used when trying to access the members -// of the vector as integer values. GCC/clang allow native access to the float members through -// a simple array access operator (in C since 4.6, in C++ since 4.8). -// -// Ideally direct accesses to SIMD vectors should not be used since it can cause a performance -// hit. If it really is needed however, the original __m128 variable can be aliased with a -// pointer to this union and used to access individual components. The use of this union should -// be hidden behind a macro that is used throughout the codebase to access the members instead -// of always declaring this type of variable. -typedef union ALIGN_STRUCT(16) SIMDVec -{ - float m128_f32[4]; // as floats - do not to use this. Added for convenience. - int8_t m128_i8[16]; // as signed 8-bit integers. - int16_t m128_i16[8]; // as signed 16-bit integers. 
- int32_t m128_i32[4]; // as signed 32-bit integers. - int64_t m128_i64[2]; // as signed 64-bit integers. - uint8_t m128_u8[16]; // as unsigned 8-bit integers. - uint16_t m128_u16[8]; // as unsigned 16-bit integers. - uint32_t m128_u32[4]; // as unsigned 32-bit integers. - uint64_t m128_u64[2]; // as unsigned 64-bit integers. -} SIMDVec; - - -// ****************************************** -// Set/get methods -// ****************************************** - -// extracts the lower order floating point value from the parameter : https://msdn.microsoft.com/en-us/library/bb514059%28v=vs.120%29.aspx?f=255&MSPPError=-2147217396 -FORCE_INLINE float _mm_cvtss_f32(__m128 a) -{ - return vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); -} - -// Sets the 128-bit value to zero https://msdn.microsoft.com/en-us/library/vstudio/ys7dw0kh(v=vs.100).aspx -FORCE_INLINE __m128i _mm_setzero_si128() -{ - return vreinterpretq_m128i_s32(vdupq_n_s32(0)); -} - -// Clears the four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/tk1t2tbz(v=vs.100).aspx -FORCE_INLINE __m128 _mm_setzero_ps(void) -{ - return vreinterpretq_m128_f32(vdupq_n_f32(0)); -} - -// Sets the four single-precision, floating-point values to w. https://msdn.microsoft.com/en-us/library/vstudio/2x1se8ha(v=vs.100).aspx -FORCE_INLINE __m128 _mm_set1_ps(float _w) -{ - return vreinterpretq_m128_f32(vdupq_n_f32(_w)); -} - -// Sets the four single-precision, floating-point values to w. https://msdn.microsoft.com/en-us/library/vstudio/2x1se8ha(v=vs.100).aspx -FORCE_INLINE __m128 _mm_set_ps1(float _w) -{ - return vreinterpretq_m128_f32(vdupq_n_f32(_w)); -} - -// Sets the four single-precision, floating-point values to the four inputs. 
https://msdn.microsoft.com/en-us/library/vstudio/afh0zf75(v=vs.100).aspx -FORCE_INLINE __m128 _mm_set_ps(float w, float z, float y, float x) -{ - float __attribute__((aligned(16))) data[4] = { x, y, z, w }; - return vreinterpretq_m128_f32(vld1q_f32(data)); -} - -// Sets the four single-precision, floating-point values to the four inputs in reverse order. https://msdn.microsoft.com/en-us/library/vstudio/d2172ct3(v=vs.100).aspx -FORCE_INLINE __m128 _mm_setr_ps(float w, float z , float y , float x ) -{ - float __attribute__ ((aligned (16))) data[4] = { w, z, y, x }; - return vreinterpretq_m128_f32(vld1q_f32(data)); -} - -// Sets the 4 signed 32-bit integer values to i. https://msdn.microsoft.com/en-us/library/vstudio/h4xscxat(v=vs.100).aspx -FORCE_INLINE __m128i _mm_set1_epi32(int _i) -{ - return vreinterpretq_m128i_s32(vdupq_n_s32(_i)); -} - -// Sets the 4 signed 32-bit integer values. https://msdn.microsoft.com/en-us/library/vstudio/019beekt(v=vs.100).aspx -FORCE_INLINE __m128i _mm_set_epi32(int i3, int i2, int i1, int i0) -{ - int32_t __attribute__((aligned(16))) data[4] = { i0, i1, i2, i3 }; - return vreinterpretq_m128i_s32(vld1q_s32(data)); -} - -// Stores four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/s3h4ay6y(v=vs.100).aspx -FORCE_INLINE void _mm_store_ps(float *p, __m128 a) -{ - vst1q_f32(p, vreinterpretq_f32_m128(a)); -} - -// Stores four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/44e30x22(v=vs.100).aspx -FORCE_INLINE void _mm_storeu_ps(float *p, __m128 a) -{ - vst1q_f32(p, vreinterpretq_f32_m128(a)); -} - -// Stores four 32-bit integer values as (as a __m128i value) at the address p. https://msdn.microsoft.com/en-us/library/vstudio/edk11s13(v=vs.100).aspx -FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a) -{ - vst1q_s32((int32_t*) p, vreinterpretq_s32_m128i(a)); -} - -// Stores the lower single - precision, floating - point value. 
https://msdn.microsoft.com/en-us/library/tzz10fbx(v=vs.100).aspx -FORCE_INLINE void _mm_store_ss(float *p, __m128 a) -{ - vst1q_lane_f32(p, vreinterpretq_f32_m128(a), 0); -} - -// Reads the lower 64 bits of b and stores them into the lower 64 bits of a. https://msdn.microsoft.com/en-us/library/hhwf428f%28v=vs.90%29.aspx -FORCE_INLINE void _mm_storel_epi64(__m128i* a, __m128i b) -{ - uint64x1_t hi = vget_high_u64(vreinterpretq_u64_m128i(*a)); - uint64x1_t lo = vget_low_u64(vreinterpretq_u64_m128i(b)); - *a = vreinterpretq_m128i_u64(vcombine_u64(lo, hi)); -} - -// Loads a single single-precision, floating-point value, copying it into all four words https://msdn.microsoft.com/en-us/library/vstudio/5cdkf716(v=vs.100).aspx -FORCE_INLINE __m128 _mm_load1_ps(const float * p) -{ - return vreinterpretq_m128_f32(vld1q_dup_f32(p)); -} - -// Loads four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/zzd50xxt(v=vs.100).aspx -FORCE_INLINE __m128 _mm_load_ps(const float * p) -{ - return vreinterpretq_m128_f32(vld1q_f32(p)); -} - -// Loads four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/x1b16s7z%28v=vs.90%29.aspx -FORCE_INLINE __m128 _mm_loadu_ps(const float * p) -{ - // for neon, alignment doesn't matter, so _mm_load_ps and _mm_loadu_ps are equivalent for neon - return vreinterpretq_m128_f32(vld1q_f32(p)); -} - -// Loads an single - precision, floating - point value into the low word and clears the upper three words. https://msdn.microsoft.com/en-us/library/548bb9h4%28v=vs.90%29.aspx -FORCE_INLINE __m128 _mm_load_ss(const float * p) -{ - return vreinterpretq_m128_f32(vsetq_lane_f32(*p, vdupq_n_f32(0), 0)); -} - - -// ****************************************** -// Logic/Binary operations -// ****************************************** - -// Compares for inequality. 
https://msdn.microsoft.com/en-us/library/sf44thbx(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cmpneq_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_u32( vmvnq_u32( vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)) ) ); -} - -// Computes the bitwise AND-NOT of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/68h7wd02(v=vs.100).aspx -FORCE_INLINE __m128 _mm_andnot_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_s32( vbicq_s32(vreinterpretq_s32_m128(b), vreinterpretq_s32_m128(a)) ); // *NOTE* argument swap -} - -// Computes the bitwise AND of the 128-bit value in b and the bitwise NOT of the 128-bit value in a. https://msdn.microsoft.com/en-us/library/vstudio/1beaceh8(v=vs.100).aspx -FORCE_INLINE __m128i _mm_andnot_si128(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s32( vbicq_s32(vreinterpretq_s32_m128i(b), vreinterpretq_s32_m128i(a)) ); // *NOTE* argument swap -} - -// Computes the bitwise AND of the 128-bit value in a and the 128-bit value in b. https://msdn.microsoft.com/en-us/library/vstudio/6d1txsa8(v=vs.100).aspx -FORCE_INLINE __m128i _mm_and_si128(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s32( vandq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)) ); -} - -// Computes the bitwise AND of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/73ck1xc5(v=vs.100).aspx -FORCE_INLINE __m128 _mm_and_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_s32( vandq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b)) ); -} - -// Computes the bitwise OR of the four single-precision, floating-point values of a and b. 
https://msdn.microsoft.com/en-us/library/vstudio/7ctdsyy0(v=vs.100).aspx -FORCE_INLINE __m128 _mm_or_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_s32( vorrq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b)) ); -} - -// Computes bitwise EXOR (exclusive-or) of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/ss6k3wk8(v=vs.100).aspx -FORCE_INLINE __m128 _mm_xor_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_s32( veorq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b)) ); -} - -// Computes the bitwise OR of the 128-bit value in a and the 128-bit value in b. https://msdn.microsoft.com/en-us/library/vstudio/ew8ty0db(v=vs.100).aspx -FORCE_INLINE __m128i _mm_or_si128(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s32( vorrq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)) ); -} - -// Computes the bitwise XOR of the 128-bit value in a and the 128-bit value in b. https://msdn.microsoft.com/en-us/library/fzt08www(v=vs.100).aspx -FORCE_INLINE __m128i _mm_xor_si128(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s32( veorq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)) ); -} - -// NEON does not provide this method -// Creates a 4-bit mask from the most significant bits of the four single-precision, floating-point values. 
https://msdn.microsoft.com/en-us/library/vstudio/4490ys29(v=vs.100).aspx -FORCE_INLINE int _mm_movemask_ps(__m128 a) -{ -#if ENABLE_CPP_VERSION // I am not yet convinced that the NEON version is faster than the C version of this - uint32x4_t &ia = *(uint32x4_t *)&a; - return (ia[0] >> 31) | ((ia[1] >> 30) & 2) | ((ia[2] >> 29) & 4) | ((ia[3] >> 28) & 8); -#else - static const uint32x4_t movemask = { 1, 2, 4, 8 }; - static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; - uint32x4_t t0 = vreinterpretq_u32_m128(a); - uint32x4_t t1 = vtstq_u32(t0, highbit); - uint32x4_t t2 = vandq_u32(t1, movemask); - uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2)); - return vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1); -#endif -} - -// Takes the upper 64 bits of a and places it in the low end of the result -// Takes the lower 64 bits of b and places it into the high end of the result. -FORCE_INLINE __m128 _mm_shuffle_ps_1032(__m128 a, __m128 b) -{ - float32x2_t a32 = vget_high_f32(vreinterpretq_f32_m128(a)); - float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b)); - return vreinterpretq_m128_f32(vcombine_f32(a32, b10)); -} - -// takes the lower two 32-bit values from a and swaps them and places in high end of result -// takes the higher two 32 bit values from b and swaps them and places in low end of result. 
-FORCE_INLINE __m128 _mm_shuffle_ps_2301(__m128 a, __m128 b) -{ - float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a))); - float32x2_t b23 = vrev64_f32(vget_high_f32(vreinterpretq_f32_m128(b))); - return vreinterpretq_m128_f32(vcombine_f32(a01, b23)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_0321(__m128 a, __m128 b) -{ - float32x2_t a21 = vget_high_f32(vextq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a), 3)); - float32x2_t b03 = vget_low_f32(vextq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b), 3)); - return vreinterpretq_m128_f32(vcombine_f32(a21, b03)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_2103(__m128 a, __m128 b) -{ - float32x2_t a03 = vget_low_f32(vextq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a), 3)); - float32x2_t b21 = vget_high_f32(vextq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b), 3)); - return vreinterpretq_m128_f32(vcombine_f32(a03, b21)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_1010(__m128 a, __m128 b) -{ - float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a)); - float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b)); - return vreinterpretq_m128_f32(vcombine_f32(a10, b10)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_1001(__m128 a, __m128 b) -{ - float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a))); - float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b)); - return vreinterpretq_m128_f32(vcombine_f32(a01, b10)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_0101(__m128 a, __m128 b) -{ - float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a))); - float32x2_t b01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(b))); - return vreinterpretq_m128_f32(vcombine_f32(a01, b01)); -} - -// keeps the low 64 bits of b in the low and puts the high 64 bits of a in the high -FORCE_INLINE __m128 _mm_shuffle_ps_3210(__m128 a, __m128 b) -{ - float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a)); - float32x2_t b32 = vget_high_f32(vreinterpretq_f32_m128(b)); - return 
vreinterpretq_m128_f32(vcombine_f32(a10, b32)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_0011(__m128 a, __m128 b) -{ - float32x2_t a11 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(a)), 1); - float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); - return vreinterpretq_m128_f32(vcombine_f32(a11, b00)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_0022(__m128 a, __m128 b) -{ - float32x2_t a22 = vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(a)), 0); - float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); - return vreinterpretq_m128_f32(vcombine_f32(a22, b00)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_2200(__m128 a, __m128 b) -{ - float32x2_t a00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(a)), 0); - float32x2_t b22 = vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(b)), 0); - return vreinterpretq_m128_f32(vcombine_f32(a00, b22)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_3202(__m128 a, __m128 b) -{ - float32_t a0 = vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); - float32x2_t a22 = vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(a)), 0); - float32x2_t a02 = vset_lane_f32(a0, a22, 1); /* apoty: TODO: use vzip ?*/ - float32x2_t b32 = vget_high_f32(vreinterpretq_f32_m128(b)); - return vreinterpretq_m128_f32(vcombine_f32(a02, b32)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_1133(__m128 a, __m128 b) -{ - float32x2_t a33 = vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(a)), 1); - float32x2_t b11 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 1); - return vreinterpretq_m128_f32(vcombine_f32(a33, b11)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_2010(__m128 a, __m128 b) -{ - float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a)); - float32_t b2 = vgetq_lane_f32(vreinterpretq_f32_m128(b), 2); - float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); - float32x2_t b20 = vset_lane_f32(b2, b00, 1); - return vreinterpretq_m128_f32(vcombine_f32(a10, b20)); -} - -FORCE_INLINE __m128 
_mm_shuffle_ps_2001(__m128 a, __m128 b) -{ - float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a))); - float32_t b2 = vgetq_lane_f32(b, 2); - float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); - float32x2_t b20 = vset_lane_f32(b2, b00, 1); - return vreinterpretq_m128_f32(vcombine_f32(a01, b20)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_2032(__m128 a, __m128 b) -{ - float32x2_t a32 = vget_high_f32(vreinterpretq_f32_m128(a)); - float32_t b2 = vgetq_lane_f32(b, 2); - float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); - float32x2_t b20 = vset_lane_f32(b2, b00, 1); - return vreinterpretq_m128_f32(vcombine_f32(a32, b20)); -} - -// NEON does not support a general purpose permute intrinsic -// Currently I am not sure whether the C implementation is faster or slower than the NEON version. -// Note, this has to be expanded as a template because the shuffle value must be an immediate value. -// The same is true on SSE as well. -// Selects four specific single-precision, floating-point values from a and b, based on the mask i. https://msdn.microsoft.com/en-us/library/vstudio/5f0858x0(v=vs.100).aspx -#if ENABLE_CPP_VERSION // I am not convinced that the NEON version is faster than the C version yet. 
-FORCE_INLINE __m128 _mm_shuffle_ps_default(__m128 a, __m128 b, __constrange(0,255) int imm) -{ - __m128 ret; - ret[0] = a[imm & 0x3]; - ret[1] = a[(imm >> 2) & 0x3]; - ret[2] = b[(imm >> 4) & 0x03]; - ret[3] = b[(imm >> 6) & 0x03]; - return ret; -} -#else -#define _mm_shuffle_ps_default(a, b, imm) \ -({ \ - float32x4_t ret; \ - ret = vmovq_n_f32(vgetq_lane_f32(vreinterpretq_f32_m128(a), (imm) & 0x3)); \ - ret = vsetq_lane_f32(vgetq_lane_f32(vreinterpretq_f32_m128(a), ((imm) >> 2) & 0x3), ret, 1); \ - ret = vsetq_lane_f32(vgetq_lane_f32(vreinterpretq_f32_m128(b), ((imm) >> 4) & 0x3), ret, 2); \ - ret = vsetq_lane_f32(vgetq_lane_f32(vreinterpretq_f32_m128(b), ((imm) >> 6) & 0x3), ret, 3); \ - vreinterpretq_m128_f32(ret); \ -}) -#endif - -//FORCE_INLINE __m128 _mm_shuffle_ps(__m128 a, __m128 b, __constrange(0,255) int imm) -#define _mm_shuffle_ps(a, b, imm) \ -({ \ - __m128 ret; \ - switch (imm) \ - { \ - case _MM_SHUFFLE(1, 0, 3, 2): ret = _mm_shuffle_ps_1032((a), (b)); break; \ - case _MM_SHUFFLE(2, 3, 0, 1): ret = _mm_shuffle_ps_2301((a), (b)); break; \ - case _MM_SHUFFLE(0, 3, 2, 1): ret = _mm_shuffle_ps_0321((a), (b)); break; \ - case _MM_SHUFFLE(2, 1, 0, 3): ret = _mm_shuffle_ps_2103((a), (b)); break; \ - case _MM_SHUFFLE(1, 0, 1, 0): ret = _mm_shuffle_ps_1010((a), (b)); break; \ - case _MM_SHUFFLE(1, 0, 0, 1): ret = _mm_shuffle_ps_1001((a), (b)); break; \ - case _MM_SHUFFLE(0, 1, 0, 1): ret = _mm_shuffle_ps_0101((a), (b)); break; \ - case _MM_SHUFFLE(3, 2, 1, 0): ret = _mm_shuffle_ps_3210((a), (b)); break; \ - case _MM_SHUFFLE(0, 0, 1, 1): ret = _mm_shuffle_ps_0011((a), (b)); break; \ - case _MM_SHUFFLE(0, 0, 2, 2): ret = _mm_shuffle_ps_0022((a), (b)); break; \ - case _MM_SHUFFLE(2, 2, 0, 0): ret = _mm_shuffle_ps_2200((a), (b)); break; \ - case _MM_SHUFFLE(3, 2, 0, 2): ret = _mm_shuffle_ps_3202((a), (b)); break; \ - case _MM_SHUFFLE(1, 1, 3, 3): ret = _mm_shuffle_ps_1133((a), (b)); break; \ - case _MM_SHUFFLE(2, 0, 1, 0): ret = _mm_shuffle_ps_2010((a), (b)); 
break; \ - case _MM_SHUFFLE(2, 0, 0, 1): ret = _mm_shuffle_ps_2001((a), (b)); break; \ - case _MM_SHUFFLE(2, 0, 3, 2): ret = _mm_shuffle_ps_2032((a), (b)); break; \ - default: ret = _mm_shuffle_ps_default((a), (b), (imm)); break; \ - } \ - ret; \ -}) - -// Takes the upper 64 bits of a and places it in the low end of the result -// Takes the lower 64 bits of a and places it into the high end of the result. -FORCE_INLINE __m128i _mm_shuffle_epi_1032(__m128i a) -{ - int32x2_t a32 = vget_high_s32(vreinterpretq_s32_m128i(a)); - int32x2_t a10 = vget_low_s32(vreinterpretq_s32_m128i(a)); - return vreinterpretq_m128i_s32(vcombine_s32(a32, a10)); -} - -// takes the lower two 32-bit values from a and swaps them and places in low end of result -// takes the higher two 32 bit values from a and swaps them and places in high end of result. -FORCE_INLINE __m128i _mm_shuffle_epi_2301(__m128i a) -{ - int32x2_t a01 = vrev64_s32(vget_low_s32(vreinterpretq_s32_m128i(a))); - int32x2_t a23 = vrev64_s32(vget_high_s32(vreinterpretq_s32_m128i(a))); - return vreinterpretq_m128i_s32(vcombine_s32(a01, a23)); -} - -// rotates the least significant 32 bits into the most signficant 32 bits, and shifts the rest down -FORCE_INLINE __m128i _mm_shuffle_epi_0321(__m128i a) -{ - return vreinterpretq_m128i_s32(vextq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(a), 1)); -} - -// rotates the most significant 32 bits into the least signficant 32 bits, and shifts the rest up -FORCE_INLINE __m128i _mm_shuffle_epi_2103(__m128i a) -{ - return vreinterpretq_m128i_s32(vextq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(a), 3)); -} - -// gets the lower 64 bits of a, and places it in the upper 64 bits -// gets the lower 64 bits of a and places it in the lower 64 bits -FORCE_INLINE __m128i _mm_shuffle_epi_1010(__m128i a) -{ - int32x2_t a10 = vget_low_s32(vreinterpretq_s32_m128i(a)); - return vreinterpretq_m128i_s32(vcombine_s32(a10, a10)); -} - -// gets the lower 64 bits of a, swaps the 0 and 1 
elements, and places it in the lower 64 bits -// gets the lower 64 bits of a, and places it in the upper 64 bits -FORCE_INLINE __m128i _mm_shuffle_epi_1001(__m128i a) -{ - int32x2_t a01 = vrev64_s32(vget_low_s32(vreinterpretq_s32_m128i(a))); - int32x2_t a10 = vget_low_s32(vreinterpretq_s32_m128i(a)); - return vreinterpretq_m128i_s32(vcombine_s32(a01, a10)); -} - -// gets the lower 64 bits of a, swaps the 0 and 1 elements and places it in the upper 64 bits -// gets the lower 64 bits of a, swaps the 0 and 1 elements, and places it in the lower 64 bits -FORCE_INLINE __m128i _mm_shuffle_epi_0101(__m128i a) -{ - int32x2_t a01 = vrev64_s32(vget_low_s32(vreinterpretq_s32_m128i(a))); - return vreinterpretq_m128i_s32(vcombine_s32(a01, a01)); -} - -FORCE_INLINE __m128i _mm_shuffle_epi_2211(__m128i a) -{ - int32x2_t a11 = vdup_lane_s32(vget_low_s32(vreinterpretq_s32_m128i(a)), 1); - int32x2_t a22 = vdup_lane_s32(vget_high_s32(vreinterpretq_s32_m128i(a)), 0); - return vreinterpretq_m128i_s32(vcombine_s32(a11, a22)); -} - -FORCE_INLINE __m128i _mm_shuffle_epi_0122(__m128i a) -{ - int32x2_t a22 = vdup_lane_s32(vget_high_s32(vreinterpretq_s32_m128i(a)), 0); - int32x2_t a01 = vrev64_s32(vget_low_s32(vreinterpretq_s32_m128i(a))); - return vreinterpretq_m128i_s32(vcombine_s32(a22, a01)); -} - -FORCE_INLINE __m128i _mm_shuffle_epi_3332(__m128i a) -{ - int32x2_t a32 = vget_high_s32(vreinterpretq_s32_m128i(a)); - int32x2_t a33 = vdup_lane_s32(vget_high_s32(vreinterpretq_s32_m128i(a)), 1); - return vreinterpretq_m128i_s32(vcombine_s32(a32, a33)); -} - -//FORCE_INLINE __m128i _mm_shuffle_epi32_default(__m128i a, __constrange(0,255) int imm) -#if ENABLE_CPP_VERSION -FORCE_INLINE __m128i _mm_shuffle_epi32_default(__m128i a, __constrange(0,255) int imm) -{ - __m128i ret; - ret[0] = a[imm & 0x3]; - ret[1] = a[(imm >> 2) & 0x3]; - ret[2] = a[(imm >> 4) & 0x03]; - ret[3] = a[(imm >> 6) & 0x03]; - return ret; -} -#else -#define _mm_shuffle_epi32_default(a, imm) \ -({ \ - int32x4_t ret; \ - ret 
= vmovq_n_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm) & 0x3)); \ - ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 2) & 0x3), ret, 1); \ - ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 4) & 0x3), ret, 2); \ - ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 6) & 0x3), ret, 3); \ - vreinterpretq_m128i_s32(ret); \ -}) -#endif - -//FORCE_INLINE __m128i _mm_shuffle_epi32_splat(__m128i a, __constrange(0,255) int imm) -#if defined(__aarch64__) -#define _mm_shuffle_epi32_splat(a, imm) \ -({ \ - vreinterpretq_m128i_s32(vdupq_laneq_s32(vreinterpretq_s32_m128i(a), (imm))); \ -}) -#else -#define _mm_shuffle_epi32_splat(a, imm) \ -({ \ - vreinterpretq_m128i_s32(vdupq_n_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm)))); \ -}) -#endif - -// Shuffles the 4 signed or unsigned 32-bit integers in a as specified by imm. https://msdn.microsoft.com/en-us/library/56f67xbk%28v=vs.90%29.aspx -//FORCE_INLINE __m128i _mm_shuffle_epi32(__m128i a, __constrange(0,255) int imm) -#define _mm_shuffle_epi32(a, imm) \ -({ \ - __m128i ret; \ - switch (imm) \ - { \ - case _MM_SHUFFLE(1, 0, 3, 2): ret = _mm_shuffle_epi_1032((a)); break; \ - case _MM_SHUFFLE(2, 3, 0, 1): ret = _mm_shuffle_epi_2301((a)); break; \ - case _MM_SHUFFLE(0, 3, 2, 1): ret = _mm_shuffle_epi_0321((a)); break; \ - case _MM_SHUFFLE(2, 1, 0, 3): ret = _mm_shuffle_epi_2103((a)); break; \ - case _MM_SHUFFLE(1, 0, 1, 0): ret = _mm_shuffle_epi_1010((a)); break; \ - case _MM_SHUFFLE(1, 0, 0, 1): ret = _mm_shuffle_epi_1001((a)); break; \ - case _MM_SHUFFLE(0, 1, 0, 1): ret = _mm_shuffle_epi_0101((a)); break; \ - case _MM_SHUFFLE(2, 2, 1, 1): ret = _mm_shuffle_epi_2211((a)); break; \ - case _MM_SHUFFLE(0, 1, 2, 2): ret = _mm_shuffle_epi_0122((a)); break; \ - case _MM_SHUFFLE(3, 3, 3, 2): ret = _mm_shuffle_epi_3332((a)); break; \ - case _MM_SHUFFLE(0, 0, 0, 0): ret = _mm_shuffle_epi32_splat((a),0); break; \ - case _MM_SHUFFLE(1, 1, 1, 1): ret = 
_mm_shuffle_epi32_splat((a),1); break; \ - case _MM_SHUFFLE(2, 2, 2, 2): ret = _mm_shuffle_epi32_splat((a),2); break; \ - case _MM_SHUFFLE(3, 3, 3, 3): ret = _mm_shuffle_epi32_splat((a),3); break; \ - default: ret = _mm_shuffle_epi32_default((a), (imm)); break; \ - } \ - ret; \ -}) - -// Shuffles the upper 4 signed or unsigned 16 - bit integers in a as specified by imm. https://msdn.microsoft.com/en-us/library/13ywktbs(v=vs.100).aspx -//FORCE_INLINE __m128i _mm_shufflehi_epi16_function(__m128i a, __constrange(0,255) int imm) -#define _mm_shufflehi_epi16_function(a, imm) \ -({ \ - int16x8_t ret = vreinterpretq_s16_s32(a); \ - int16x4_t highBits = vget_high_s16(ret); \ - ret = vsetq_lane_s16(vget_lane_s16(highBits, (imm) & 0x3), ret, 4); \ - ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 2) & 0x3), ret, 5); \ - ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 4) & 0x3), ret, 6); \ - ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 6) & 0x3), ret, 7); \ - vreinterpretq_s32_s16(ret); \ -}) - -//FORCE_INLINE __m128i _mm_shufflehi_epi16(__m128i a, __constrange(0,255) int imm) -#define _mm_shufflehi_epi16(a, imm) \ - _mm_shufflehi_epi16_function((a), (imm)) - - -// Shifts the 4 signed or unsigned 32-bit integers in a left by count bits while shifting in zeros. : https://msdn.microsoft.com/en-us/library/z2k3bbtb%28v=vs.90%29.aspx -//FORCE_INLINE __m128i _mm_slli_epi32(__m128i a, __constrange(0,255) int imm) -#define _mm_slli_epi32(a, imm) \ -({ \ - __m128i ret; \ - if ((imm) <= 0) {\ - ret = a; \ - } \ - else if ((imm) > 31) { \ - ret = _mm_setzero_si128(); \ - } \ - else { \ - ret = vreinterpretq_m128i_s32(vshlq_n_s32(vreinterpretq_s32_m128i(a), (imm))); \ - } \ - ret; \ -}) - -//Shifts the 4 signed or unsigned 32-bit integers in a right by count bits while shifting in zeros. 
https://msdn.microsoft.com/en-us/library/w486zcfa(v=vs.100).aspx -//FORCE_INLINE __m128i _mm_srli_epi32(__m128i a, __constrange(0,255) int imm) -#define _mm_srli_epi32(a, imm) \ -({ \ - __m128i ret; \ - if ((imm) <= 0) { \ - ret = a; \ - } \ - else if ((imm)> 31) { \ - ret = _mm_setzero_si128(); \ - } \ - else { \ - ret = vreinterpretq_m128i_u32(vshrq_n_u32(vreinterpretq_u32_m128i(a), (imm))); \ - } \ - ret; \ -}) - -// Shifts the 4 signed 32 - bit integers in a right by count bits while shifting in the sign bit. https://msdn.microsoft.com/en-us/library/z1939387(v=vs.100).aspx -//FORCE_INLINE __m128i _mm_srai_epi32(__m128i a, __constrange(0,255) int imm) -#define _mm_srai_epi32(a, imm) \ -({ \ - __m128i ret; \ - if ((imm) <= 0) { \ - ret = a; \ - } \ - else if ((imm) > 31) { \ - ret = vreinterpretq_m128i_s32(vshrq_n_s32(vreinterpretq_s32_m128i(a), 16)); \ - ret = vreinterpretq_m128i_s32(vshrq_n_s32(vreinterpretq_s32_m128i(ret), 16)); \ - } \ - else { \ - ret = vreinterpretq_m128i_s32(vshrq_n_s32(vreinterpretq_s32_m128i(a), (imm))); \ - } \ - ret; \ -}) - -// Shifts the 128 - bit value in a right by imm bytes while shifting in zeros.imm must be an immediate. https://msdn.microsoft.com/en-us/library/305w28yz(v=vs.100).aspx -//FORCE_INLINE _mm_srli_si128(__m128i a, __constrange(0,255) int imm) -#define _mm_srli_si128(a, imm) \ -({ \ - __m128i ret; \ - if ((imm) <= 0) { \ - ret = a; \ - } \ - else if ((imm) > 15) { \ - ret = _mm_setzero_si128(); \ - } \ - else { \ - ret = vreinterpretq_m128i_s8(vextq_s8(vreinterpretq_s8_m128i(a), vdupq_n_s8(0), (imm))); \ - } \ - ret; \ -}) - -// Shifts the 128-bit value in a left by imm bytes while shifting in zeros. imm must be an immediate. 
https://msdn.microsoft.com/en-us/library/34d3k2kt(v=vs.100).aspx -//FORCE_INLINE __m128i _mm_slli_si128(__m128i a, __constrange(0,255) int imm) -#define _mm_slli_si128(a, imm) \ -({ \ - __m128i ret; \ - if ((imm) <= 0) { \ - ret = a; \ - } \ - else if ((imm) > 15) { \ - ret = _mm_setzero_si128(); \ - } \ - else { \ - ret = vreinterpretq_m128i_s8(vextq_s8(vdupq_n_s8(0), vreinterpretq_s8_m128i(a), 16 - (imm))); \ - } \ - ret; \ -}) - -// NEON does not provide a version of this function, here is an article about some ways to repro the results. -// http://stackoverflow.com/questions/11870910/sse-mm-movemask-epi8-equivalent-method-for-arm-neon -// Creates a 16-bit mask from the most significant bits of the 16 signed or unsigned 8-bit integers in a and zero extends the upper bits. https://msdn.microsoft.com/en-us/library/vstudio/s090c8fk(v=vs.100).aspx -FORCE_INLINE int _mm_movemask_epi8(__m128i _a) -{ - uint8x16_t input = vreinterpretq_u8_m128i(_a); - static const int8_t __attribute__((aligned(16))) xr[8] = { -7, -6, -5, -4, -3, -2, -1, 0 }; - uint8x8_t mask_and = vdup_n_u8(0x80); - int8x8_t mask_shift = vld1_s8(xr); - - uint8x8_t lo = vget_low_u8(input); - uint8x8_t hi = vget_high_u8(input); - - lo = vand_u8(lo, mask_and); - lo = vshl_u8(lo, mask_shift); - - hi = vand_u8(hi, mask_and); - hi = vshl_u8(hi, mask_shift); - - lo = vpadd_u8(lo, lo); - lo = vpadd_u8(lo, lo); - lo = vpadd_u8(lo, lo); - - hi = vpadd_u8(hi, hi); - hi = vpadd_u8(hi, hi); - hi = vpadd_u8(hi, hi); - - return ((hi[0] << 8) | (lo[0] & 0xFF)); -} - - -// ****************************************** -// Math operations -// ****************************************** - -// Subtracts the four single-precision, floating-point values of a and b. 
https://msdn.microsoft.com/en-us/library/vstudio/1zad2k61(v=vs.100).aspx -FORCE_INLINE __m128 _mm_sub_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_f32(vsubq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// Subtracts the 4 signed or unsigned 32-bit integers of b from the 4 signed or unsigned 32-bit integers of a. https://msdn.microsoft.com/en-us/library/vstudio/fhh866h0(v=vs.100).aspx -FORCE_INLINE __m128i _mm_sub_epi32(__m128i a, __m128i b) -{ - return vreinterpretq_m128_f32(vsubq_s32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -FORCE_INLINE __m128i _mm_sub_epi16(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s16(vsubq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); -} - -// Adds the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/c9848chc(v=vs.100).aspx -FORCE_INLINE __m128 _mm_add_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_f32(vaddq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// adds the scalar single-precision floating point values of a and b. https://msdn.microsoft.com/en-us/library/be94x2y6(v=vs.100).aspx -FORCE_INLINE __m128 _mm_add_ss(__m128 a, __m128 b) -{ - float32_t b0 = vgetq_lane_f32(vreinterpretq_f32_m128(b), 0); - float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); - //the upper values in the result must be the remnants of . - return vreinterpretq_m128_f32(vaddq_f32(a, value)); -} - -// Adds the 4 signed or unsigned 32-bit integers in a to the 4 signed or unsigned 32-bit integers in b. https://msdn.microsoft.com/en-us/library/vstudio/09xs4fkk(v=vs.100).aspx -FORCE_INLINE __m128i _mm_add_epi32(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s32(vaddq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); -} - -// Adds the 8 signed or unsigned 16-bit integers in a to the 8 signed or unsigned 16-bit integers in b. 
https://msdn.microsoft.com/en-us/library/fceha5k4(v=vs.100).aspx -FORCE_INLINE __m128i _mm_add_epi16(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s16(vaddq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); -} - -// Multiplies the 8 signed or unsigned 16-bit integers from a by the 8 signed or unsigned 16-bit integers from b. https://msdn.microsoft.com/en-us/library/vstudio/9ks1472s(v=vs.100).aspx -FORCE_INLINE __m128i _mm_mullo_epi16(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s16(vmulq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); -} - -// Multiplies the 4 signed or unsigned 32-bit integers from a by the 4 signed or unsigned 32-bit integers from b. https://msdn.microsoft.com/en-us/library/vstudio/bb531409(v=vs.100).aspx -FORCE_INLINE __m128i _mm_mullo_epi32(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s32(vmulq_s32(vreinterpretq_s32_m128i(a),vreinterpretq_s32_m128i(b))); -} - -// Multiplies the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/22kbk6t9(v=vs.100).aspx -FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_f32(vmulq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// Divides the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/edaw8147(v=vs.100).aspx -FORCE_INLINE __m128 _mm_div_ps(__m128 a, __m128 b) -{ - float32x4_t recip0 = vrecpeq_f32(vreinterpretq_f32_m128(b)); - float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, vreinterpretq_f32_m128(b))); - return vreinterpretq_m128_f32(vmulq_f32(vreinterpretq_f32_m128(a), recip1)); -} - -// Divides the scalar single-precision floating point value of a by b. 
https://msdn.microsoft.com/en-us/library/4y73xa49(v=vs.100).aspx -FORCE_INLINE __m128 _mm_div_ss(__m128 a, __m128 b) -{ - float32_t value = vgetq_lane_f32(vreinterpretq_f32_m128(_mm_div_ps(a, b)), 0); - return vreinterpretq_m128_f32(vsetq_lane_f32(value, vreinterpretq_f32_m128(a), 0)); -} - -// This version does additional iterations to improve accuracy. Between 1 and 4 recommended. -// Computes the approximations of reciprocals of the four single-precision, floating-point values of a. https://msdn.microsoft.com/en-us/library/vstudio/796k1tty(v=vs.100).aspx -FORCE_INLINE __m128 recipq_newton(__m128 in, int n) -{ - int i; - float32x4_t recip = vrecpeq_f32(vreinterpretq_f32_m128(in)); - for (i = 0; i < n; ++i) - { - recip = vmulq_f32(recip, vrecpsq_f32(recip, vreinterpretq_f32_m128(in))); - } - return vreinterpretq_m128_f32(recip); -} - -// Computes the approximations of reciprocals of the four single-precision, floating-point values of a. https://msdn.microsoft.com/en-us/library/vstudio/796k1tty(v=vs.100).aspx -FORCE_INLINE __m128 _mm_rcp_ps(__m128 in) -{ - float32x4_t recip = vrecpeq_f32(vreinterpretq_f32_m128(in)); - recip = vmulq_f32(recip, vrecpsq_f32(recip, vreinterpretq_f32_m128(in))); - return vreinterpretq_m128_f32(recip); -} - -// Computes the approximations of square roots of the four single-precision, floating-point values of a. First computes reciprocal square roots and then reciprocals of the four values. https://msdn.microsoft.com/en-us/library/vstudio/8z67bwwk(v=vs.100).aspx -FORCE_INLINE __m128 _mm_sqrt_ps(__m128 in) -{ - float32x4_t recipsq = vrsqrteq_f32(vreinterpretq_f32_m128(in)); - float32x4_t sq = vrecpeq_f32(recipsq); - // ??? use step versions of both sqrt and recip for better accuracy? - return vreinterpretq_m128_f32(sq); -} - -// Computes the approximation of the square root of the scalar single-precision floating point value of in. 
https://msdn.microsoft.com/en-us/library/ahfsc22d(v=vs.100).aspx -FORCE_INLINE __m128 _mm_sqrt_ss(__m128 in) -{ - float32_t value = vgetq_lane_f32(vreinterpretq_f32_m128(_mm_sqrt_ps(in)), 0); - return vreinterpretq_m128_f32(vsetq_lane_f32(value, vreinterpretq_f32_m128(in), 0)); -} - -// Computes the approximations of the reciprocal square roots of the four single-precision floating point values of in. https://msdn.microsoft.com/en-us/library/22hfsh53(v=vs.100).aspx -FORCE_INLINE __m128 _mm_rsqrt_ps(__m128 in) -{ - return vreinterpretq_m128_f32(vrsqrteq_f32(vreinterpretq_f32_m128(in))); -} - -// Computes the maximums of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/ff5d607a(v=vs.100).aspx -FORCE_INLINE __m128 _mm_max_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_f32(vmaxq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// Computes the minima of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/wh13kadz(v=vs.100).aspx -FORCE_INLINE __m128 _mm_min_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_f32(vminq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// Computes the maximum of the two lower scalar single-precision floating point values of a and b. https://msdn.microsoft.com/en-us/library/s6db5esz(v=vs.100).aspx -FORCE_INLINE __m128 _mm_max_ss(__m128 a, __m128 b) -{ - float32_t value = vgetq_lane_f32(vmaxq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0); - return vreinterpretq_m128_f32(vsetq_lane_f32(value, vreinterpretq_f32_m128(a), 0)); -} - -// Computes the minimum of the two lower scalar single-precision floating point values of a and b. 
https://msdn.microsoft.com/en-us/library/0a9y7xaa(v=vs.100).aspx -FORCE_INLINE __m128 _mm_min_ss(__m128 a, __m128 b) -{ - float32_t value = vgetq_lane_f32(vminq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0); - return vreinterpretq_m128_f32(vsetq_lane_f32(value, vreinterpretq_f32_m128(a), 0)); -} - -// Computes the pairwise minima of the 8 signed 16-bit integers from a and the 8 signed 16-bit integers from b. https://msdn.microsoft.com/en-us/library/vstudio/6te997ew(v=vs.100).aspx -FORCE_INLINE __m128i _mm_min_epi16(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s16(vminq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); -} - -// epi versions of min/max -// Computes the pariwise maximums of the four signed 32-bit integer values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/bb514055(v=vs.100).aspx -FORCE_INLINE __m128i _mm_max_epi32(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s32(vmaxq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); -} - -// Computes the pariwise minima of the four signed 32-bit integer values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/bb531476(v=vs.100).aspx -FORCE_INLINE __m128i _mm_min_epi32(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s32(vminq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); -} - -// Multiplies the 8 signed 16-bit integers from a by the 8 signed 16-bit integers from b. 
https://msdn.microsoft.com/en-us/library/vstudio/59hddw1d(v=vs.100).aspx -FORCE_INLINE __m128i _mm_mulhi_epi16(__m128i a, __m128i b) -{ - /* apoty: issue with large values because of result saturation */ - //int16x8_t ret = vqdmulhq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b)); /* =2*a*b */ - //return vreinterpretq_m128i_s16(vshrq_n_s16(ret, 1)); - int16x4_t a3210 = vget_low_s16(vreinterpretq_s16_m128i(a)); - int16x4_t b3210 = vget_low_s16(vreinterpretq_s16_m128i(b)); - int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ - int16x4_t a7654 = vget_high_s16(vreinterpretq_s16_m128i(a)); - int16x4_t b7654 = vget_high_s16(vreinterpretq_s16_m128i(b)); - int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ - uint16x8x2_t r = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); - return vreinterpretq_m128i_u16(r.val[1]); -} - -// Computes pairwise add of each argument as single-precision, floating-point values a and b. -//https://msdn.microsoft.com/en-us/library/yd9wecaa.aspx -FORCE_INLINE __m128 _mm_hadd_ps(__m128 a, __m128 b ) -{ -#if defined(__aarch64__) - return vreinterpretq_m128_f32(vpaddq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); //AArch64 -#else - float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a)); - float32x2_t a32 = vget_high_f32(vreinterpretq_f32_m128(a)); - float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b)); - float32x2_t b32 = vget_high_f32(vreinterpretq_f32_m128(b)); - return vreinterpretq_m128_f32(vcombine_f32(vpadd_f32(a10, a32), vpadd_f32(b10, b32))); -#endif -} - -// ****************************************** -// Compare operations -// ****************************************** - -// Compares for less than https://msdn.microsoft.com/en-us/library/vstudio/f330yhc8(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cmplt_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_u32(vcltq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// Compares for greater 
than. https://msdn.microsoft.com/en-us/library/vstudio/11dy102s(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cmpgt_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_u32(vcgtq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// Compares for greater than or equal. https://msdn.microsoft.com/en-us/library/vstudio/fs813y2t(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cmpge_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_u32(vcgeq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// Compares for less than or equal. https://msdn.microsoft.com/en-us/library/vstudio/1s75w83z(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cmple_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_u32(vcleq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// Compares for equality. https://msdn.microsoft.com/en-us/library/vstudio/36aectz5(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cmpeq_ps(__m128 a, __m128 b) -{ - return vreinterpretq_m128_u32(vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); -} - -// Compares the 4 signed 32-bit integers in a and the 4 signed 32-bit integers in b for less than. https://msdn.microsoft.com/en-us/library/vstudio/4ak0bf5d(v=vs.100).aspx -FORCE_INLINE __m128i _mm_cmplt_epi32(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_u32(vcltq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); -} - -// Compares the 4 signed 32-bit integers in a and the 4 signed 32-bit integers in b for greater than. https://msdn.microsoft.com/en-us/library/vstudio/1s9f2z0y(v=vs.100).aspx -FORCE_INLINE __m128i _mm_cmpgt_epi32(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_u32(vcgtq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); -} - -// Compares the four 32-bit floats in a and b to check if any values are NaN. Ordered compare between each value returns true for "orderable" and false for "not orderable" (NaN). 
https://msdn.microsoft.com/en-us/library/vstudio/0h9w00fx(v=vs.100).aspx -// see also: -// http://stackoverflow.com/questions/8627331/what-does-ordered-unordered-comparison-mean -// http://stackoverflow.com/questions/29349621/neon-isnanval-intrinsics -FORCE_INLINE __m128 _mm_cmpord_ps(__m128 a, __m128 b ) -{ - // Note: NEON does not have ordered compare builtin - // Need to compare a eq a and b eq b to check for NaN - // Do AND of results to get final - uint32x4_t ceqaa = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); - uint32x4_t ceqbb = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); - return vreinterpretq_m128_u32(vandq_u32(ceqaa, ceqbb)); -} - -// Compares the lower single-precision floating point scalar values of a and b using a less than operation. : https://msdn.microsoft.com/en-us/library/2kwe606b(v=vs.90).aspx -// Important note!! The documentation on MSDN is incorrect! If either of the values is a NAN the docs say you will get a one, but in fact, it will return a zero!! -FORCE_INLINE int _mm_comilt_ss(__m128 a, __m128 b) -{ - uint32x4_t a_not_nan = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); - uint32x4_t b_not_nan = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)); - return (vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0) ? 1 : 0; -} - -// Compares the lower single-precision floating point scalar values of a and b using a greater than operation. 
: https://msdn.microsoft.com/en-us/library/b0738e0t(v=vs.100).aspx -FORCE_INLINE int _mm_comigt_ss(__m128 a, __m128 b) -{ - //return vgetq_lane_u32(vcgtq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0); - uint32x4_t a_not_nan = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); - uint32x4_t b_not_nan = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)); - return (vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0) ? 1 : 0; -} - -// Compares the lower single-precision floating point scalar values of a and b using a less than or equal operation. : https://msdn.microsoft.com/en-us/library/1w4t7c57(v=vs.90).aspx -FORCE_INLINE int _mm_comile_ss(__m128 a, __m128 b) -{ - //return vgetq_lane_u32(vcleq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0); - uint32x4_t a_not_nan = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); - uint32x4_t b_not_nan = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)); - return (vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0) ? 1 : 0; -} - -// Compares the lower single-precision floating point scalar values of a and b using a greater than or equal operation. 
: https://msdn.microsoft.com/en-us/library/8t80des6(v=vs.100).aspx -FORCE_INLINE int _mm_comige_ss(__m128 a, __m128 b) -{ - //return vgetq_lane_u32(vcgeq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0); - uint32x4_t a_not_nan = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); - uint32x4_t b_not_nan = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)); - return (vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0) ? 1 : 0; -} - -// Compares the lower single-precision floating point scalar values of a and b using an equality operation. : https://msdn.microsoft.com/en-us/library/93yx2h2b(v=vs.100).aspx -FORCE_INLINE int _mm_comieq_ss(__m128 a, __m128 b) -{ - //return vgetq_lane_u32(vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0); - uint32x4_t a_not_nan = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); - uint32x4_t b_not_nan = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)); - return (vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0) ? 1 : 0; -} - -// Compares the lower single-precision floating point scalar values of a and b using an inequality operation. 
: https://msdn.microsoft.com/en-us/library/bafh5e0a(v=vs.90).aspx -FORCE_INLINE int _mm_comineq_ss(__m128 a, __m128 b) -{ - //return !vgetq_lane_u32(vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)), 0); - uint32x4_t a_not_nan = vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); - uint32x4_t b_not_nan = vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); - return (vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0) ? 1 : 0; -} - -// according to the documentation, these intrinsics behave the same as the non-'u' versions. We'll just alias them here. -#define _mm_ucomilt_ss _mm_comilt_ss -#define _mm_ucomile_ss _mm_comile_ss -#define _mm_ucomigt_ss _mm_comigt_ss -#define _mm_ucomige_ss _mm_comige_ss -#define _mm_ucomieq_ss _mm_comieq_ss -#define _mm_ucomineq_ss _mm_comineq_ss - -// ****************************************** -// Conversions -// ****************************************** - -// Converts the four single-precision, floating-point values of a to signed 32-bit integer values using truncate. https://msdn.microsoft.com/en-us/library/vstudio/1h005y6x(v=vs.100).aspx -FORCE_INLINE __m128i _mm_cvttps_epi32(__m128 a) -{ - return vreinterpretq_m128i_s32(vcvtq_s32_f32(vreinterpretq_f32_m128(a))); -} - -// Converts the four signed 32-bit integer values of a to single-precision, floating-point values https://msdn.microsoft.com/en-us/library/vstudio/36bwxcx5(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cvtepi32_ps(__m128i a) -{ - return vreinterpretq_m128_f32(vcvtq_f32_s32(vreinterpretq_s32_m128i(a))); -} - -// Converts the four unsigned 8-bit integers in the lower 32 bits to four unsigned 32-bit integers. 
https://msdn.microsoft.com/en-us/library/bb531467%28v=vs.100%29.aspx -FORCE_INLINE __m128i _mm_cvtepu8_epi32(__m128i a) -{ - uint8x16_t u8x16 = vreinterpretq_u8_s32(a); /* xxxx xxxx xxxx DCBA */ - uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ - uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ - return vreinterpretq_s32_u32(u32x4); -} - -// Converts the four signed 16-bit integers in the lower 64 bits to four signed 32-bit integers. https://msdn.microsoft.com/en-us/library/bb514079%28v=vs.100%29.aspx -FORCE_INLINE __m128i _mm_cvtepi16_epi32(__m128i a) -{ - return vreinterpretq_m128i_s32(vmovl_s16(vget_low_s16(vreinterpretq_s16_m128i(a)))); -} - -// Converts the four single-precision, floating-point values of a to signed 32-bit integer values. https://msdn.microsoft.com/en-us/library/vstudio/xdc42k5e(v=vs.100).aspx -// *NOTE*. The default rounding mode on SSE is 'round to even', which ArmV7 does not support! -// It is supported on ARMv8 however. 
-FORCE_INLINE __m128i _mm_cvtps_epi32(__m128 a) -{ -#if defined(__aarch64__) - return vcvtnq_s32_f32(a); -#else - uint32x4_t signmask = vdupq_n_u32(0x80000000); - float32x4_t half = vbslq_f32(signmask, vreinterpretq_f32_m128(a), vdupq_n_f32(0.5f)); /* +/- 0.5 */ - int32x4_t r_normal = vcvtq_s32_f32(vaddq_f32(vreinterpretq_f32_m128(a), half)); /* round to integer: [a + 0.5]*/ - int32x4_t r_trunc = vcvtq_s32_f32(vreinterpretq_f32_m128(a)); /* truncate to integer: [a] */ - int32x4_t plusone = vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(vnegq_s32(r_trunc)), 31)); /* 1 or 0 */ - int32x4_t r_even = vbicq_s32(vaddq_s32(r_trunc, plusone), vdupq_n_s32(1)); /* ([a] + {0,1}) & ~1 */ - float32x4_t delta = vsubq_f32(vreinterpretq_f32_m128(a), vcvtq_f32_s32(r_trunc)); /* compute delta: delta = (a - [a]) */ - uint32x4_t is_delta_half = vceqq_f32(delta, half); /* delta == +/- 0.5 */ - return vreinterpretq_m128i_s32(vbslq_s32(is_delta_half, r_even, r_normal)); -#endif -} - -// Moves the least significant 32 bits of a to a 32-bit integer. https://msdn.microsoft.com/en-us/library/5z7a9642%28v=vs.90%29.aspx -FORCE_INLINE int _mm_cvtsi128_si32(__m128i a) -{ - return vgetq_lane_s32(vreinterpretq_s32_m128i(a), 0); -} - -// Moves 32-bit integer a to the least significant 32 bits of an __m128 object, zero extending the upper bits. https://msdn.microsoft.com/en-us/library/ct3539ha%28v=vs.90%29.aspx -FORCE_INLINE __m128i _mm_cvtsi32_si128(int a) -{ - return vreinterpretq_m128i_s32(vsetq_lane_s32(a, vdupq_n_s32(0), 0)); -} - - -// Applies a type cast to reinterpret four 32-bit floating point values passed in as a 128-bit parameter as packed 32-bit integers. https://msdn.microsoft.com/en-us/library/bb514099.aspx -FORCE_INLINE __m128i _mm_castps_si128(__m128 a) -{ - return vreinterpretq_m128i_s32(vreinterpretq_s32_m128(a)); -} - -// Applies a type cast to reinterpret four 32-bit integers passed in as a 128-bit parameter as packed 32-bit floating point values. 
https://msdn.microsoft.com/en-us/library/bb514029.aspx -FORCE_INLINE __m128 _mm_castsi128_ps(__m128i a) -{ - return vreinterpretq_m128_s32(vreinterpretq_s32_m128i(a)); -} - -// Loads 128-bit value. : https://msdn.microsoft.com/en-us/library/atzzad1h(v=vs.80).aspx -FORCE_INLINE __m128i _mm_load_si128(const __m128i *p) -{ - return vreinterpretq_m128i_s32(vld1q_s32((int32_t *)p)); -} - -// ****************************************** -// Miscellaneous Operations -// ****************************************** - -// Packs the 16 signed 16-bit integers from a and b into 8-bit integers and saturates. https://msdn.microsoft.com/en-us/library/k4y4f7w5%28v=vs.90%29.aspx -FORCE_INLINE __m128i _mm_packs_epi16(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s8(vcombine_s8(vqmovn_s16(vreinterpretq_s16_m128i(a)), vqmovn_s16(vreinterpretq_s16_m128i(b)))); -} - -// Packs the 16 signed 16 - bit integers from a and b into 8 - bit unsigned integers and saturates. https://msdn.microsoft.com/en-us/library/07ad1wx4(v=vs.100).aspx -FORCE_INLINE __m128i _mm_packus_epi16(const __m128i a, const __m128i b) -{ - return vreinterpretq_m128i_u8(vcombine_u8(vqmovun_s16(vreinterpretq_s16_m128i(a)), vqmovun_s16(vreinterpretq_s16_m128i(b)))); -} - -// Packs the 8 signed 32-bit integers from a and b into signed 16-bit integers and saturates. https://msdn.microsoft.com/en-us/library/393t56f9%28v=vs.90%29.aspx -FORCE_INLINE __m128i _mm_packs_epi32(__m128i a, __m128i b) -{ - return vreinterpretq_m128i_s16(vcombine_s16(vqmovn_s32(vreinterpretq_s32_m128i(a)), vqmovn_s32(vreinterpretq_s32_m128i(b)))); -} - -// Interleaves the lower 8 signed or unsigned 8-bit integers in a with the lower 8 signed or unsigned 8-bit integers in b. 
https://msdn.microsoft.com/en-us/library/xf7k860c%28v=vs.90%29.aspx -FORCE_INLINE __m128i _mm_unpacklo_epi8(__m128i a, __m128i b) -{ - int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(vreinterpretq_s16_m128i(a))); - int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(vreinterpretq_s16_m128i(b))); - int8x8x2_t result = vzip_s8(a1, b1); - return vreinterpretq_m128i_s8(vcombine_s8(result.val[0], result.val[1])); -} - -// Interleaves the lower 4 signed or unsigned 16-bit integers in a with the lower 4 signed or unsigned 16-bit integers in b. https://msdn.microsoft.com/en-us/library/btxb17bw%28v=vs.90%29.aspx -FORCE_INLINE __m128i _mm_unpacklo_epi16(__m128i a, __m128i b) -{ - int16x4_t a1 = vget_low_s16(vreinterpretq_s16_m128i(a)); - int16x4_t b1 = vget_low_s16(vreinterpretq_s16_m128i(b)); - int16x4x2_t result = vzip_s16(a1, b1); - return vreinterpretq_m128i_s16(vcombine_s16(result.val[0], result.val[1])); -} - -// Interleaves the lower 2 signed or unsigned 32 - bit integers in a with the lower 2 signed or unsigned 32 - bit integers in b. https://msdn.microsoft.com/en-us/library/x8atst9d(v=vs.100).aspx -FORCE_INLINE __m128i _mm_unpacklo_epi32(__m128i a, __m128i b) -{ - int32x2_t a1 = vget_low_s32(vreinterpretq_s32_m128i(a)); - int32x2_t b1 = vget_low_s32(vreinterpretq_s32_m128i(b)); - int32x2x2_t result = vzip_s32(a1, b1); - return vreinterpretq_m128i_s32(vcombine_s32(result.val[0], result.val[1])); -} - -// Selects and interleaves the lower two single-precision, floating-point values from a and b. https://msdn.microsoft.com/en-us/library/25st103b%28v=vs.90%29.aspx -FORCE_INLINE __m128 _mm_unpacklo_ps(__m128 a, __m128 b) -{ - float32x2_t a1 = vget_low_f32(vreinterpretq_f32_m128(a)); - float32x2_t b1 = vget_low_f32(vreinterpretq_f32_m128(b)); - float32x2x2_t result = vzip_f32(a1, b1); - return vreinterpretq_m128_f32(vcombine_f32(result.val[0], result.val[1])); -} - -// Selects and interleaves the upper two single-precision, floating-point values from a and b. 
https://msdn.microsoft.com/en-us/library/skccxx7d%28v=vs.90%29.aspx -FORCE_INLINE __m128 _mm_unpackhi_ps(__m128 a, __m128 b) -{ - float32x2_t a1 = vget_high_f32(vreinterpretq_f32_m128(a)); - float32x2_t b1 = vget_high_f32(vreinterpretq_f32_m128(b)); - float32x2x2_t result = vzip_f32(a1, b1); - return vreinterpretq_m128_f32(vcombine_f32(result.val[0], result.val[1])); -} - -// Interleaves the upper 8 signed or unsigned 8-bit integers in a with the upper 8 signed or unsigned 8-bit integers in b. https://msdn.microsoft.com/en-us/library/t5h7783k(v=vs.100).aspx -FORCE_INLINE __m128i _mm_unpackhi_epi8(__m128i a, __m128i b) -{ - int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(vreinterpretq_s16_m128i(a))); - int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(vreinterpretq_s16_m128i(b))); - int8x8x2_t result = vzip_s8(a1, b1); - return vreinterpretq_m128i_s8(vcombine_s8(result.val[0], result.val[1])); -} - -// Interleaves the upper 4 signed or unsigned 16-bit integers in a with the upper 4 signed or unsigned 16-bit integers in b. https://msdn.microsoft.com/en-us/library/03196cz7(v=vs.100).aspx -FORCE_INLINE __m128i _mm_unpackhi_epi16(__m128i a, __m128i b) -{ - int16x4_t a1 = vget_high_s16(vreinterpretq_s16_m128i(a)); - int16x4_t b1 = vget_high_s16(vreinterpretq_s16_m128i(b)); - int16x4x2_t result = vzip_s16(a1, b1); - return vreinterpretq_m128i_s16(vcombine_s16(result.val[0], result.val[1])); -} - -// Interleaves the upper 2 signed or unsigned 32-bit integers in a with the upper 2 signed or unsigned 32-bit integers in b. https://msdn.microsoft.com/en-us/library/65sa7cbs(v=vs.100).aspx -FORCE_INLINE __m128i _mm_unpackhi_epi32(__m128i a, __m128i b) -{ - int32x2_t a1 = vget_high_s32(vreinterpretq_s32_m128i(a)); - int32x2_t b1 = vget_high_s32(vreinterpretq_s32_m128i(b)); - int32x2x2_t result = vzip_s32(a1, b1); - return vreinterpretq_m128i_s32(vcombine_s32(result.val[0], result.val[1])); -} - -// Extracts the selected signed or unsigned 16-bit integer from a and zero extends. 
https://msdn.microsoft.com/en-us/library/6dceta0c(v=vs.100).aspx -//FORCE_INLINE int _mm_extract_epi16(__m128i a, __constrange(0,8) int imm) -#define _mm_extract_epi16(a, imm) \ -({ \ - (vgetq_lane_s16(vreinterpretq_s16_m128i(a), (imm)) & 0x0000ffffUL); \ -}) - -// Inserts the least significant 16 bits of b into the selected 16-bit integer of a. https://msdn.microsoft.com/en-us/library/kaze8hz1%28v=vs.100%29.aspx -//FORCE_INLINE __m128i _mm_insert_epi16(__m128i a, const int b, __constrange(0,8) int imm) -#define _mm_insert_epi16(a, b, imm) \ -({ \ - vreinterpretq_m128i_s16(vsetq_lane_s16((b), vreinterpretq_s16_m128i(a), (imm))); \ -}) - -// ****************************************** -// Streaming Extensions -// ****************************************** - -// Guarantees that every preceding store is globally visible before any subsequent store. https://msdn.microsoft.com/en-us/library/5h2w73d1%28v=vs.90%29.aspx -FORCE_INLINE void _mm_sfence(void) -{ - __sync_synchronize(); -} - -// Stores the data in a to the address p without polluting the caches. If the cache line containing address p is already in the cache, the cache will be updated.Address p must be 16 - byte aligned. https://msdn.microsoft.com/en-us/library/ba08y07y%28v=vs.90%29.aspx -FORCE_INLINE void _mm_stream_si128(__m128i *p, __m128i a) -{ - *p = a; -} - -// Cache line containing p is flushed and invalidated from all caches in the coherency domain. : https://msdn.microsoft.com/en-us/library/ba08y07y(v=vs.100).aspx -FORCE_INLINE void _mm_clflush(void const*p) -{ - // no corollary for Neon? -} - -#if defined(__GNUC__) || defined(__clang__) -# pragma pop_macro("ALIGN_STRUCT") -# pragma pop_macro("FORCE_INLINE") -#endif - -#endif diff --git a/src/crypto/argon2_hasher/common/DLLExport.h b/src/crypto/argon2_hasher/common/DLLExport.h new file mode 100644 index 00000000..3019914f --- /dev/null +++ b/src/crypto/argon2_hasher/common/DLLExport.h @@ -0,0 +1,16 @@ +// +// Created by Haifa Bogdan Adnan on 04.11.2018. 
+// + +#ifndef ARGON2_DLLEXPORT_H +#define ARGON2_DLLEXPORT_H + +#undef DLLEXPORT + +#ifndef _WIN64 + #define DLLEXPORT +#else + #define DLLEXPORT __declspec(dllexport) +#endif + +#endif //ARGON2_DLLEXPORT_H diff --git a/src/crypto/argon2_hasher/common/DLLImport.h b/src/crypto/argon2_hasher/common/DLLImport.h new file mode 100644 index 00000000..1946a4a2 --- /dev/null +++ b/src/crypto/argon2_hasher/common/DLLImport.h @@ -0,0 +1,16 @@ +// +// Created by Haifa Bogdan Adnan on 04.11.2018. +// + +#ifndef ARGON2_DLLIMPORT_H +#define ARGON2_DLLIMPORT_H + +#ifndef DLLEXPORT + #ifndef _WIN64 + #define DLLEXPORT + #else + #define DLLEXPORT __declspec(dllimport) + #endif +#endif + +#endif //ARGON2_DLLIMPORT_H diff --git a/src/crypto/argon2_hasher/common/common.cpp b/src/crypto/argon2_hasher/common/common.cpp new file mode 100644 index 00000000..676e5a80 --- /dev/null +++ b/src/crypto/argon2_hasher/common/common.cpp @@ -0,0 +1,21 @@ +// +// Created by Haifa Bogdan Adnan on 05/08/2018. +// + +#include "DLLExport.h" +#include "common.h" +#include + +vector getFiles(const string &folder) { + vector result; + DIR *dir; + struct dirent *ent; + if ((dir = opendir (folder.c_str())) != NULL) { + while ((ent = readdir (dir)) != NULL) { + if(ent->d_type == DT_REG) + result.push_back(ent->d_name); + } + closedir (dir); + } + return result; +} diff --git a/src/crypto/argon2_hasher/common/common.h b/src/crypto/argon2_hasher/common/common.h new file mode 100755 index 00000000..753716a0 --- /dev/null +++ b/src/crypto/argon2_hasher/common/common.h @@ -0,0 +1,56 @@ +// +// Created by Haifa Bogdan Adnan on 04/08/2018. 
+// + +#ifndef ARGON2_COMMON_H +#define ARGON2_COMMON_H + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include "DLLImport.h" + +#ifndef _WIN64 +#include +#include + +#include +#include +#include +#include +#else +#include +#endif + +#ifdef __APPLE__ +#include "../macosx/cpu_affinity.h" +#endif + +using namespace std; + +#define LOG(msg) cout< getFiles(const string &folder); + +#endif //ARGON2_COMMON_H diff --git a/src/crypto/argon2_hasher/crypt/base64.cpp b/src/crypto/argon2_hasher/crypt/base64.cpp new file mode 100644 index 00000000..12975989 --- /dev/null +++ b/src/crypto/argon2_hasher/crypt/base64.cpp @@ -0,0 +1,103 @@ +// +// Created by Haifa Bogdan Adnan on 17/08/2018. +// + +#include "crypto/argon2_hasher/common/DLLExport.h" +#include "../common/common.h" +#include "base64.h" + +static const string base64_chars = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +static inline bool is_base64(unsigned char c) { + return (isalnum(c) || (c == '+') || (c == '/')); +} + +void base64::encode(const char *input, int input_size, char *output) { + char *ret = output; + int i = 0; + int j = 0; + unsigned char char_array_3[3]; + unsigned char char_array_4[4]; + + while (input_size--) { + char_array_3[i++] = *(input++); + if (i == 3) { + char_array_4[0] = (char_array_3[0] & 0xfc) >> 2; + char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4); + char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6); + char_array_4[3] = char_array_3[2] & 0x3f; + + for(i = 0; (i <4) ; i++) + *(ret++) = base64_chars[char_array_4[i]]; + i = 0; + } + } + + if (i) + { + for(j = i; j < 3; j++) + char_array_3[j] = '\0'; + + char_array_4[0] = (char_array_3[0] & 0xfc) >> 2; + char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + 
((char_array_3[1] & 0xf0) >> 4); + char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6); + char_array_4[3] = char_array_3[2] & 0x3f; + + for (j = 0; (j < i + 1); j++) + *(ret++) = base64_chars[char_array_4[j]]; + + while((i++ < 3)) + *(ret++) = '='; + + } +} + +int base64::decode(const char *input, char *output, int output_size) { + size_t in_len = strlen(input); + int i = 0; + int j = 0; + int in_ = 0; + unsigned char char_array_4[4], char_array_3[3]; + char *ret = output; + int out_size = 0; + + while (in_len-- && ( input[in_] != '=') && is_base64(input[in_])) { + char_array_4[i++] = input[in_]; in_++; + if (i ==4) { + for (i = 0; i <4; i++) + char_array_4[i] = base64_chars.find(char_array_4[i]); + + char_array_3[0] = ( char_array_4[0] << 2 ) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; + + for (i = 0; (i < 3); i++) { + out_size ++; + if(output_size < out_size) + return -1; + *(ret++) = char_array_3[i]; + } + i = 0; + } + } + + if (i) { + for (j = 0; j < i; j++) + char_array_4[j] = base64_chars.find(char_array_4[j]); + + char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + + for (j = 0; (j < i - 1); j++) { + out_size ++; + if(output_size < out_size) + return -1; + *(ret++) = char_array_3[j]; + } + } + return out_size; +} + diff --git a/src/crypto/argon2_hasher/crypt/base64.h b/src/crypto/argon2_hasher/crypt/base64.h new file mode 100644 index 00000000..2ce74b88 --- /dev/null +++ b/src/crypto/argon2_hasher/crypt/base64.h @@ -0,0 +1,14 @@ +// +// Created by Haifa Bogdan Adnan on 17/08/2018. 
+// + +#ifndef ARGON2_BASE64_H +#define ARGON2_BASE64_H + +class DLLEXPORT base64 { +public: + static void encode(const char *input, int input_size, char *output); + static int decode(const char *input, char *output, int output_size); +}; + +#endif //ARGON2_BASE64_H diff --git a/src/crypto/argon2_hasher/crypt/hex.cpp b/src/crypto/argon2_hasher/crypt/hex.cpp new file mode 100644 index 00000000..e8a86312 --- /dev/null +++ b/src/crypto/argon2_hasher/crypt/hex.cpp @@ -0,0 +1,30 @@ +// +// Created by Haifa Bogdan Adnan on 30/05/2019. +// + +#include "crypto/argon2_hasher/common/DLLExport.h" +#include "../common/common.h" +#include "hex.h" + +void hex::encode(const unsigned char *input, int input_size, char *output) { + for ( int i=0; i> 4; // hi nybble + char b2= *input & 0x0f; // lo nybble + b1+='0'; if (b1>'9') b1 += 7; // gap between '9' and 'A' + b2+='0'; if (b2>'9') b2 += 7; + *(output++)= b1; + *(output++) = b2; + input++; + } + *output = 0; +} + +int hex::decode(const char *input, unsigned char *output, int output_size) { + size_t in_len = strlen(input); + for ( int i=0; i9) b1 -= 7; + unsigned char b2= input[i+1] -'0'; if (b2>9) b2 -= 7; + *(output++) = (b1<<4) + b2; // <<4 multiplies by 16 + } + return in_len / 2; +} diff --git a/src/crypto/argon2_hasher/crypt/hex.h b/src/crypto/argon2_hasher/crypt/hex.h new file mode 100644 index 00000000..038f2f8e --- /dev/null +++ b/src/crypto/argon2_hasher/crypt/hex.h @@ -0,0 +1,14 @@ +// +// Created by Haifa Bogdan Adnan on 30/05/2019. 
+// + +#ifndef ARGON2_HEX_H +#define ARGON2_HEX_H + +class DLLEXPORT hex { +public: + static void encode(const unsigned char *input, int input_size, char *output); + static int decode(const char *input, unsigned char *output, int output_size); +}; + +#endif //ARGON2_HEX_H diff --git a/src/crypto/argon2_hasher/crypt/random_generator.cpp b/src/crypto/argon2_hasher/crypt/random_generator.cpp new file mode 100644 index 00000000..a6801266 --- /dev/null +++ b/src/crypto/argon2_hasher/crypt/random_generator.cpp @@ -0,0 +1,27 @@ +// +// Created by Haifa Bogdan Adnan on 17/08/2018. +// + +#include "crypto/argon2_hasher/common/DLLExport.h" +#include "../common/common.h" + +#include "random_generator.h" + +random_generator::random_generator() : __mt19937Gen(__randomDevice()), __mt19937Distr(0, 255) { + +} + +random_generator &random_generator::instance() { + return __instance; +} + +void random_generator::get_random_data(unsigned char *buffer, int length) { +// __thread_lock.lock(); + for(int i=0;i __mt19937Distr; + mutex __thread_lock; + + static random_generator __instance; +}; + +#endif //ARGON2_RANDOM_GENERATOR_H diff --git a/src/crypto/argon2_hasher/crypt/sha512.cpp b/src/crypto/argon2_hasher/crypt/sha512.cpp new file mode 100644 index 00000000..d94ec1bb --- /dev/null +++ b/src/crypto/argon2_hasher/crypt/sha512.cpp @@ -0,0 +1,152 @@ +#include "crypto/argon2_hasher/common/DLLExport.h" + +#include +#include +#include "sha512.h" + +const unsigned long long SHA512::sha512_k[80] = //ULL = uint64 + {0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, + 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, + 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, + 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, + 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, + 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, + 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, + 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL, + 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, + 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, + 
0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, + 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, + 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, + 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL, + 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, + 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, + 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, + 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL, + 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, + 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL, + 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, + 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, + 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, + 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, + 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, + 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL, + 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, + 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, + 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, + 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, + 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, + 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL, + 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, + 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, + 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, + 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, + 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, + 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL, + 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, + 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL}; + +void SHA512::transform(const unsigned char *message, unsigned int block_nb) +{ + uint64 w[80]; + uint64 wv[8]; + uint64 t1, t2; + const unsigned char *sub_block; + int i, j; + for (i = 0; i < (int) block_nb; i++) { + sub_block = message + (i << 7); + for (j = 0; j < 16; j++) { + SHA2_PACK64(&sub_block[j << 3], &w[j]); + } + for (j = 16; j < 80; j++) { + w[j] = SHA512_F4(w[j - 2]) + w[j - 7] + SHA512_F3(w[j - 15]) + w[j - 16]; + } + for (j = 0; j < 8; j++) { + wv[j] = m_h[j]; + } + for (j = 0; j < 80; j++) { + t1 = wv[7] + SHA512_F2(wv[4]) + 
SHA2_CH(wv[4], wv[5], wv[6]) + + sha512_k[j] + w[j]; + t2 = SHA512_F1(wv[0]) + SHA2_MAJ(wv[0], wv[1], wv[2]); + wv[7] = wv[6]; + wv[6] = wv[5]; + wv[5] = wv[4]; + wv[4] = wv[3] + t1; + wv[3] = wv[2]; + wv[2] = wv[1]; + wv[1] = wv[0]; + wv[0] = t1 + t2; + } + for (j = 0; j < 8; j++) { + m_h[j] += wv[j]; + } + + } +} + +void SHA512::init() +{ + m_h[0] = 0x6a09e667f3bcc908ULL; + m_h[1] = 0xbb67ae8584caa73bULL; + m_h[2] = 0x3c6ef372fe94f82bULL; + m_h[3] = 0xa54ff53a5f1d36f1ULL; + m_h[4] = 0x510e527fade682d1ULL; + m_h[5] = 0x9b05688c2b3e6c1fULL; + m_h[6] = 0x1f83d9abfb41bd6bULL; + m_h[7] = 0x5be0cd19137e2179ULL; + m_len = 0; + m_tot_len = 0; +} + +void SHA512::update(const unsigned char *message, unsigned int len) +{ + unsigned int block_nb; + unsigned int new_len, rem_len, tmp_len; + const unsigned char *shifted_message; + tmp_len = SHA384_512_BLOCK_SIZE - m_len; + rem_len = len < tmp_len ? len : tmp_len; + memcpy(&m_block[m_len], message, rem_len); + if (m_len + len < SHA384_512_BLOCK_SIZE) { + m_len += len; + return; + } + new_len = len - rem_len; + block_nb = new_len / SHA384_512_BLOCK_SIZE; + shifted_message = message + rem_len; + transform(m_block, 1); + transform(shifted_message, block_nb); + rem_len = new_len % SHA384_512_BLOCK_SIZE; + memcpy(m_block, &shifted_message[block_nb << 7], rem_len); + m_len = rem_len; + m_tot_len += (block_nb + 1) << 7; +} + +void SHA512::final(unsigned char *digest) +{ + unsigned int block_nb; + unsigned int pm_len; + unsigned int len_b; + int i; + block_nb = 1 + ((SHA384_512_BLOCK_SIZE - 17) + < (m_len % SHA384_512_BLOCK_SIZE)); + len_b = (m_tot_len + m_len) << 3; + pm_len = block_nb << 7; + memset(m_block + m_len, 0, pm_len - m_len); + m_block[m_len] = 0x80; + SHA2_UNPACK32(len_b, m_block + pm_len - 4); + transform(m_block, block_nb); + for (i = 0 ; i < 8; i++) { + SHA2_UNPACK64(m_h[i], &digest[i << 3]); + } +} + +unsigned char *SHA512::hash(unsigned char *input, size_t length) +{ + unsigned char *digest = (unsigned 
char*)malloc(SHA512::DIGEST_SIZE); + memset(digest,0,SHA512::DIGEST_SIZE); + SHA512 ctx = SHA512(); + ctx.init(); + ctx.update(input, length); + ctx.final(digest); + return digest; +} diff --git a/src/crypto/argon2_hasher/crypt/sha512.h b/src/crypto/argon2_hasher/crypt/sha512.h new file mode 100644 index 00000000..5eb28326 --- /dev/null +++ b/src/crypto/argon2_hasher/crypt/sha512.h @@ -0,0 +1,70 @@ +#ifndef SHA512_H +#define SHA512_H + +#include + +class DLLEXPORT SHA512 +{ +protected: + typedef unsigned char uint8; + typedef unsigned int uint32; + typedef unsigned long long uint64; + + const static uint64 sha512_k[]; + static const unsigned int SHA384_512_BLOCK_SIZE = (1024/8); + +public: + void init(); + void update(const unsigned char *message, unsigned int len); + void final(unsigned char *digest); + static const unsigned int DIGEST_SIZE = ( 512 / 8); + + static unsigned char *hash(unsigned char *input, size_t length); +protected: + void transform(const unsigned char *message, unsigned int block_nb); + unsigned int m_tot_len; + unsigned int m_len; + unsigned char m_block[2 * SHA384_512_BLOCK_SIZE]; + uint64 m_h[8]; +}; + +#define SHA2_SHFR(x, n) (x >> n) +#define SHA2_ROTR(x, n) ((x >> n) | (x << ((sizeof(x) << 3) - n))) +#define SHA2_ROTL(x, n) ((x << n) | (x >> ((sizeof(x) << 3) - n))) +#define SHA2_CH(x, y, z) ((x & y) ^ (~x & z)) +#define SHA2_MAJ(x, y, z) ((x & y) ^ (x & z) ^ (y & z)) +#define SHA512_F1(x) (SHA2_ROTR(x, 28) ^ SHA2_ROTR(x, 34) ^ SHA2_ROTR(x, 39)) +#define SHA512_F2(x) (SHA2_ROTR(x, 14) ^ SHA2_ROTR(x, 18) ^ SHA2_ROTR(x, 41)) +#define SHA512_F3(x) (SHA2_ROTR(x, 1) ^ SHA2_ROTR(x, 8) ^ SHA2_SHFR(x, 7)) +#define SHA512_F4(x) (SHA2_ROTR(x, 19) ^ SHA2_ROTR(x, 61) ^ SHA2_SHFR(x, 6)) +#define SHA2_UNPACK32(x, str) \ +{ \ +*((str) + 3) = (uint8) ((x) ); \ +*((str) + 2) = (uint8) ((x) >> 8); \ +*((str) + 1) = (uint8) ((x) >> 16); \ +*((str) + 0) = (uint8) ((x) >> 24); \ +} +#define SHA2_UNPACK64(x, str) \ +{ \ +*((str) + 7) = (uint8) ((x) ); \ 
+*((str) + 6) = (uint8) ((x) >> 8); \ +*((str) + 5) = (uint8) ((x) >> 16); \ +*((str) + 4) = (uint8) ((x) >> 24); \ +*((str) + 3) = (uint8) ((x) >> 32); \ +*((str) + 2) = (uint8) ((x) >> 40); \ +*((str) + 1) = (uint8) ((x) >> 48); \ +*((str) + 0) = (uint8) ((x) >> 56); \ +} +#define SHA2_PACK64(str, x) \ +{ \ +*(x) = ((uint64) *((str) + 7) ) \ +| ((uint64) *((str) + 6) << 8) \ +| ((uint64) *((str) + 5) << 16) \ +| ((uint64) *((str) + 4) << 24) \ +| ((uint64) *((str) + 3) << 32) \ +| ((uint64) *((str) + 2) << 40) \ +| ((uint64) *((str) + 1) << 48) \ +| ((uint64) *((str) + 0) << 56); \ +} + +#endif diff --git a/src/crypto/argon2_hasher/hash/Hasher.cpp b/src/crypto/argon2_hasher/hash/Hasher.cpp new file mode 100755 index 00000000..cea64052 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/Hasher.cpp @@ -0,0 +1,132 @@ +// +// Created by Haifa Bogdan Adnan on 03/08/2018. +// + +#include "../common/common.h" +#include "../crypt/base64.h" +#include "../crypt/hex.h" +#include "../crypt/random_generator.h" + +#include "crypto/argon2_hasher/common/DLLExport.h" +#include "crypto/argon2_hasher/hash/argon2/Argon2.h" +#include "Hasher.h" + +vector *Hasher::m_registeredHashers = NULL; +string Hasher::m_appFolder = ""; + +typedef void (*hasherLoader)(); + +Hasher::Hasher() { + m_intensity = 0; + m_type = ""; + m_subType = ""; + m_shortSubType = ""; + m_description = ""; + + m_computingThreads = 1; + + if(m_registeredHashers == NULL) { + m_registeredHashers = new vector(); + } + + m_registeredHashers->push_back(this); +} + +Hasher::~Hasher() {}; + +string Hasher::type() { + return m_type; +} + +string Hasher::subType(bool shortName) { + if(shortName && !(m_shortSubType.empty())) { + string shortVersion = m_shortSubType; + shortVersion.erase(3); + return shortVersion; + } + else + return m_subType; +} + +string Hasher::info() { + return m_description; +} + +int Hasher::computingThreads() { + return m_computingThreads; +} + +void Hasher::loadHashers(const string &appPath) { + 
m_registeredHashers = new vector(); + + string modulePath = "."; + + size_t lastSlash = appPath.find_last_of("/\\"); + if (lastSlash != string::npos) { + modulePath = appPath.substr(0, lastSlash); + if(modulePath.empty()) { + modulePath = "."; + } + } + + m_appFolder = modulePath; + + modulePath += "/modules/"; + + vector files = getFiles(modulePath); + for(string file : files) { + if(file.find(".hsh") != string::npos) { + void *dllHandle = dlopen((modulePath + file).c_str(), RTLD_LAZY); + if(dllHandle != NULL) { + hasherLoader hasherLoaderPtr = (hasherLoader) dlsym(dllHandle, "hasherLoader"); + (*hasherLoaderPtr)(); + } + } + } +} + +vector Hasher::getHashers() { + return *m_registeredHashers; +} + +vector Hasher::getActiveHashers() { + vector filtered; + for(Hasher *hasher : *m_registeredHashers) { + if(hasher->m_intensity != 0) + filtered.push_back(hasher); + } + return filtered; +} + +vector Hasher::getHashers(const string &type) { + vector filtered; + for(Hasher *hasher : *m_registeredHashers) { + if(hasher->m_type == type) + filtered.push_back(hasher); + } + return filtered; +} + +map &Hasher::devices() { + return m_deviceInfos; +} + +void Hasher::storeDeviceInfo(int deviceId, DeviceInfo device) { + m_deviceInfosMutex.lock(); + m_deviceInfos[deviceId] = device; + m_deviceInfosMutex.unlock(); +} + +Argon2Profile *Hasher::getArgon2Profile(xmrig::Algo algorithm, xmrig::Variant variant) { + if(algorithm == xmrig::ARGON2) { + switch(variant) { + case xmrig::VARIANT_CHUKWA: + return &argon2profile_3_1_512; + case xmrig::VARIANT_CHUKWA_LITE: + return &argon2profile_4_1_256; + default: + return nullptr; + } + } + return nullptr; +} diff --git a/src/crypto/argon2_hasher/hash/Hasher.h b/src/crypto/argon2_hasher/hash/Hasher.h new file mode 100755 index 00000000..3f0c1b86 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/Hasher.h @@ -0,0 +1,63 @@ +// +// Created by Haifa Bogdan Adnan on 03/08/2018. 
+// + +#ifndef ARGON2_HASHER_H +#define ARGON2_HASHER_H + +#include "crypto/argon2_hasher/hash/argon2/Defs.h" +#include "../../../core/HasherConfig.h" +#include "../../../common/xmrig.h" + +struct DeviceInfo { + string name; + string bus_id; + double intensity; +}; + +#define REGISTER_HASHER(x) extern "C" { DLLEXPORT void hasherLoader() { x *instance = new x(); } } + +class DLLEXPORT Hasher { +public: + Hasher(); + virtual ~Hasher(); + + virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant) = 0; + virtual bool configure(xmrig::HasherConfig &config) = 0; + virtual void cleanup() = 0; + virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) = 0; + virtual size_t parallelism(int workerIdx) = 0; + virtual size_t deviceCount() = 0; + + string type(); + string subType(bool shortName = false); + + string info(); + int computingThreads(); + + map &devices(); + + static vector getHashers(const string &type); + static vector getHashers(); + static vector getActiveHashers(); + static void loadHashers(const string &appPath); + +protected: + double m_intensity; + string m_type; + string m_subType; + string m_shortSubType; //max 3 characters + string m_description; + int m_computingThreads; + static string m_appFolder; + + void storeDeviceInfo(int deviceId, DeviceInfo device); + Argon2Profile *getArgon2Profile(xmrig::Algo algorithm, xmrig::Variant variant); + +private: + static vector *m_registeredHashers; + map m_deviceInfos; + mutex m_deviceInfosMutex; +}; + +#endif //ARGON2_HASHER_H diff --git a/src/crypto/argon2_hasher/hash/argon2/Argon2.cpp b/src/crypto/argon2_hasher/hash/argon2/Argon2.cpp new file mode 100755 index 00000000..7accf8c0 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/Argon2.cpp @@ -0,0 +1,143 @@ +// +// Created by Haifa Bogdan Adnan on 05/08/2018. 
+// + +#include "../../common/common.h" +#include "../../crypt/base64.h" +#include "../../crypt/hex.h" +#include "../../crypt/random_generator.h" + +#include "blake2/blake2.h" +#include "../../common/DLLExport.h" +#include "../../../Argon2_constants.h" +#include "Argon2.h" +#include "Defs.h" + +Argon2::Argon2(argon2BlocksPrehash prehash, argon2BlocksFillerPtr filler, argon2BlocksPosthash posthash, void *memory, void *userData) { + m_prehash = prehash; + m_filler = filler; + m_posthash = posthash; + m_outputMemory = m_seedMemory = (uint8_t*)memory; + m_userData = userData; + m_threads = 1; +} + +int Argon2::generateHashes(const Argon2Profile &profile, HashData &hashData) { + if(initializeSeeds(profile, hashData)) { + if(fillBlocks(profile)) { + return encodeHashes(profile, hashData); + } + } + + return 0; +} + +bool Argon2::initializeSeeds(const Argon2Profile &profile, HashData &hashData) { + if(m_prehash != NULL) { + return (*m_prehash)(hashData.input, m_threads, (Argon2Profile*)&profile, m_userData); + } + else { + uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; + + for (int i = 0; i < m_threads; i++, (*(nonce(hashData)))++) { + initialHash(profile, blockhash, (char *) hashData.input, hashData.inSize, xmrig::ARGON2_HASHLEN); + + memset(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0, + ARGON2_PREHASH_SEED_LENGTH - + ARGON2_PREHASH_DIGEST_LENGTH); + + fillFirstBlocks(profile, blockhash, i); + } + + return true; + } +} + +bool Argon2::fillBlocks(const Argon2Profile &profile) { + m_outputMemory = (uint8_t *)(*m_filler) (m_threads, (Argon2Profile*)&profile, m_userData); + return m_outputMemory != NULL; +} + +int Argon2::encodeHashes(const Argon2Profile &profile, HashData &hashData) { + if(m_posthash != NULL) { + if((*m_posthash)(hashData.output, m_threads, (Argon2Profile*)&profile, m_userData)) { + return m_threads; + } + return 0; + } + else { + if (m_outputMemory != NULL) { + uint32_t nonceInfo = *(nonce(hashData)) - m_threads; + + for (int i = 0; i < m_threads; i++, 
nonceInfo++) { + blake2b_long((void *) (hashData.output + i * hashData.outSize), xmrig::ARGON2_HASHLEN, + (void *) (m_outputMemory + i * profile.memSize), ARGON2_BLOCK_SIZE); + memcpy(hashData.output + i * hashData.outSize + xmrig::ARGON2_HASHLEN, &nonceInfo, 4); + } + return m_threads; + } + else + return 0; + } +} + +void Argon2::initialHash(const Argon2Profile &profile, uint8_t *blockhash, const char *data, size_t dataSz,size_t outSz) { + blake2b_state BlakeHash; + uint32_t value; + + blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH); + + value = profile.thrCost; + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + value = outSz; + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + value = profile.memCost; + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + value = profile.tmCost; + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + value = ARGON2_VERSION; + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + value = ARGON2_TYPE_VALUE; + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + value = (uint32_t)dataSz; + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + blake2b_update(&BlakeHash, (const uint8_t *)data, dataSz); + + value = xmrig::ARGON2_SALTLEN; + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + blake2b_update(&BlakeHash, (const uint8_t *)data, xmrig::ARGON2_SALTLEN); + + value = 0; + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH); +} + +void Argon2::fillFirstBlocks(const Argon2Profile &profile, uint8_t *blockhash, int thread) { + block *blocks = (block *)(m_seedMemory + thread * profile.memSize); + size_t lane_length = profile.memCost / profile.thrCost; + + for (uint32_t l = 0; l < profile.thrCost; ++l) { + 
*((uint32_t*)(blockhash + ARGON2_PREHASH_DIGEST_LENGTH)) = 0; + *((uint32_t*)(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4)) = l; + + blake2b_long((void *)(blocks + l * lane_length), ARGON2_BLOCK_SIZE, blockhash, + ARGON2_PREHASH_SEED_LENGTH); + + *((uint32_t*)(blockhash + ARGON2_PREHASH_DIGEST_LENGTH)) = 1; + + blake2b_long((void *)(blocks + l * lane_length + 1), ARGON2_BLOCK_SIZE, blockhash, + ARGON2_PREHASH_SEED_LENGTH); + } +} + +void Argon2::setThreads(int threads) { + m_threads = threads; +} diff --git a/src/crypto/argon2_hasher/hash/argon2/Argon2.h b/src/crypto/argon2_hasher/hash/argon2/Argon2.h new file mode 100644 index 00000000..90e72d53 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/Argon2.h @@ -0,0 +1,56 @@ +// +// Created by Haifa Bogdan Adnan on 05/08/2018. +// + +#ifndef ARIOMINER_ARGON2_H +#define ARIOMINER_ARGON2_H + +#include "Defs.h" +#include "crypto/argon2_hasher/hash/Hasher.h" + +typedef bool (*argon2BlocksPrehash)(void *, int, Argon2Profile *, void *); // data_memory +typedef void *(*argon2BlocksFillerPtr)(int, Argon2Profile *, void *); +typedef bool (*argon2BlocksPosthash)(void *, int, Argon2Profile *, void *); // raw_hash_mem + +struct HashData { + uint8_t *input; + uint8_t *output; + size_t inSize; + size_t outSize; +}; + +class DLLEXPORT Argon2 { +public: + Argon2(argon2BlocksPrehash prehash, argon2BlocksFillerPtr filler, argon2BlocksPosthash posthash, void *memory, void *userData); + + int generateHashes(const Argon2Profile &profile, HashData &hashData); + + bool initializeSeeds(const Argon2Profile &profile, HashData &hashData); + bool fillBlocks(const Argon2Profile &profile); + int encodeHashes(const Argon2Profile &profile, HashData &hashData); + + void setThreads(int threads); + +private: + void initialHash(const Argon2Profile &profile, uint8_t *blockhash, const char *data, size_t dataSz, size_t outSz); + void fillFirstBlocks(const Argon2Profile &profile, uint8_t *blockhash, int thread); + + inline uint32_t *nonce(HashData 
&hashData) + { + return reinterpret_cast(hashData.input + 39); + } + + argon2BlocksPrehash m_prehash; + argon2BlocksFillerPtr m_filler; + argon2BlocksPosthash m_posthash; + + int m_threads; + + uint8_t *m_seedMemory; + uint8_t *m_outputMemory; + + void *m_userData; +}; + + +#endif //ARIOMINER_ARGON2_H diff --git a/src/crypto/argon2_hasher/hash/argon2/Defs.h b/src/crypto/argon2_hasher/hash/argon2/Defs.h new file mode 100755 index 00000000..3f6b7181 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/Defs.h @@ -0,0 +1,50 @@ +// +// Created by Haifa Bogdan Adnan on 06/08/2018. +// + +#ifndef ARIOMINER_DEFS_H +#define ARIOMINER_DEFS_H + +#define ARGON2_RAW_LENGTH 32 +#define ARGON2_TYPE_VALUE 2 +#define ARGON2_VERSION 0x13 + +#define ARGON2_BLOCK_SIZE 1024 +#define ARGON2_DWORDS_IN_BLOCK ARGON2_BLOCK_SIZE / 4 +#define ARGON2_QWORDS_IN_BLOCK ARGON2_BLOCK_SIZE / 8 +#define ARGON2_OWORDS_IN_BLOCK ARGON2_BLOCK_SIZE / 16 +#define ARGON2_HWORDS_IN_BLOCK ARGON2_BLOCK_SIZE / 32 +#define ARGON2_512BIT_WORDS_IN_BLOCK ARGON2_BLOCK_SIZE / 64 +#define ARGON2_PREHASH_DIGEST_LENGTH 64 +#define ARGON2_PREHASH_SEED_LENGTH 72 + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block; + +typedef struct Argon2Profile_ { + uint32_t memCost; + uint32_t thrCost; + uint32_t tmCost; + size_t memSize; + int32_t *blockRefs; + size_t blockRefsSize; + char profileName[15]; + int32_t *segments; // { start segment / current block, stop segment (excluding) / previous block, addressing type = 0 -> i, 1 -> d } + uint32_t segSize; + uint32_t segCount; + uint32_t succesiveIdxs; // 0 - idx are precalculated, 1 - idx are successive + int pwdLen; // in dwords + int saltLen; // in dwords +} Argon2Profile; + +extern DLLEXPORT Argon2Profile argon2profile_3_1_512; +extern DLLEXPORT Argon2Profile argon2profile_4_1_256; + +#ifdef __cplusplus +} +#endif + +#endif //ARIOMINER_DEFS_H diff --git 
a/src/crypto/argon2_hasher/hash/argon2/argon2profile_3_1_512.c b/src/crypto/argon2_hasher/hash/argon2/argon2profile_3_1_512.c new file mode 100644 index 00000000..9a0cbfa3 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/argon2profile_3_1_512.c @@ -0,0 +1,292 @@ +#include +#include +#include "../../common/DLLExport.h" +#include "Defs.h" + +int32_t blocks_refs_3_1_512[] = { + 2, 0, 1, + 3, 1, 1, + 4, 2, 1, + 5, 3, 1, + 6, 3, 1, + 7, 3, 1, + 8, 2, 1, + 9, 5, 1, + 10, 0, 1, + 11, 9, 1, + 12, 10, 1, + 13, 9, 1, + 14, 12, 1, + 15, 8, 1, + 16, 5, 1, + 17, 15, 1, + 18, 10, 1, + 19, 14, 1, + 20, 7, 1, + 21, 19, 1, + 22, 14, 1, + 23, 7, 1, + 24, 14, 1, + 25, 23, 1, + 26, 24, 1, + 27, 0, 1, + 28, 9, 1, + 29, 11, 1, + 30, 12, 1, + 31, 29, 1, + 32, 12, 1, + 33, 23, 1, + 34, 30, 1, + 35, 1, 1, + 36, 32, 1, + 37, 8, 1, + 38, 30, 1, + 39, 31, 1, + 40, 15, 1, + 41, 38, 1, + 42, 29, 1, + 43, 18, 1, + 44, 33, 1, + 45, 18, 1, + 46, 39, 1, + 47, 43, 1, + 48, 40, 1, + 49, 38, 1, + 50, 5, 1, + 51, 47, 1, + 52, 14, 1, + 53, 45, 1, + 54, 30, 1, + 55, 13, 1, + 56, 47, 1, + 57, 30, 1, + 58, 21, 1, + 59, 18, 1, + 60, 36, 1, + 61, 58, 1, + 62, 58, 1, + 63, 19, 1, + 64, 59, 1, + 65, 29, 1, + 66, 10, 1, + 67, 48, 1, + 68, 39, 1, + 69, 25, 1, + 70, 63, 1, + 71, 57, 1, + 72, 70, 1, + 73, 16, 1, + 74, 20, 1, + 75, 72, 1, + 76, 67, 1, + 77, 61, 1, + 78, 49, 1, + 79, 63, 1, + 80, 9, 1, + 81, 19, 1, + 82, 80, 1, + 83, 36, 1, + 84, 20, 1, + 85, 23, 1, + 86, 52, 1, + 87, 85, 1, + 88, 75, 1, + 89, 18, 1, + 90, 85, 1, + 91, 2, 1, + 92, 81, 1, + 93, 91, 1, + 94, 91, 1, + 95, 3, 1, + 96, 45, 1, + 97, 16, 1, + 98, 11, 1, + 99, 60, 1, + 100, 89, 1, + 101, 65, 1, + 102, 39, 1, + 103, 63, 1, + 104, 66, 1, + 105, 74, 1, + 106, 54, 1, + 107, 88, 1, + 108, 106, 1, + 109, 107, 1, + 110, 47, 1, + 111, 8, 1, + 112, 95, 1, + 113, 66, 1, + 114, 1, 1, + 115, 2, 1, + 116, 20, 1, + 117, 110, 1, + 118, 47, 1, + 119, 117, 1, + 120, 114, 1, + 121, 37, 1, + 122, 71, 1, + 123, 51, 1, + 124, 122, 1, + 125, 44, 1, + 
126, 92, 1, + 127, 120, 1, + 128, 123, 1, + 129, 127, 1, + 130, 11, 1, + 131, 110, 1, + 132, 93, 1, + 133, 20, 1, + 134, 58, 1, + 135, 13, 1, + 136, 73, 1, + 137, 27, 1, + 138, 94, 1, + 139, 110, 1, + 140, 96, 1, + 141, 57, 1, + 142, 137, 1, + 143, 116, 1, + 144, 119, 1, + 145, 141, 1, + 146, 73, 1, + 147, 26, 1, + 148, 103, 1, + 149, 125, 1, + 150, 146, 1, + 151, 149, 1, + 152, 28, 1, + 153, 149, 1, + 154, 125, 1, + 155, 104, 1, + 156, 61, 1, + 157, 128, 1, + 158, 156, 1, + 159, 122, 1, + 160, 96, 1, + 161, 92, 1, + 162, 160, 1, + 163, 154, 1, + 164, 88, 1, + 165, 160, 1, + 166, 134, 1, + 167, 116, 1, + 168, 23, 1, + 169, 167, 1, + 170, 100, 1, + 171, 169, 1, + 172, 169, 1, + 173, 127, 1, + 174, 0, 1, + 175, 78, 1, + 176, 155, 1, + 177, 124, 1, + 178, 138, 1, + 179, 41, 1, + 180, 156, 1, + 181, 173, 1, + 182, 122, 1, + 183, 173, 1, + 184, 112, 1, + 185, 15, 1, + 186, 183, 1, + 187, 171, 1, + 188, 163, 1, + 189, 85, 1, + 190, 45, 1, + 191, 171, 1, + 192, 139, 1, + 193, 188, 1, + 194, 192, 1, + 195, 78, 1, + 196, 5, 1, + 197, 187, 1, + 198, 180, 1, + 199, 195, 1, + 200, 102, 1, + 201, 89, 1, + 202, 165, 1, + 203, 144, 1, + 204, 171, 1, + 205, 152, 1, + 206, 53, 1, + 207, 19, 1, + 208, 206, 1, + 209, 165, 1, + 210, 208, 1, + 211, 76, 1, + 212, 177, 1, + 213, 189, 1, + 214, 43, 1, + 215, 120, 1, + 216, 122, 1, + 217, 189, 1, + 218, 45, 1, + 219, 217, 1, + 220, 207, 1, + 221, 202, 1, + 222, 169, 1, + 223, 194, 1, + 224, 213, 1, + 225, 178, 1, + 226, 175, 1, + 227, 221, 1, + 228, 212, 1, + 229, 220, 1, + 230, 227, 1, + 231, 30, 1, + 232, 34, 1, + 233, 91, 1, + 234, 231, 1, + 235, 154, 1, + 236, 100, 1, + 237, 166, 1, + 238, 216, 1, + 239, 229, 1, + 240, 177, 1, + 241, 123, 1, + 242, 172, 1, + 243, 71, 1, + 244, 241, 1, + 245, 236, 1, + 246, 109, 1, + 247, 4, 1, + 248, 246, 1, + 249, 166, 1, + 250, 248, 1, + 251, 243, 1, + 252, 248, 1, + 253, 39, 1, + 254, 98, 1, + 255, 253, 1 +}; + +int32_t segments_3_1_512[] = { // current_idx, previous_idx, seg_type 0=i 1=d + 2, 1, 0, 
+ 128, 127, 0, + 256, 255, 1, + 384, 383, 1, + 0, 511, 1, + 128, 127, 1, + 256, 255, 1, + 384, 383, 1, + 0, 511, 1, + 128, 127, 1, + 256, 255, 1, + 384, 383, 1 +}; + +DLLEXPORT Argon2Profile argon2profile_3_1_512 = { + 512, + 1, + 3, + 524288, //512 blocks of 1024 bytes + blocks_refs_3_1_512, + sizeof(blocks_refs_3_1_512) / (3 * sizeof(int32_t)), + "3_1_512", + segments_3_1_512, + 128, + 12, + 1, + 32, + 4 +}; diff --git a/src/crypto/argon2_hasher/hash/argon2/argon2profile_4_1_256.c b/src/crypto/argon2_hasher/hash/argon2/argon2profile_4_1_256.c new file mode 100644 index 00000000..59890c49 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/argon2profile_4_1_256.c @@ -0,0 +1,168 @@ +#include +#include +#include "../../common/DLLExport.h" +#include "Defs.h" + +int32_t blocks_refs_4_1_256[] = { + 2, 0, 1, + 3, 1, 1, + 4, 2, 1, + 5, 3, 1, + 6, 0, 1, + 7, 4, 1, + 8, 5, 1, + 9, 7, 1, + 10, 7, 1, + 11, 9, 1, + 12, 5, 1, + 13, 11, 1, + 14, 3, 1, + 15, 2, 1, + 16, 12, 1, + 17, 15, 1, + 18, 15, 1, + 19, 10, 1, + 20, 4, 1, + 21, 18, 1, + 22, 17, 1, + 23, 19, 1, + 24, 2, 1, + 25, 23, 1, + 26, 22, 1, + 27, 12, 1, + 28, 23, 1, + 29, 27, 1, + 30, 26, 1, + 31, 19, 1, + 32, 27, 1, + 33, 29, 1, + 34, 32, 1, + 35, 18, 1, + 36, 32, 1, + 37, 16, 1, + 38, 35, 1, + 39, 22, 1, + 40, 30, 1, + 41, 31, 1, + 42, 39, 1, + 43, 36, 1, + 44, 18, 1, + 45, 0, 1, + 46, 36, 1, + 47, 12, 1, + 48, 28, 1, + 49, 39, 1, + 50, 4, 1, + 51, 48, 1, + 52, 48, 1, + 53, 51, 1, + 54, 50, 1, + 55, 3, 1, + 56, 54, 1, + 57, 53, 1, + 58, 48, 1, + 59, 47, 1, + 60, 25, 1, + 61, 53, 1, + 62, 31, 1, + 63, 59, 1, + 64, 45, 1, + 65, 63, 1, + 66, 48, 1, + 67, 58, 1, + 68, 40, 1, + 69, 17, 1, + 70, 62, 1, + 71, 24, 1, + 72, 60, 1, + 73, 71, 1, + 74, 72, 1, + 75, 57, 1, + 76, 69, 1, + 77, 58, 1, + 78, 74, 1, + 79, 69, 1, + 80, 75, 1, + 81, 74, 1, + 82, 56, 1, + 83, 67, 1, + 84, 15, 1, + 85, 83, 1, + 86, 69, 1, + 87, 83, 1, + 88, 85, 1, + 89, 24, 1, + 90, 52, 1, + 91, 70, 1, + 92, 88, 1, + 93, 42, 1, + 94, 61, 1, + 95, 
93, 1, + 96, 22, 1, + 97, 37, 1, + 98, 15, 1, + 99, 91, 1, + 100, 14, 1, + 101, 98, 1, + 102, 24, 1, + 103, 84, 1, + 104, 44, 1, + 105, 103, 1, + 106, 12, 1, + 107, 15, 1, + 108, 79, 1, + 109, 35, 1, + 110, 4, 1, + 111, 109, 1, + 112, 90, 1, + 113, 109, 1, + 114, 43, 1, + 115, 73, 1, + 116, 113, 1, + 117, 107, 1, + 118, 51, 1, + 119, 117, 1, + 120, 118, 1, + 121, 115, 1, + 122, 74, 1, + 123, 67, 1, + 124, 102, 1, + 125, 17, 1, + 126, 113, 1, + 127, 110, 1 +}; + +int32_t segments_4_1_256[] = { // current_idx, previous_idx, seg_type 0=i 1=d + 2, 1, 0, + 64, 63, 0, + 128, 127, 1, + 192, 191, 1, + 0, 255, 1, + 64, 63, 1, + 128, 127, 1, + 192, 191, 1, + 0, 255, 1, + 64, 63, 1, + 128, 127, 1, + 192, 191, 1, + 0, 255, 1, + 64, 63, 1, + 128, 127, 1, + 192, 191, 1 +}; + +DLLEXPORT Argon2Profile argon2profile_4_1_256 = { + 256, + 1, + 4, + 262144, //256 blocks of 1024 bytes + blocks_refs_4_1_256, + sizeof(blocks_refs_4_1_256) / (3 * sizeof(int32_t)), + "4_1_256", + segments_4_1_256, + 64, + 16, + 1, + 32, + 4 +}; diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2-config.h b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2-config.h new file mode 100644 index 00000000..a70cd7f0 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2-config.h @@ -0,0 +1,76 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
+*/ +#ifndef BLAKE2_CONFIG_H +#define BLAKE2_CONFIG_H + +/* These don't work everywhere */ +#if defined(__SSE2__) || defined(__x86_64__) || defined(__amd64__) || defined(_M_X64) +#define HAVE_SSE2 +#endif + +#if defined(__SSSE3__) +#define HAVE_SSSE3 +#endif + +#if defined(__SSE4_1__) +#define HAVE_SSE41 +#endif + +#if defined(__AVX__) +#define HAVE_AVX +#endif + +#if defined(__AVX2__) +#define HAVE_AVX2 +#endif + +#if defined(__XOP__) +#define HAVE_XOP +#endif + + +#ifdef HAVE_AVX2 +#ifndef HAVE_AVX +#define HAVE_AVX +#endif +#endif + +#ifdef HAVE_XOP +#ifndef HAVE_AVX +#define HAVE_AVX +#endif +#endif + +#ifdef HAVE_AVX +#ifndef HAVE_SSE41 +#define HAVE_SSE41 +#endif +#endif + +#ifdef HAVE_SSE41 +#ifndef HAVE_SSSE3 +#define HAVE_SSSE3 +#endif +#endif + +#ifdef HAVE_SSSE3 +#define HAVE_SSE2 +#endif + +#if !defined(HAVE_SSE2) +#error "This code requires at least SSE2." +#endif + +#endif \ No newline at end of file diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2-impl.h b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2-impl.h new file mode 100644 index 00000000..e77ad92f --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2-impl.h @@ -0,0 +1,154 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. 
+ */ + +#ifndef PORTABLE_BLAKE2_IMPL_H +#define PORTABLE_BLAKE2_IMPL_H + +#include +#include + +#if defined(_MSC_VER) +#define BLAKE2_INLINE __inline +#elif defined(__GNUC__) || defined(__clang__) +#define BLAKE2_INLINE __inline__ +#else +#define BLAKE2_INLINE +#endif + +/* Argon2 Team - Begin Code */ +/* + Not an exhaustive list, but should cover the majority of modern platforms + Additionally, the code will always be correct---this is only a performance + tweak. +*/ +#if (defined(__BYTE_ORDER__) && \ + (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \ + defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \ + defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \ + defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \ + defined(_M_ARM) +#define NATIVE_LITTLE_ENDIAN +#endif +/* Argon2 Team - End Code */ + +static BLAKE2_INLINE uint32_t load32(const void *src) { +#if defined(NATIVE_LITTLE_ENDIAN) + uint32_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = (const uint8_t *)src; + uint32_t w = *p++; + w |= (uint32_t)(*p++) << 8; + w |= (uint32_t)(*p++) << 16; + w |= (uint32_t)(*p++) << 24; + return w; +#endif +} + +static BLAKE2_INLINE uint64_t load64(const void *src) { +#if defined(NATIVE_LITTLE_ENDIAN) + uint64_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = (const uint8_t *)src; + uint64_t w = *p++; + w |= (uint64_t)(*p++) << 8; + w |= (uint64_t)(*p++) << 16; + w |= (uint64_t)(*p++) << 24; + w |= (uint64_t)(*p++) << 32; + w |= (uint64_t)(*p++) << 40; + w |= (uint64_t)(*p++) << 48; + w |= (uint64_t)(*p++) << 56; + return w; +#endif +} + +static BLAKE2_INLINE void store32(void *dst, uint32_t w) { +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = (uint8_t *)dst; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; +#endif +} + +static BLAKE2_INLINE void store64(void *dst, 
uint64_t w) { +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = (uint8_t *)dst; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; +#endif +} + +static BLAKE2_INLINE uint64_t load48(const void *src) { + const uint8_t *p = (const uint8_t *)src; + uint64_t w = *p++; + w |= (uint64_t)(*p++) << 8; + w |= (uint64_t)(*p++) << 16; + w |= (uint64_t)(*p++) << 24; + w |= (uint64_t)(*p++) << 32; + w |= (uint64_t)(*p++) << 40; + return w; +} + +static BLAKE2_INLINE void store48(void *dst, uint64_t w) { + uint8_t *p = (uint8_t *)dst; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; + w >>= 8; + *p++ = (uint8_t)w; +} + +static BLAKE2_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) { + return (w >> c) | (w << (32 - c)); +} + +static BLAKE2_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) { + return (w >> c) | (w << (64 - c)); +} + +#endif diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2.h b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2.h new file mode 100644 index 00000000..70e4aeb8 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2.h @@ -0,0 +1,90 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. 
The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#ifndef PORTABLE_BLAKE2_H +#define PORTABLE_BLAKE2_H + +#include + +#if defined(__cplusplus) +extern "C" { +#endif + +enum blake2b_constant { + BLAKE2B_BLOCKBYTES = 128, + BLAKE2B_OUTBYTES = 64, + BLAKE2B_KEYBYTES = 64, + BLAKE2B_SALTBYTES = 16, + BLAKE2B_PERSONALBYTES = 16 +}; + +#pragma pack(push, 1) +typedef struct __blake2b_param { + uint8_t digest_length; /* 1 */ + uint8_t key_length; /* 2 */ + uint8_t fanout; /* 3 */ + uint8_t depth; /* 4 */ + uint32_t leaf_length; /* 8 */ + uint64_t node_offset; /* 16 */ + uint8_t node_depth; /* 17 */ + uint8_t inner_length; /* 18 */ + uint8_t reserved[14]; /* 32 */ + uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */ + uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */ +} blake2b_param; +#pragma pack(pop) + +typedef struct __blake2b_state { + uint64_t h[8]; + uint64_t t[2]; + uint64_t f[2]; + uint8_t buf[BLAKE2B_BLOCKBYTES]; + unsigned buflen; + unsigned outlen; + uint8_t last_node; +} blake2b_state; + +/* Ensure param structs have not been wrongly padded */ +/* Poor man's static_assert */ +enum { + blake2_size_check_0 = 1 / !!(CHAR_BIT == 8), + blake2_size_check_2 = + 1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * CHAR_BIT) +}; + +/* Streaming API */ +int blake2b_init(blake2b_state *S, size_t outlen); +int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, + size_t keylen); +int blake2b_init_param(blake2b_state *S, const blake2b_param *P); +int blake2b_update(blake2b_state *S, const void *in, size_t inlen); +int blake2b_update_static(blake2b_state *S, const char in, size_t inlen); +int blake2b_final(blake2b_state *S, void *out, size_t outlen); + +/* Simple API */ +int 
blake2b(void *out, size_t outlen, const void *in, size_t inlen, + const void *key, size_t keylen); + +/* Argon2 Team - Begin Code */ +int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen); +/* Argon2 Team - End Code */ + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse2.h b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse2.h new file mode 100644 index 00000000..f79123d8 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse2.h @@ -0,0 +1,68 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
+*/ +#ifndef BLAKE2B_LOAD_SSE2_H +#define BLAKE2B_LOAD_SSE2_H + +#define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) +#define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) +#define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13) +#define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9) +#define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15) +#define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11) +#define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7) +#define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5) +#define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2) +#define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7) +#define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1) +#define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13) +#define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4) +#define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0) +#define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2) +#define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4) +#define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6) +#define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8) +#define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0) +#define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, m12); b1 = 
_mm_set_epi64x(m3, m11) +#define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15) +#define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14) +#define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14) +#define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13) +#define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9) +#define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2) +#define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12) +#define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1) +#define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8) +#define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6) +#define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11) +#define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3) +#define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1) +#define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4) +#define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7) +#define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6) +#define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3) +#define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12) +#define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) +#define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) +#define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13) +#define 
LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9) +#define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15) +#define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11) +#define LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7) + + +#endif \ No newline at end of file diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse41.h b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse41.h new file mode 100644 index 00000000..e8564b57 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-load-sse41.h @@ -0,0 +1,402 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
+*/ +#ifndef BLAKE2B_LOAD_SSE41_H +#define BLAKE2B_LOAD_SSE41_H + +#define LOAD_MSG_0_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m1); \ +b1 = _mm_unpacklo_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_0_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m0, m1); \ +b1 = _mm_unpackhi_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_0_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m4, m5); \ +b1 = _mm_unpacklo_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_0_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m5); \ +b1 = _mm_unpackhi_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_1_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m7, m2); \ +b1 = _mm_unpackhi_epi64(m4, m6); \ +} while(0) + + +#define LOAD_MSG_1_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m5, m4); \ +b1 = _mm_alignr_epi8(m3, m7, 8); \ +} while(0) + + +#define LOAD_MSG_1_3(b0, b1) \ +do \ +{ \ +b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \ +b1 = _mm_unpackhi_epi64(m5, m2); \ +} while(0) + + +#define LOAD_MSG_1_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m6, m1); \ +b1 = _mm_unpackhi_epi64(m3, m1); \ +} while(0) + + +#define LOAD_MSG_2_1(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m6, m5, 8); \ +b1 = _mm_unpackhi_epi64(m2, m7); \ +} while(0) + + +#define LOAD_MSG_2_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m4, m0); \ +b1 = _mm_blend_epi16(m1, m6, 0xF0); \ +} while(0) + + +#define LOAD_MSG_2_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m5, m1, 0xF0); \ +b1 = _mm_unpackhi_epi64(m3, m4); \ +} while(0) + + +#define LOAD_MSG_2_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m7, m3); \ +b1 = _mm_alignr_epi8(m2, m0, 8); \ +} while(0) + + +#define LOAD_MSG_3_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m3, m1); \ +b1 = _mm_unpackhi_epi64(m6, m5); \ +} while(0) + + +#define LOAD_MSG_3_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m0); \ +b1 = _mm_unpacklo_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_3_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m1, m2, 
0xF0); \ +b1 = _mm_blend_epi16(m2, m7, 0xF0); \ +} while(0) + + +#define LOAD_MSG_3_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m3, m5); \ +b1 = _mm_unpacklo_epi64(m0, m4); \ +} while(0) + + +#define LOAD_MSG_4_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m2); \ +b1 = _mm_unpacklo_epi64(m1, m5); \ +} while(0) + + +#define LOAD_MSG_4_2(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m0, m3, 0xF0); \ +b1 = _mm_blend_epi16(m2, m7, 0xF0); \ +} while(0) + + +#define LOAD_MSG_4_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m7, m5, 0xF0); \ +b1 = _mm_blend_epi16(m3, m1, 0xF0); \ +} while(0) + + +#define LOAD_MSG_4_4(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m6, m0, 8); \ +b1 = _mm_blend_epi16(m4, m6, 0xF0); \ +} while(0) + + +#define LOAD_MSG_5_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m1, m3); \ +b1 = _mm_unpacklo_epi64(m0, m4); \ +} while(0) + + +#define LOAD_MSG_5_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m6, m5); \ +b1 = _mm_unpackhi_epi64(m5, m1); \ +} while(0) + + +#define LOAD_MSG_5_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m2, m3, 0xF0); \ +b1 = _mm_unpackhi_epi64(m7, m0); \ +} while(0) + + +#define LOAD_MSG_5_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m6, m2); \ +b1 = _mm_blend_epi16(m7, m4, 0xF0); \ +} while(0) + + +#define LOAD_MSG_6_1(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m6, m0, 0xF0); \ +b1 = _mm_unpacklo_epi64(m7, m2); \ +} while(0) + + +#define LOAD_MSG_6_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m2, m7); \ +b1 = _mm_alignr_epi8(m5, m6, 8); \ +} while(0) + + +#define LOAD_MSG_6_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m3); \ +b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \ +} while(0) + + +#define LOAD_MSG_6_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m3, m1); \ +b1 = _mm_blend_epi16(m1, m5, 0xF0); \ +} while(0) + + +#define LOAD_MSG_7_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m6, m3); \ +b1 = _mm_blend_epi16(m6, m1, 0xF0); \ +} while(0) + + +#define LOAD_MSG_7_2(b0, b1) \ +do \ +{ \ +b0 = 
_mm_alignr_epi8(m7, m5, 8); \ +b1 = _mm_unpackhi_epi64(m0, m4); \ +} while(0) + + +#define LOAD_MSG_7_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m2, m7); \ +b1 = _mm_unpacklo_epi64(m4, m1); \ +} while(0) + + +#define LOAD_MSG_7_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m2); \ +b1 = _mm_unpacklo_epi64(m3, m5); \ +} while(0) + + +#define LOAD_MSG_8_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m3, m7); \ +b1 = _mm_alignr_epi8(m0, m5, 8); \ +} while(0) + + +#define LOAD_MSG_8_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m7, m4); \ +b1 = _mm_alignr_epi8(m4, m1, 8); \ +} while(0) + + +#define LOAD_MSG_8_3(b0, b1) \ +do \ +{ \ +b0 = m6; \ +b1 = _mm_alignr_epi8(m5, m0, 8); \ +} while(0) + + +#define LOAD_MSG_8_4(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m1, m3, 0xF0); \ +b1 = m2; \ +} while(0) + + +#define LOAD_MSG_9_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m5, m4); \ +b1 = _mm_unpackhi_epi64(m3, m0); \ +} while(0) + + +#define LOAD_MSG_9_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m1, m2); \ +b1 = _mm_blend_epi16(m3, m2, 0xF0); \ +} while(0) + + +#define LOAD_MSG_9_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m7, m4); \ +b1 = _mm_unpackhi_epi64(m1, m6); \ +} while(0) + + +#define LOAD_MSG_9_4(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m7, m5, 8); \ +b1 = _mm_unpacklo_epi64(m6, m0); \ +} while(0) + + +#define LOAD_MSG_10_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m1); \ +b1 = _mm_unpacklo_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_10_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m0, m1); \ +b1 = _mm_unpackhi_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_10_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m4, m5); \ +b1 = _mm_unpacklo_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_10_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m5); \ +b1 = _mm_unpackhi_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_11_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m7, m2); \ +b1 = _mm_unpackhi_epi64(m4, m6); \ +} 
while(0) + + +#define LOAD_MSG_11_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m5, m4); \ +b1 = _mm_alignr_epi8(m3, m7, 8); \ +} while(0) + + +#define LOAD_MSG_11_3(b0, b1) \ +do \ +{ \ +b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \ +b1 = _mm_unpackhi_epi64(m5, m2); \ +} while(0) + + +#define LOAD_MSG_11_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m6, m1); \ +b1 = _mm_unpackhi_epi64(m3, m1); \ +} while(0) + + +#endif \ No newline at end of file diff --git a/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-round.h b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-round.h new file mode 100644 index 00000000..3e348e6f --- /dev/null +++ b/src/crypto/argon2_hasher/hash/argon2/blake2/blake2b-round.h @@ -0,0 +1,154 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#ifndef BLAKE2B_ROUND_H +#define BLAKE2B_ROUND_H + +#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) ) +#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) + +#define TOF(reg) _mm_castsi128_ps((reg)) +#define TOI(reg) _mm_castps_si128((reg)) + +#define LIKELY(x) __builtin_expect((x),1) + +/* Microarchitecture-specific macros */ +#ifndef HAVE_XOP +#ifdef HAVE_SSSE3 +#define _mm_roti_epi64(x, c) \ + (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \ + : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \ + : (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \ + : (-(c) == 63) ? 
/*
 * First half of the BLAKE2b mixing step, applied to the low (…l) and high (…h)
 * 128-bit halves of each row at once. Per 64-bit lane it computes
 *
 *     row1 += b + row2;
 *     row4  = rotate_right(row4 ^ row1, 32);
 *     row3 += row4;
 *     row2  = rotate_right(row2 ^ row3, 24);
 *
 * b0/b1 are the message vectors for the low/high halves. _mm_roti_epi64 is
 * called with a negative count, which this header defines as a right rotation.
 *
 * NOTE: this is a bare statement list, not a do { } while(0) block, so it is
 * only safe where a sequence of statements is allowed (e.g. inside ROUND).
 * NOTE: the last line ends with a line continuation; the empty line that
 * follows terminates the macro and must not be removed.
 */
#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
  row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
  row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
  \
  row4l = _mm_xor_si128(row4l, row1l); \
  row4h = _mm_xor_si128(row4h, row1h); \
  \
  row4l = _mm_roti_epi64(row4l, -32); \
  row4h = _mm_roti_epi64(row4h, -32); \
  \
  row3l = _mm_add_epi64(row3l, row4l); \
  row3h = _mm_add_epi64(row3h, row4h); \
  \
  row2l = _mm_xor_si128(row2l, row3l); \
  row2h = _mm_xor_si128(row2h, row3h); \
  \
  row2l = _mm_roti_epi64(row2l, -24); \
  row2h = _mm_roti_epi64(row2h, -24); \

/*
 * Second half of the mixing step: same data flow as G1 but with right
 * rotations by 16 and 63 bits instead of 32 and 24.
 *
 * The same caveats as for G1 apply (bare statement list, trailing line
 * continuation on the last statement).
 */
#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
  row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
  row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
  \
  row4l = _mm_xor_si128(row4l, row1l); \
  row4h = _mm_xor_si128(row4h, row1h); \
  \
  row4l = _mm_roti_epi64(row4l, -16); \
  row4h = _mm_roti_epi64(row4h, -16); \
  \
  row3l = _mm_add_epi64(row3l, row4l); \
  row3h = _mm_add_epi64(row3h, row4h); \
  \
  row2l = _mm_xor_si128(row2l, row3l); \
  row2h = _mm_xor_si128(row2h, row3h); \
  \
  row2l = _mm_roti_epi64(row2l, -63); \
  row2h = _mm_roti_epi64(row2h, -63); \
  /* the continuation above extends G2 onto this comment-only line */
/*
 * One full BLAKE2b round on the row-split state.
 *
 * `r` must be a literal round number: it is token-pasted into the names
 * LOAD_MSG_<r>_1 .. LOAD_MSG_<r>_4, so a variable cannot be passed.
 *
 * Sequence: load message vectors and mix (G1, G2), DIAGONALIZE, load and
 * mix again (G1, G2), then UNDIAGONALIZE to restore the row layout.
 *
 * The macro is not hygienic: it relies on row1l..row4h, b0, b1 (and the t0/t1
 * temporaries used by DIAGONALIZE/UNDIAGONALIZE) being declared in the
 * enclosing scope, and it already ends with ';'.
 */
#define ROUND(r) \
  LOAD_MSG_ ##r ##_1(b0, b1); \
  G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
  LOAD_MSG_ ##r ##_2(b0, b1); \
  G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
  DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
  LOAD_MSG_ ##r ##_3(b0, b1); \
  G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
  LOAD_MSG_ ##r ##_4(b0, b1); \
  G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
  UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
/*
 * BLAKE2b initialisation vector (eight 64-bit words).
 *
 * blake2b_init0() copies it into the chaining value S->h, and
 * blake2b_compress() uses it for the second half of the working state
 * (words 4..7 are additionally XORed with the counter t and the
 * finalisation flags f).
 */
static const uint64_t blake2b_IV[8] = {
    UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
    UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
    UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
    UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179)};
11}, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, + {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, +}; + +static BLAKE2_INLINE void blake2b_set_lastnode(blake2b_state *S) { + S->f[1] = (uint64_t)-1; +} + +static BLAKE2_INLINE void blake2b_set_lastblock(blake2b_state *S) { + if (S->last_node) { + blake2b_set_lastnode(S); + } + S->f[0] = (uint64_t)-1; +} + +static BLAKE2_INLINE void blake2b_increment_counter(blake2b_state *S, + uint64_t inc) { + S->t[0] += inc; + S->t[1] += (S->t[0] < inc); +} + +static BLAKE2_INLINE void blake2b_invalidate_state(blake2b_state *S) { + blake2b_set_lastblock(S); /* invalidate for further use */ +} + +static BLAKE2_INLINE void blake2b_init0(blake2b_state *S) { + memset(S, 0, sizeof(*S)); + memcpy(S->h, blake2b_IV, sizeof(S->h)); +} + +int blake2b_init_param(blake2b_state *S, const blake2b_param *P) { + const unsigned char *p = (const unsigned char *)P; + unsigned int i; + + if (NULL == P || NULL == S) { + return -1; + } + + blake2b_init0(S); + /* IV XOR Parameter Block */ + for (i = 0; i < 8; ++i) { + S->h[i] ^= load64(&p[i * sizeof(S->h[i])]); + } + S->outlen = P->digest_length; + return 0; +} + +/* Sequential blake2b initialization */ +int blake2b_init(blake2b_state *S, size_t outlen) { + blake2b_param P; + + if (S == NULL) { + return -1; + } + + if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) { + blake2b_invalidate_state(S); + return -1; + } + + /* Setup Parameter Block for unkeyed BLAKE2 */ + P.digest_length = (uint8_t)outlen; + P.key_length = 0; + P.fanout = 1; + P.depth = 1; + P.leaf_length = 0; + P.node_offset = 0; + P.node_depth = 0; + P.inner_length = 0; + memset(P.reserved, 0, sizeof(P.reserved)); + memset(P.salt, 0, sizeof(P.salt)); + memset(P.personal, 0, sizeof(P.personal)); + + return blake2b_init_param(S, &P); +} + +int 
/*
 * SSE implementation of the BLAKE2b compression function.
 *
 * Mixes one BLAKE2B_BLOCKBYTES-byte message block into the chaining value
 * S->h, using the current counter S->t and finalisation flags S->f.
 * The 16-word working state is held as four rows, each split into a low and
 * a high 128-bit register, and is processed by twelve ROUND() expansions.
 *
 * The caller is responsible for updating S->t / S->f before the call.
 */
static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
{
  __m128i row1l, row1h;
  __m128i row2l, row2h;
  __m128i row3l, row3h;
  __m128i row4l, row4h;
  __m128i b0, b1; /* message vectors filled by LOAD_MSG_* inside ROUND */
  __m128i t0, t1; /* scratch registers used by DIAGONALIZE / UNDIAGONALIZE */
#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
  /* Byte-shuffle masks referenced by name from the SSSE3 _mm_roti_epi64 macro. */
  const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
  const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
#endif
#if defined(HAVE_SSE41)
  /* SSE4.1 loaders: the block is kept as eight 128-bit registers (two words each). */
  const __m128i m0 = LOADU( block + 00 );
  const __m128i m1 = LOADU( block + 16 );
  const __m128i m2 = LOADU( block + 32 );
  const __m128i m3 = LOADU( block + 48 );
  const __m128i m4 = LOADU( block + 64 );
  const __m128i m5 = LOADU( block + 80 );
  const __m128i m6 = LOADU( block + 96 );
  const __m128i m7 = LOADU( block + 112 );
#else
  /* Otherwise the block is kept as sixteen scalar words; presumably the
     LOAD_MSG_* macros of blake2b-load-sse2.h assemble vectors from these. */
  const uint64_t m0 = load64(block + 0 * sizeof(uint64_t));
  const uint64_t m1 = load64(block + 1 * sizeof(uint64_t));
  const uint64_t m2 = load64(block + 2 * sizeof(uint64_t));
  const uint64_t m3 = load64(block + 3 * sizeof(uint64_t));
  const uint64_t m4 = load64(block + 4 * sizeof(uint64_t));
  const uint64_t m5 = load64(block + 5 * sizeof(uint64_t));
  const uint64_t m6 = load64(block + 6 * sizeof(uint64_t));
  const uint64_t m7 = load64(block + 7 * sizeof(uint64_t));
  const uint64_t m8 = load64(block + 8 * sizeof(uint64_t));
  const uint64_t m9 = load64(block + 9 * sizeof(uint64_t));
  const uint64_t m10 = load64(block + 10 * sizeof(uint64_t));
  const uint64_t m11 = load64(block + 11 * sizeof(uint64_t));
  const uint64_t m12 = load64(block + 12 * sizeof(uint64_t));
  const uint64_t m13 = load64(block + 13 * sizeof(uint64_t));
  const uint64_t m14 = load64(block + 14 * sizeof(uint64_t));
  const uint64_t m15 = load64(block + 15 * sizeof(uint64_t));
#endif
  /* Rows 1-2: current chaining value. Rows 3-4: IV, with the second IV half
     XORed with the counter (t[0], t[1]) and the flags (f[0], f[1]). */
  row1l = LOADU( &S->h[0] );
  row1h = LOADU( &S->h[2] );
  row2l = LOADU( &S->h[4] );
  row2h = LOADU( &S->h[6] );
  row3l = LOADU( &blake2b_IV[0] );
  row3h = LOADU( &blake2b_IV[2] );
  row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) );
  row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
  /* ROUND needs a literal argument (it is token-pasted), hence no loop. */
  ROUND( 0 );
  ROUND( 1 );
  ROUND( 2 );
  ROUND( 3 );
  ROUND( 4 );
  ROUND( 5 );
  ROUND( 6 );
  ROUND( 7 );
  ROUND( 8 );
  ROUND( 9 );
  ROUND( 10 );
  ROUND( 11 );
  /* Feed-forward: h[0..3] ^= row1 ^ row3 and h[4..7] ^= row2 ^ row4. */
  row1l = _mm_xor_si128( row3l, row1l );
  row1h = _mm_xor_si128( row3h, row1h );
  STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
  STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
  row2l = _mm_xor_si128( row4l, row2l );
  row2h = _mm_xor_si128( row4h, row2h );
  STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
  STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
}
i * sizeof(m[i])); + } + + for (i = 0; i < 8; ++i) { + v[i] = S->h[i]; + } + + v[8] = blake2b_IV[0]; + v[9] = blake2b_IV[1]; + v[10] = blake2b_IV[2]; + v[11] = blake2b_IV[3]; + v[12] = blake2b_IV[4] ^ S->t[0]; + v[13] = blake2b_IV[5] ^ S->t[1]; + v[14] = blake2b_IV[6] ^ S->f[0]; + v[15] = blake2b_IV[7] ^ S->f[1]; + +#define G(r, i, a, b, c, d) \ + do { \ + a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ + } while ((void)0, 0) + +#define ROUND(r) \ + do { \ + G(r, 0, v[0], v[4], v[8], v[12]); \ + G(r, 1, v[1], v[5], v[9], v[13]); \ + G(r, 2, v[2], v[6], v[10], v[14]); \ + G(r, 3, v[3], v[7], v[11], v[15]); \ + G(r, 4, v[0], v[5], v[10], v[15]); \ + G(r, 5, v[1], v[6], v[11], v[12]); \ + G(r, 6, v[2], v[7], v[8], v[13]); \ + G(r, 7, v[3], v[4], v[9], v[14]); \ + } while ((void)0, 0) + + for (r = 0; r < 12; ++r) { + ROUND(r); + } + + for (i = 0; i < 8; ++i) { + S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + } + +#undef G +#undef ROUND +} +#endif + +int blake2b_update(blake2b_state *S, const void *in, size_t inlen) { + const uint8_t *pin = (const uint8_t *)in; + + if (inlen == 0) { + return 0; + } + + /* Sanity check */ + if (S == NULL || in == NULL) { + return -1; + } + + /* Is this a reused state? 
*/ + if (S->f[0] != 0) { + return -1; + } + + if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) { + /* Complete current block */ + size_t left = S->buflen; + size_t fill = BLAKE2B_BLOCKBYTES - left; + memcpy(&S->buf[left], pin, fill); + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress(S, S->buf); + S->buflen = 0; + inlen -= fill; + pin += fill; + /* Avoid buffer copies when possible */ + while (inlen > BLAKE2B_BLOCKBYTES) { + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress(S, pin); + inlen -= BLAKE2B_BLOCKBYTES; + pin += BLAKE2B_BLOCKBYTES; + } + } + memcpy(&S->buf[S->buflen], pin, inlen); + S->buflen += (unsigned int)inlen; + return 0; +} + +int blake2b_update_static(blake2b_state *S, const char in, size_t inlen) { + if (inlen == 0) { + return 0; + } + + /* Sanity check */ + if (S == NULL) { + return -1; + } + + /* Is this a reused state? */ + if (S->f[0] != 0) { + return -1; + } + + if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) { + /* Complete current block */ + size_t left = S->buflen; + size_t fill = BLAKE2B_BLOCKBYTES - left; + memset(&S->buf[left], in, fill); + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress(S, S->buf); + S->buflen = 0; + inlen -= fill; + /* Avoid buffer copies when possible */ + while (inlen > BLAKE2B_BLOCKBYTES) { + memset(S->buf, in, BLAKE2B_BLOCKBYTES); + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress(S, S->buf); + inlen -= BLAKE2B_BLOCKBYTES; + } + } + memset(&S->buf[S->buflen], in, inlen); + S->buflen += (unsigned int)inlen; + return 0; +} + + +int blake2b_final(blake2b_state *S, void *out, size_t outlen) { + uint8_t buffer[BLAKE2B_OUTBYTES] = {0}; + unsigned int i; + + /* Sanity checks */ + if (S == NULL || out == NULL || outlen < S->outlen) { + return -1; + } + + /* Is this a reused state? 
/*
 * One-shot BLAKE2b: hashes `inlen` bytes of `in` into `outlen` bytes at `out`,
 * optionally keyed with `keylen` bytes of `key`.
 *
 * Returns -1 when a pointer is NULL while its length is non-zero, when out is
 * NULL, when outlen is 0 or exceeds BLAKE2B_OUTBYTES, when keylen exceeds
 * BLAKE2B_KEYBYTES, or when any init/update step fails; otherwise returns the
 * result of blake2b_final().
 */
int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
            const void *key, size_t keylen) {
    blake2b_state S;
    int ret = -1; /* stays -1 on every early exit through `fail` */

    /* Verify parameters */
    if (NULL == in && inlen > 0) {
        goto fail;
    }

    if (NULL == out || outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
        goto fail;
    }

    if ((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES) {
        goto fail;
    }

    /* A zero key length selects the unkeyed initialisation. */
    if (keylen > 0) {
        if (blake2b_init_key(&S, outlen, key, keylen) < 0) {
            goto fail;
        }
    } else {
        if (blake2b_init(&S, outlen) < 0) {
            goto fail;
        }
    }

    if (blake2b_update(&S, in, inlen) < 0) {
        goto fail;
    }
    ret = blake2b_final(&S, out, outlen);

fail:
    return ret;
}

/* Argon2 Team - Begin Code */
/*
 * Variable-length hash built on BLAKE2b.
 *
 * The requested length is prepended to the input as a 32-bit little-endian
 * value. For outlen <= BLAKE2B_OUTBYTES a single BLAKE2b call with that digest
 * size produces the result. For longer outputs a chain of full-size digests is
 * computed, each one hashing the previous digest; the first half of every
 * digest is emitted, and the final call produces all remaining bytes.
 *
 * Returns a negative value when outlen exceeds UINT32_MAX or any BLAKE2b step
 * fails; otherwise the (non-negative) result of the last step.
 */
int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
    uint8_t *out = (uint8_t *)pout;
    blake2b_state blake_state;
    uint8_t outlen_bytes[sizeof(uint32_t)] = {0};
    int ret = -1;

    if (outlen > UINT32_MAX) {
        goto fail;
    }

    /* Ensure little-endian byte order! */
    store32(outlen_bytes, (uint32_t)outlen);

/* Runs `statement`, keeps its result in ret and bails out on failure. */
#define TRY(statement)                                                         \
    do {                                                                       \
        ret = statement;                                                       \
        if (ret < 0) {                                                         \
            goto fail;                                                         \
        }                                                                      \
    } while ((void)0, 0)

    if (outlen <= BLAKE2B_OUTBYTES) {
        /* Short output: one hash with the requested digest size. */
        TRY(blake2b_init(&blake_state, outlen));
        TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
        TRY(blake2b_update(&blake_state, in, inlen));
        TRY(blake2b_final(&blake_state, out, outlen));
    } else {
        uint32_t toproduce; /* output bytes still to be written */
        uint8_t out_buffer[BLAKE2B_OUTBYTES];
        uint8_t in_buffer[BLAKE2B_OUTBYTES];
        /* First link of the chain: hash(length || input). */
        TRY(blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
        TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
        TRY(blake2b_update(&blake_state, in, inlen));
        TRY(blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
        memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
        out += BLAKE2B_OUTBYTES / 2;
        toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;

        /* Middle links: re-hash the previous digest, emit its first half. */
        while (toproduce > BLAKE2B_OUTBYTES) {
            memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
            TRY(blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
                        BLAKE2B_OUTBYTES, NULL, 0));
            memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
            out += BLAKE2B_OUTBYTES / 2;
            toproduce -= BLAKE2B_OUTBYTES / 2;
        }

        /* Last link: digest size equals the remaining byte count and the
           whole digest is emitted. */
        memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
        TRY(blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL,
                    0));
        memcpy(out, out_buffer, toproduce);
    }
fail:
    return ret;
#undef TRY
}
/* Argon2 Team - End Code */
+// + +#if defined(__x86_64__) || defined(__i386__) || defined(_WIN64) + #include +#endif +#if defined(__arm__) + #include +#endif + +#include + +#include "../../common/common.h" + +#include "crypto/argon2_hasher/hash/Hasher.h" +#include "crypto/argon2_hasher/hash/argon2/Argon2.h" + +#include "CpuHasher.h" +#include "crypto/argon2_hasher/common/DLLExport.h" + +CpuHasher::CpuHasher() : Hasher() { + m_type = "CPU"; + m_subType = "CPU"; + m_shortSubType = "CPU"; + m_optimization = "REF"; + m_computingThreads = 0; + m_availableProcessingThr = 1; + m_availableMemoryThr = 1; + m_argon2BlocksFillerPtr = nullptr; + m_dllHandle = nullptr; + m_profile = nullptr; + m_threadData = nullptr; +} + +CpuHasher::~CpuHasher() { + this->cleanup(); +} + +bool CpuHasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) { + m_profile = getArgon2Profile(algorithm, variant); + m_description = detectFeaturesAndMakeDescription(); + return true; +} + +bool CpuHasher::configure(xmrig::HasherConfig &config) { + m_intensity = 100; + + if(config.cpuOptimization() != "") { + m_description += "Overiding detected optimization feature with " + config.cpuOptimization() + ".\n"; + m_optimization = config.cpuOptimization(); + } + + loadArgon2BlockFiller(); + + if(m_argon2BlocksFillerPtr == NULL) { + m_intensity = 0; + m_description += "Status: DISABLED - argon2 hashing module not found."; + return false; + } + + m_computingThreads = min(m_availableProcessingThr, m_availableMemoryThr); + + if (m_computingThreads == 0) { + m_intensity = 0; + m_description += "Status: DISABLED - not enough resources."; + return false; + } + + if(config.cpuThreads() > -1) { + m_intensity = min(100.0 * config.cpuThreads() / m_computingThreads, 100.0); + m_computingThreads = min(config.cpuThreads(), m_computingThreads); + } + + if (m_intensity == 0) { + m_description += "Status: DISABLED - by user."; + return false; + } + + m_deviceInfo.intensity = m_intensity; + + storeDeviceInfo(0, m_deviceInfo); + + m_threadData = 
new CpuHasherThread[m_computingThreads]; + for(int i=0; i < m_computingThreads; i++) { + void *buffer = NULL; + void *mem = allocateMemory(buffer); + if(mem == NULL) { + m_intensity = 0; + m_description += "Status: DISABLED - error allocating memory."; + return false; + } + m_threadData[i].mem = buffer; + m_threadData[i].argon2 = new Argon2(NULL, m_argon2BlocksFillerPtr, NULL, mem, mem); + m_threadData[i].hashData.outSize = xmrig::ARGON2_HASHLEN + sizeof(uint32_t); + } + + m_description += "Status: ENABLED - with " + to_string(m_computingThreads) + " threads."; + + return true; +} + +string CpuHasher::detectFeaturesAndMakeDescription() { + stringstream ss; +#if defined(__x86_64__) || defined(__i386__) || defined(_WIN64) + char brand_string[49]; + cpu_features::FillX86BrandString(brand_string); + m_deviceInfo.name = brand_string; + + ss << brand_string << endl; + + cpu_features::X86Features features = cpu_features::GetX86Info().features; + ss << "Optimization features: "; + +#if defined(__x86_64__) || defined(_WIN64) + ss << "SSE2 "; + m_optimization = "SSE2"; +#else + ss << "none"; + m_optimization = "REF"; +#endif + + if(features.ssse3 || features.avx2 || features.avx512f) { + if (features.ssse3) { + ss << "SSSE3 "; + m_optimization = "SSSE3"; + } + if (features.avx) { + ss << "AVX "; + m_optimization = "AVX"; + } + if (features.avx2) { + ss << "AVX2 "; + m_optimization = "AVX2"; + } + if (features.avx512f) { + ss << "AVX512F "; + m_optimization = "AVX512F"; + } + } + ss << endl; +#endif +#if defined(__arm__) + m_deviceInfo.name = "ARM processor"; + + cpu_features::ArmFeatures features = cpu_features::GetArmInfo().features; + ss << "ARM processor" << endl; + ss << "Optimization features: "; + + m_optimization = "REF"; + + if(features.neon) { + ss << "NEON"; + m_optimization = "NEON"; + } + else { + ss << "none"; + } + ss << endl; +#endif + ss << "Selecting " << m_optimization << " as candidate for hashing algorithm." 
<< endl; + + m_availableProcessingThr = thread::hardware_concurrency(); + ss << "Parallelism: " << m_availableProcessingThr << " concurent threads supported." << endl; + + //check available memory + vector memoryTest; + for(m_availableMemoryThr = 0;m_availableMemoryThr < m_availableProcessingThr;m_availableMemoryThr++) { + void *memory = malloc(m_profile->memSize + 64); //64 bytes for alignament - to work on AVX512F optimisations + if(memory == NULL) + break; + memoryTest.push_back(memory); + } + for(vector::iterator it=memoryTest.begin(); it != memoryTest.end(); ++it) { + free(*it); + } + ss << "Memory: there is enough memory for " << m_availableMemoryThr << " concurent threads." << endl; + + return ss.str(); +} + +void CpuHasher::cleanup() { + for(int i=0; i < m_computingThreads; i++) { + delete m_threadData[i].argon2; + free(m_threadData[i].mem); + } + delete[] m_threadData; + if(m_dllHandle != NULL) + dlclose(m_dllHandle); +} + +void CpuHasher::loadArgon2BlockFiller() { + string module_path = m_appFolder; + module_path += "/modules/argon2_fill_blocks_" + m_optimization + ".opt"; + + m_dllHandle = dlopen(module_path.c_str(), RTLD_LAZY); + if(m_dllHandle != NULL) + m_argon2BlocksFillerPtr = (argon2BlocksFillerPtr)dlsym(m_dllHandle, "fill_memory_blocks"); +} + +int CpuHasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) { + CpuHasherThread &threadData = m_threadData[threadIdx]; + threadData.hashData.input = input; + threadData.hashData.inSize = size; + threadData.hashData.output = output; + return threadData.argon2->generateHashes(*m_profile, threadData.hashData); +} + +void *CpuHasher::allocateMemory(void *&buffer) { + size_t mem_size = m_profile->memSize + 64; + void *mem = malloc(mem_size); + buffer = mem; + return align(64, m_profile->memSize, mem, mem_size); +} + +size_t CpuHasher::parallelism(int workerIdx) { + if(workerIdx < 0 || workerIdx > computingThreads()) + return 0; + + return 1; +} + +size_t CpuHasher::deviceCount() { + 
// Per-thread hashing resources; CpuHasher::configure() creates one per
// computing thread and CpuHasher::cleanup() releases them.
struct CpuHasherThread {
    Argon2 *argon2;     // created with new in configure(), deleted in cleanup()
    HashData hashData;  // input/output descriptor filled by compute() before each call
    void *mem;          // raw malloc pointer backing the aligned hashing area; freed in cleanup()
};

// Hasher implementation that runs Argon2 on the CPU through a block-filler
// module loaded at run time (see loadArgon2BlockFiller()).
class CpuHasher : public Hasher {
public:
    CpuHasher();
    ~CpuHasher();  // calls cleanup()

    // Looks up the Argon2 profile for the algorithm/variant and builds the
    // feature description.
    virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant);
    // Applies the configuration and allocates per-thread resources; returns
    // false when the hasher ends up disabled.
    virtual bool configure(xmrig::HasherConfig &config);
    // Frees the per-thread resources and unloads the block-filler module.
    virtual void cleanup();
    // Hashes `size` bytes of `input` using the resources of thread `threadIdx`.
    virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output);
    // Returns 1 for a worker index the hasher accepts, 0 otherwise.
    virtual size_t parallelism(int workerIdx);
    // Returns computingThreads().
    virtual size_t deviceCount();

private:
    // Detects CPU features, picks m_optimization and probes how many threads
    // the machine and its memory can sustain; returns the textual report.
    string detectFeaturesAndMakeDescription();
    // dlopen()s the module matching m_optimization and resolves the filler.
    void loadArgon2BlockFiller();
    // Returns a 64-byte aligned area; `buffer` receives the pointer to free().
    void *allocateMemory(void *&buffer);

    DeviceInfo m_deviceInfo;
    string m_optimization;          // e.g. "REF", "SSE2", "AVX2"; part of the module file name
    int m_availableProcessingThr;   // from thread::hardware_concurrency()
    int m_availableMemoryThr;       // how many profile-sized buffers could be allocated at once
    void *m_dllHandle;              // handle returned by dlopen(), or null
    Argon2Profile *m_profile;
    argon2BlocksFillerPtr m_argon2BlocksFillerPtr;  // "fill_memory_blocks" from the module, or null
    CpuHasherThread *m_threadData;  // array of m_computingThreads entries, or null
};
The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#ifndef BLAKE_ROUND_MKA_OPT_H +#define BLAKE_ROUND_MKA_OPT_H + +#include "../../argon2/blake2/blake2-impl.h" + +#if !defined(__NEON__) +#include +#if defined(__SSSE3__) +#include /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */ +#endif + +#if (defined(__XOP__) || defined(__AVX__)) && (defined(__GNUC__) || defined(__clang__)) +#include +#endif +#else +#include +#endif + +#if !defined(__NEON__) +#if !defined(__AVX512F__) +#if !defined(__AVX2__) +#if !defined(__XOP__) +#if defined(__SSSE3__) +#define r16 \ + (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)) +#define r24 \ + (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)) +#define _mm_roti_epi64(x, c) \ + (-(c) == 32) \ + ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \ + : (-(c) == 24) \ + ? _mm_shuffle_epi8((x), r24) \ + : (-(c) == 16) \ + ? _mm_shuffle_epi8((x), r16) \ + : (-(c) == 63) \ + ? 
_mm_xor_si128(_mm_srli_epi64((x), -(c)), \ + _mm_add_epi64((x), (x))) \ + : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \ + _mm_slli_epi64((x), 64 - (-(c)))) +#else /* defined(__SSE2__) */ +#define _mm_roti_epi64(r, c) \ + _mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c)))) +#endif +#else +#endif + +static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) { + const __m128i z = _mm_mul_epu32(x, y); + return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z)); +} + +#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = fBlaMka(A0, B0); \ + A1 = fBlaMka(A1, B1); \ + \ + D0 = _mm_xor_si128(D0, A0); \ + D1 = _mm_xor_si128(D1, A1); \ + \ + D0 = _mm_roti_epi64(D0, -32); \ + D1 = _mm_roti_epi64(D1, -32); \ + \ + C0 = fBlaMka(C0, D0); \ + C1 = fBlaMka(C1, D1); \ + \ + B0 = _mm_xor_si128(B0, C0); \ + B1 = _mm_xor_si128(B1, C1); \ + \ + B0 = _mm_roti_epi64(B0, -24); \ + B1 = _mm_roti_epi64(B1, -24); \ + } while ((void)0, 0) + +#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = fBlaMka(A0, B0); \ + A1 = fBlaMka(A1, B1); \ + \ + D0 = _mm_xor_si128(D0, A0); \ + D1 = _mm_xor_si128(D1, A1); \ + \ + D0 = _mm_roti_epi64(D0, -16); \ + D1 = _mm_roti_epi64(D1, -16); \ + \ + C0 = fBlaMka(C0, D0); \ + C1 = fBlaMka(C1, D1); \ + \ + B0 = _mm_xor_si128(B0, C0); \ + B1 = _mm_xor_si128(B1, C1); \ + \ + B0 = _mm_roti_epi64(B0, -63); \ + B1 = _mm_roti_epi64(B1, -63); \ + } while ((void)0, 0) + +#if defined(__SSSE3__) +#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \ + __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \ + B0 = t0; \ + B1 = t1; \ + \ + t0 = C0; \ + C0 = C1; \ + C1 = t0; \ + \ + t0 = _mm_alignr_epi8(D1, D0, 8); \ + t1 = _mm_alignr_epi8(D0, D1, 8); \ + D0 = t1; \ + D1 = t0; \ + } while ((void)0, 0) + +#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \ + __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \ + B0 = t0; \ + B1 = t1; \ + \ + t0 = 
/*
 * AVX2 version of the first BlaMka mixing half-step, applied to two
 * independent register sets (…0 and …1), four 64-bit lanes each. Per lane:
 *
 *     A = A + B + 2 * low32(A) * low32(B);   D = rotr32(D ^ A);
 *     C = C + D + 2 * low32(C) * low32(D);   B = rotr24(B ^ C);
 *
 * Parameter order is (A0, A1, B0, B1, ...), i.e. grouped by row, unlike the
 * SSE G1/G2 macros in this header which take (A0, B0, C0, D0, A1, ...).
 *
 * NOTE(review): the definition ends with "while(...);" including the
 * semicolon, so an invocation is already a complete statement. Call sites
 * outside this chunk may rely on that and omit their own ';' - confirm before
 * removing it.
 */
#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
    do { \
        __m256i ml = _mm256_mul_epu32(A0, B0); \
        ml = _mm256_add_epi64(ml, ml); \
        A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
        D0 = _mm256_xor_si256(D0, A0); \
        D0 = rotr32(D0); \
        \
        ml = _mm256_mul_epu32(C0, D0); \
        ml = _mm256_add_epi64(ml, ml); \
        C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
        \
        B0 = _mm256_xor_si256(B0, C0); \
        B0 = rotr24(B0); \
        \
        ml = _mm256_mul_epu32(A1, B1); \
        ml = _mm256_add_epi64(ml, ml); \
        A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
        D1 = _mm256_xor_si256(D1, A1); \
        D1 = rotr32(D1); \
        \
        ml = _mm256_mul_epu32(C1, D1); \
        ml = _mm256_add_epi64(ml, ml); \
        C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
        \
        B1 = _mm256_xor_si256(B1, C1); \
        B1 = rotr24(B1); \
    } while((void)0, 0);

/*
 * AVX2 version of the second BlaMka mixing half-step: identical data flow to
 * G1_AVX2, with rotr16 applied to D and rotr63 applied to B.
 *
 * The same note about the trailing semicolon applies.
 */
#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
    do { \
        __m256i ml = _mm256_mul_epu32(A0, B0); \
        ml = _mm256_add_epi64(ml, ml); \
        A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
        D0 = _mm256_xor_si256(D0, A0); \
        D0 = rotr16(D0); \
        \
        ml = _mm256_mul_epu32(C0, D0); \
        ml = _mm256_add_epi64(ml, ml); \
        C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
        B0 = _mm256_xor_si256(B0, C0); \
        B0 = rotr63(B0); \
        \
        ml = _mm256_mul_epu32(A1, B1); \
        ml = _mm256_add_epi64(ml, ml); \
        A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
        D1 = _mm256_xor_si256(D1, A1); \
        D1 = rotr16(D1); \
        \
        ml = _mm256_mul_epu32(C1, D1); \
        ml = _mm256_add_epi64(ml, ml); \
        C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
        B1 = _mm256_xor_si256(B1, C1); \
        B1 = rotr63(B1); \
    } while((void)0, 0);
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \ + } while((void)0, 0); + +#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \ + __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \ + B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ + B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ + \ + tmp1 = C0; \ + C0 = C1; \ + C1 = tmp1; \ + \ + tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \ + tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \ + D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ + D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ + } while(0); + +#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \ + C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ + D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \ + \ + B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \ + C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ + D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \ + } while((void)0, 0); + +#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \ + __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \ + B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ + B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ + \ + tmp1 = C0; \ + C0 = C1; \ + C1 = tmp1; \ + \ + tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \ + tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \ + D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \ + D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \ + } while((void)0, 0); + +#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \ + do{ \ + G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ + G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ + \ + DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \ + \ + G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ + 
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ + \ + UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \ + } while((void)0, 0); + +#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \ + do{ \ + G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ + G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ + \ + DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \ + \ + G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ + G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \ + \ + UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \ + } while((void)0, 0); + +#endif /* __AVX2__ */ + +#else /* __AVX512F__ */ + +#include + +#define ror64(x, n) _mm512_ror_epi64((x), (n)) + +static BLAKE2_INLINE __m512i muladd(__m512i x, __m512i y) +{ + __m512i z = _mm512_mul_epu32(x, y); + return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z)); +} + +#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = muladd(A0, B0); \ + A1 = muladd(A1, B1); \ +\ + D0 = _mm512_xor_si512(D0, A0); \ + D1 = _mm512_xor_si512(D1, A1); \ +\ + D0 = ror64(D0, 32); \ + D1 = ror64(D1, 32); \ +\ + C0 = muladd(C0, D0); \ + C1 = muladd(C1, D1); \ +\ + B0 = _mm512_xor_si512(B0, C0); \ + B1 = _mm512_xor_si512(B1, C1); \ +\ + B0 = ror64(B0, 24); \ + B1 = ror64(B1, 24); \ + } while ((void)0, 0) + +#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = muladd(A0, B0); \ + A1 = muladd(A1, B1); \ +\ + D0 = _mm512_xor_si512(D0, A0); \ + D1 = _mm512_xor_si512(D1, A1); \ +\ + D0 = ror64(D0, 16); \ + D1 = ror64(D1, 16); \ +\ + C0 = muladd(C0, D0); \ + C1 = muladd(C1, D1); \ +\ + B0 = _mm512_xor_si512(B0, C0); \ + B1 = _mm512_xor_si512(B1, C1); \ +\ + B0 = ror64(B0, 63); \ + B1 = ror64(B1, 63); \ + } while ((void)0, 0) + +#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \ + B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \ +\ + C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ + C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ +\ + D0 = 
_mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \ + D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \ + } while ((void)0, 0) + +#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \ + B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \ +\ + C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \ + C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \ +\ + D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \ + D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ +\ + UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ + } while ((void)0, 0) + +#define SWAP_HALVES(A0, A1) \ + do { \ + __m512i t0, t1; \ + t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \ + t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \ + A0 = t0; \ + A1 = t1; \ + } while((void)0, 0) + +#define SWAP_QUARTERS(A0, A1) \ + do { \ + SWAP_HALVES(A0, A1); \ + A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \ + A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \ + } while((void)0, 0) + +#define UNSWAP_QUARTERS(A0, A1) \ + do { \ + A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \ + A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \ + SWAP_HALVES(A0, A1); \ + } while((void)0, 0) + +#define BLAKE2_ROUND_1(A0, C0, B0, D0, A1, C1, B1, D1) \ + do { \ + SWAP_HALVES(A0, B0); \ + SWAP_HALVES(C0, D0); \ + SWAP_HALVES(A1, B1); \ + SWAP_HALVES(C1, D1); \ + BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \ + SWAP_HALVES(A0, B0); \ + SWAP_HALVES(C0, D0); \ + SWAP_HALVES(A1, 
B1); \ + SWAP_HALVES(C1, D1); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + SWAP_QUARTERS(A0, A1); \ + SWAP_QUARTERS(B0, B1); \ + SWAP_QUARTERS(C0, C1); \ + SWAP_QUARTERS(D0, D1); \ + BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \ + UNSWAP_QUARTERS(A0, A1); \ + UNSWAP_QUARTERS(B0, B1); \ + UNSWAP_QUARTERS(C0, C1); \ + UNSWAP_QUARTERS(D0, D1); \ + } while ((void)0, 0) + +#endif /* __AVX512F__ */ + +#else /* __NEON__ */ + +static BLAKE2_INLINE uint64x2_t fBlaMka(uint64x2_t x, uint64x2_t y) { + const uint64x2_t z = vmull_u32(vmovn_u64(x), vmovn_u64(y)); + return vaddq_u64(vaddq_u64(x, y), vaddq_u64(z, z)); +} + +#define vrorq_n_u64_32(x) vreinterpretq_u64_u32(vrev64q_u32(vreinterpretq_u32_u64((x)))) + +#define vrorq_n_u64_24(x) vcombine_u64( \ + vreinterpret_u64_u8(vext_u8(vreinterpret_u8_u64(vget_low_u64(x)), vreinterpret_u8_u64(vget_low_u64(x)), 3)), \ + vreinterpret_u64_u8(vext_u8(vreinterpret_u8_u64(vget_high_u64(x)), vreinterpret_u8_u64(vget_high_u64(x)), 3))) + +#define vrorq_n_u64_16(x) vcombine_u64( \ + vreinterpret_u64_u8(vext_u8(vreinterpret_u8_u64(vget_low_u64(x)), vreinterpret_u8_u64(vget_low_u64(x)), 2)), \ + vreinterpret_u64_u8(vext_u8(vreinterpret_u8_u64(vget_high_u64(x)), vreinterpret_u8_u64(vget_high_u64(x)), 2))) + +#define vrorq_n_u64_63(x) veorq_u64(vaddq_u64(x, x), vshrq_n_u64(x, 63)) + +#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ +do { \ + A0 = fBlaMka(A0, B0); \ + A1 = fBlaMka(A1, B1); \ + \ + D0 = veorq_u64(D0, A0); \ + D1 = veorq_u64(D1, A1); \ + \ + D0 = vrorq_n_u64_32(D0); \ + D1 = vrorq_n_u64_32(D1); \ + \ + C0 = fBlaMka(C0, D0); \ + C1 = fBlaMka(C1, D1); \ + \ + B0 = veorq_u64(B0, C0); \ + B1 = veorq_u64(B1, C1); \ + \ + B0 = vrorq_n_u64_24(B0); \ + B1 = vrorq_n_u64_24(B1); \ +} while ((void)0, 0) + +#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ +do { \ + A0 = fBlaMka(A0, B0); \ + A1 = fBlaMka(A1, B1); \ + \ + D0 = veorq_u64(D0, A0); \ + D1 = veorq_u64(D1, A1); \ + \ + D0 = 
vrorq_n_u64_16(D0); \ + D1 = vrorq_n_u64_16(D1); \ + \ + C0 = fBlaMka(C0, D0); \ + C1 = fBlaMka(C1, D1); \ + \ + B0 = veorq_u64(B0, C0); \ + B1 = veorq_u64(B1, C1); \ + \ + B0 = vrorq_n_u64_63(B0); \ + B1 = vrorq_n_u64_63(B1); \ +} while ((void)0, 0) + +#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + t0 = vextq_u64(B0, B1, 1); \ + t1 = vextq_u64(B1, B0, 1); \ + B0 = t0; B1 = t1; t0 = C0; C0 = C1; C1 = t0; \ + t0 = vextq_u64(D1, D0, 1); t1 = vextq_u64(D0, D1, 1); \ + D0 = t0; D1 = t1; + +#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + t0 = vextq_u64(B1, B0, 1); \ + t1 = vextq_u64(B0, B1, 1); \ + B0 = t0; B1 = t1; t0 = C0; C0 = C1; C1 = t0; \ + t0 = vextq_u64(D0, D1, 1); t1 = vextq_u64(D1, D0, 1); \ + D0 = t0; D1 = t1; + +#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \ +do { \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ + \ + DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ + \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ + \ + UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ +} while ((void)0, 0) + +#endif /* __NEON__ */ + +#endif /* BLAKE_ROUND_MKA_OPT_H */ diff --git a/src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-ref.h b/src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-ref.h new file mode 100644 index 00000000..fb07a969 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/argon2_opt/blamka-round-ref.h @@ -0,0 +1,55 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. 
The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#ifndef BLAKE_ROUND_MKA_H +#define BLAKE_ROUND_MKA_H + +#include "../../argon2/blake2/blake2-impl.h" + +/* designed by the Lyra PHC team */ +static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) { + const uint64_t m = UINT64_C(0xFFFFFFFF); + const uint64_t xy = (x & m) * (y & m); + return x + y + 2 * xy; +} + +#define G(a, b, c, d) \ + do { \ + a = fBlaMka(a, b); \ + d = rotr64(d ^ a, 32); \ + c = fBlaMka(c, d); \ + b = rotr64(b ^ c, 24); \ + a = fBlaMka(a, b); \ + d = rotr64(d ^ a, 16); \ + c = fBlaMka(c, d); \ + b = rotr64(b ^ c, 63); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \ + v12, v13, v14, v15) \ + do { \ + G(v0, v4, v8, v12); \ + G(v1, v5, v9, v13); \ + G(v2, v6, v10, v14); \ + G(v3, v7, v11, v15); \ + G(v0, v5, v10, v15); \ + G(v1, v6, v11, v12); \ + G(v2, v7, v8, v13); \ + G(v3, v4, v9, v14); \ + } while ((void)0, 0) + +#endif diff --git a/src/crypto/argon2_hasher/hash/cpu/argon2_opt/implementation.c b/src/crypto/argon2_hasher/hash/cpu/argon2_opt/implementation.c new file mode 100755 index 00000000..c01261ab --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/argon2_opt/implementation.c @@ -0,0 +1,448 @@ +// +// Created by Haifa Bogdan Adnan on 06/08/2018. 
+// + +#include +#include +#include +#include + +#include "../../../common/DLLImport.h" +#include "../../argon2/Defs.h" +#include "../../../common/DLLExport.h" + +#if !defined(BUILD_REF) && (defined(__x86_64__) || defined(_WIN64) || defined(__NEON__)) +#include "blamka-round-opt.h" +#else +#include "blamka-round-ref.h" +#endif + +void copy_block(block *dst, const block *src) { + memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK); +} + +void xor_block(block *dst, const block *src) { + int i; + for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { + dst->v[i] ^= src->v[i]; + } +} + +#ifndef BUILD_REF + +#if defined(__AVX512F__) +static void fill_block(__m512i *state, const block *ref_block, + block *next_block, int with_xor, int keep) { + __m512i block_XY[ARGON2_512BIT_WORDS_IN_BLOCK]; + unsigned int i; + + if (with_xor) { + for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) { + state[i] = _mm512_xor_si512( + state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i)); + block_XY[i] = _mm512_xor_si512( + state[i], _mm512_loadu_si512((const __m512i *)next_block->v + i)); + } + } else { + for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) { + block_XY[i] = state[i] = _mm512_xor_si512( + state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i)); + } + } + + for (i = 0; i < 2; ++i) { + BLAKE2_ROUND_1( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + } + + for (i = 0; i < 2; ++i) { + BLAKE2_ROUND_2( + state[2 * 0 + i], state[2 * 1 + i], state[2 * 2 + i], state[2 * 3 + i], + state[2 * 4 + i], state[2 * 5 + i], state[2 * 6 + i], state[2 * 7 + i]); + } + + if(keep) { + for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) { + state[i] = _mm512_xor_si512(state[i], block_XY[i]); + _mm512_storeu_si512((__m512i *)next_block->v + i, state[i]); + } + } + else { + for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) { + state[i] = _mm512_xor_si512(state[i], 
block_XY[i]); + } + } +} +#elif defined(__AVX2__) +static void fill_block(__m256i *state, const block *ref_block, + block *next_block, int with_xor, int keep) { + __m256i block_XY[ARGON2_HWORDS_IN_BLOCK]; + unsigned int i; + + if (with_xor) { + for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { + state[i] = _mm256_xor_si256( + state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i)); + block_XY[i] = _mm256_xor_si256( + state[i], _mm256_loadu_si256((const __m256i *)next_block->v + i)); + } + } else { + for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { + block_XY[i] = state[i] = _mm256_xor_si256( + state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i)); + } + } + + for (i = 0; i < 4; ++i) { + BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], + state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); + } + + for (i = 0; i < 4; ++i) { + BLAKE2_ROUND_2(state[ 0 + i], state[ 4 + i], state[ 8 + i], state[12 + i], + state[16 + i], state[20 + i], state[24 + i], state[28 + i]); + } + + if(keep) { + for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { + state[i] = _mm256_xor_si256(state[i], block_XY[i]); + _mm256_storeu_si256((__m256i *)next_block->v + i, state[i]); /* unaligned store: next_block is only ever read with loadu, so 32-byte alignment is not guaranteed */ + } + } + else { + for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { + state[i] = _mm256_xor_si256(state[i], block_XY[i]); + } + } +} +#elif defined(__AVX__) + +#define I2D(x) _mm256_castsi256_pd(x) +#define D2I(x) _mm256_castpd_si256(x) + +static void fill_block(__m128i *state, const block *ref_block, + block *next_block, int with_xor, int keep) { + __m128i block_XY[ARGON2_OWORDS_IN_BLOCK]; + unsigned int i; + + __m256i t; + __m256i *s256 = (__m256i *) state, *block256 = (__m256i *) block_XY; + + if (with_xor) { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK / 2; i++) { + t = D2I(_mm256_xor_pd(I2D(_mm256_loadu_si256(s256 + i)), \ + I2D(_mm256_loadu_si256((const __m256i *)ref_block->v + i)))); + _mm256_storeu_si256(s256 + i, t); + t = D2I(_mm256_xor_pd(I2D(t), \ +
I2D(_mm256_loadu_si256((const __m256i *)next_block->v + i)))); + _mm256_storeu_si256(block256 + i, t); + } + } else { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK / 2; i++) { + t = D2I(_mm256_xor_pd(I2D(_mm256_loadu_si256(s256 + i)), \ + I2D(_mm256_loadu_si256((const __m256i *)ref_block->v + i)))); + _mm256_storeu_si256(s256 + i, t); + _mm256_storeu_si256(block256 + i, t); + } + } + + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], + state[8 * i + 3], state[8 * i + 4], state[8 * i + 5], + state[8 * i + 6], state[8 * i + 7]); + } + + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], + state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i], + state[8 * 6 + i], state[8 * 7 + i]); + } + + if(keep) { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK / 2; i++) { + t = D2I(_mm256_xor_pd(I2D(_mm256_loadu_si256(s256 + i)), \ + I2D(_mm256_loadu_si256(block256 + i)))); + + _mm256_storeu_si256(s256 + i, t); + _mm256_storeu_si256((__m256i *)next_block->v + i, t); + } + } + else { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK / 2; i++) { + t = D2I(_mm256_xor_pd(I2D(_mm256_loadu_si256(s256 + i)), \ + I2D(_mm256_loadu_si256(block256 + i)))); + + _mm256_storeu_si256(s256 + i, t); + } + } + +} +#elif defined(__NEON__) +static void fill_block(uint64x2_t *state, const block *ref_block, + block *next_block, int with_xor, int keep) { + uint64x2_t block_XY[ARGON2_OWORDS_IN_BLOCK]; + uint64x2_t t0, t1; + + unsigned int i; + + if (with_xor) { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + state[i] = veorq_u64(state[i], vld1q_u64(ref_block->v + i*2)); + block_XY[i] = veorq_u64(state[i], vld1q_u64(next_block->v + i*2)); + } + } else { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + block_XY[i] = state[i] = veorq_u64(state[i], vld1q_u64(ref_block->v + i*2)); + } + } + + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], + state[8 * i + 3], state[8 * i + 4], state[8 * i + 
5], + state[8 * i + 6], state[8 * i + 7]); + } + + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], + state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i], + state[8 * 6 + i], state[8 * 7 + i]); + } + + if(keep) { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + state[i] = veorq_u64(state[i], block_XY[i]); + vst1q_u64(next_block->v + i*2, state[i]); + } + } + else { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + state[i] = veorq_u64(state[i], block_XY[i]); + } + } +} +#else +static void fill_block(__m128i *state, const block *ref_block, + block *next_block, int with_xor, int keep) { + __m128i block_XY[ARGON2_OWORDS_IN_BLOCK]; + unsigned int i; + + if (with_xor) { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + state[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i)); + block_XY[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i *)next_block->v + i)); + } + } else { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + block_XY[i] = state[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i)); + } + } + + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], + state[8 * i + 3], state[8 * i + 4], state[8 * i + 5], + state[8 * i + 6], state[8 * i + 7]); + } + + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], + state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i], + state[8 * 6 + i], state[8 * 7 + i]); + } + + if(keep) { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + state[i] = _mm_xor_si128(state[i], block_XY[i]); + _mm_storeu_si128((__m128i *)next_block->v + i, state[i]); + } + } + else { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + state[i] = _mm_xor_si128(state[i], block_XY[i]); + } + } +} +#endif + +#else +static void fill_block(block *prev_block, const block *ref_block, + block *next_block, int with_xor, int keep) { + block block_tmp; + 
unsigned i; + + xor_block(prev_block, ref_block); + copy_block(&block_tmp, prev_block); + + if (with_xor && next_block != NULL) { + xor_block(&block_tmp, next_block); + } + + /* Apply Blake2 on columns of 64-bit words: (0,1,...,15) , then + (16,17,..31)... finally (112,113,...127) */ + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND_NOMSG( + prev_block->v[16 * i], prev_block->v[16 * i + 1], prev_block->v[16 * i + 2], + prev_block->v[16 * i + 3], prev_block->v[16 * i + 4], prev_block->v[16 * i + 5], + prev_block->v[16 * i + 6], prev_block->v[16 * i + 7], prev_block->v[16 * i + 8], + prev_block->v[16 * i + 9], prev_block->v[16 * i + 10], prev_block->v[16 * i + 11], + prev_block->v[16 * i + 12], prev_block->v[16 * i + 13], prev_block->v[16 * i + 14], + prev_block->v[16 * i + 15]); + } + + /* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then + (2,3,18,19,...,114,115).. finally (14,15,30,31,...,126,127) */ + for (i = 0; i < 8; i++) { + BLAKE2_ROUND_NOMSG( + prev_block->v[2 * i], prev_block->v[2 * i + 1], prev_block->v[2 * i + 16], + prev_block->v[2 * i + 17], prev_block->v[2 * i + 32], prev_block->v[2 * i + 33], + prev_block->v[2 * i + 48], prev_block->v[2 * i + 49], prev_block->v[2 * i + 64], + prev_block->v[2 * i + 65], prev_block->v[2 * i + 80], prev_block->v[2 * i + 81], + prev_block->v[2 * i + 96], prev_block->v[2 * i + 97], prev_block->v[2 * i + 112], + prev_block->v[2 * i + 113]); + } + + xor_block(prev_block, &block_tmp); + if(keep) + copy_block(next_block, prev_block); +} + +#endif + +DLLEXPORT void *fill_memory_blocks(int threads, Argon2Profile *profile, void *user_data) { + void *memory = user_data; +#ifndef BUILD_REF +#if defined(__AVX512F__) + __m512i state[ARGON2_512BIT_WORDS_IN_BLOCK]; + uint64_t buff_512[8]; +#elif defined(__AVX2__) + __m256i state[ARGON2_HWORDS_IN_BLOCK]; + uint64_t buff_256[4]; +#elif defined(__x86_64__) || defined(_WIN64) + __m128i state[ARGON2_OWORDS_IN_BLOCK]; +#elif defined(__NEON__) + uint64x2_t 
state[ARGON2_OWORDS_IN_BLOCK]; +#endif +#else + block state_; + block *state = &state_; +#endif + int lane_length = profile->segSize * 4; + int seg_length = profile->segSize; + int suc_idx = profile->succesiveIdxs; + + for(int thr = 0; thr < threads;thr++) { + block *ref_block = NULL, *curr_block = NULL; + + int32_t ref_idx = 0; + int32_t cur_idx = 0; + int32_t prev_idx = 0; + int32_t seg_type = 0; + int32_t idx = 0; + int32_t keep = 1; + int32_t with_xor = 0; + + block *blocks = (block *)((uint8_t*)memory + thr * profile->memSize); + + int32_t *address = profile->blockRefs; + + for(uint32_t s = 0; s < profile->segCount; s++) { + cur_idx = profile->segments[s * 3]; + prev_idx = profile->segments[s * 3 + 1]; + seg_type = profile->segments[s * 3 + 2]; + keep = 1; + with_xor = (s >= profile->thrCost * 4) ? 1 : 0; + + idx = (s < profile->thrCost) ? 2 : 0; + + int32_t lane = s % profile->thrCost; + int32_t slice = (s / profile->thrCost) % 4; + int32_t pass = (s / profile->thrCost) / 4; + + memcpy(state, (void *) (blocks + prev_idx), ARGON2_BLOCK_SIZE); + + if(seg_type == 0) { + if(s < profile->thrCost) + address = &profile->blockRefs[(s * (profile->segSize - 2)) * 3]; + else + address = &profile->blockRefs[(profile->thrCost * (profile->segSize - 2) + (s - profile->thrCost) * profile->segSize) * 3]; + } + + for (int i = idx; i < seg_length; ++i, cur_idx ++) { + if (seg_type == 1) { // data dependent addressing +#ifndef BUILD_REF +#if defined(__AVX512F__) + _mm512_storeu_si512(buff_512, state[0]); + uint64_t pseudo_rand = buff_512[0]; +#elif defined(__AVX2__) + _mm256_storeu_si256((__m256i *)buff_256, state[0]); /* explicit cast: the intrinsic takes __m256i *, buff_256 is uint64_t[4] */ + uint64_t pseudo_rand = buff_256[0]; +#elif defined(__x86_64__) || defined(_WIN64) + uint64_t pseudo_rand = _mm_cvtsi128_si64(state[0]); +#elif defined(__NEON__) + uint64_t pseudo_rand = 0; + vst1q_lane_u64(&pseudo_rand, state[0], 0); +#endif +#else + uint64_t pseudo_rand = state->v[0]; +#endif + uint64_t ref_lane = ((pseudo_rand >> 32)) % profile->thrCost; + uint32_t
reference_area_size = 0; + if(pass > 0) { + if (lane == ref_lane) { + reference_area_size = lane_length - seg_length + i - 1; + } else { + reference_area_size = lane_length - seg_length + ((i == 0) ? (-1) : 0); + } + } + else { + if (lane == ref_lane) { + reference_area_size = slice * seg_length + i - 1; + } else { + reference_area_size = slice * seg_length + ((i == 0) ? (-1) : 0); + } + } + uint64_t relative_position = pseudo_rand & 0xFFFFFFFF; + relative_position = relative_position * relative_position >> 32; + + relative_position = reference_area_size - 1 - + (reference_area_size * relative_position >> 32); + + ref_idx = ref_lane * lane_length + (((pass > 0 && slice < 3) ? ((slice + 1) * seg_length) : 0) + relative_position) % lane_length; + } + else { + ref_idx = address[1]; + if(suc_idx == 0) + cur_idx = address[0]; + keep = address[2]; + + address += 3; + } + + ref_block = blocks + ref_idx; + curr_block = blocks + cur_idx; + + fill_block(state, ref_block, curr_block, with_xor, keep); + } + } + + uint32_t dst = -1; + for(; address < (profile->blockRefs + profile->blockRefsSize * 3); address += 3) { + if (address[2] == -1) { + curr_block = blocks + address[0]; + ref_block = blocks + address[1]; + dst = address[0]; + xor_block(curr_block, ref_block); + } + } + if(dst != -1) + copy_block(blocks, blocks + dst); + else + copy_block(blocks, state); + } + + return memory; +} + diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/.clang-format b/src/crypto/argon2_hasher/hash/cpu/cpu_features/.clang-format new file mode 100755 index 00000000..06ea346a --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/.clang-format @@ -0,0 +1,4 @@ +--- +Language: Cpp +BasedOnStyle: Google +... 
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/.gitignore b/src/crypto/argon2_hasher/hash/cpu/cpu_features/.gitignore new file mode 100755 index 00000000..0690aa44 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/.gitignore @@ -0,0 +1 @@ +cmake_build/ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/.travis.yml b/src/crypto/argon2_hasher/hash/cpu/cpu_features/.travis.yml new file mode 100755 index 00000000..deafdfa7 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/.travis.yml @@ -0,0 +1,91 @@ +language: c + +sudo: false + +cache: + directories: + - $HOME/cpu_features_archives + +matrix: + include: + - os: linux + compiler: gcc + env: + TOOLCHAIN=NATIVE + TARGET=native + - os: linux + compiler: clang + env: + TOOLCHAIN=NATIVE + TARGET=native + - os: osx + compiler: gcc + env: + TOOLCHAIN=NATIVE + TARGET=native + - os: osx + compiler: clang + env: + TOOLCHAIN=NATIVE + TARGET=native + - os: linux-ppc64le + compiler: gcc + env: + TOOLCHAIN=NATIVE + TARGET=native + - os: linux-ppc64le + compiler: clang + env: + TOOLCHAIN=NATIVE + TARGET=native + # Toolchains for little-endian, 64-bit ARMv8 for GNU/Linux systems + - os: linux + env: + TOOLCHAIN=LINARO + TARGET=aarch64-linux-gnu + QEMU_ARCH=aarch64 + # Toolchains for little-endian, hard-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems + - os: linux + env: + TOOLCHAIN=LINARO + TARGET=arm-linux-gnueabihf + QEMU_ARCH=arm + # Toolchains for little-endian, 32-bit ARMv8 for GNU/Linux systems + - os: linux + env: + TOOLCHAIN=LINARO + TARGET=armv8l-linux-gnueabihf + QEMU_ARCH=arm + # Toolchains for little-endian, soft-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems + - os: linux + env: + TOOLCHAIN=LINARO + TARGET=arm-linux-gnueabi + QEMU_ARCH=arm + # Toolchains for big-endian, 64-bit ARMv8 for GNU/Linux systems + - os: linux + env: + TOOLCHAIN=LINARO + TARGET=aarch64_be-linux-gnu + QEMU_ARCH=DISABLED + # Toolchains for big-endian, hard-float, 32-bit ARMv7 
(and earlier) for GNU/Linux systems + - os: linux + env: + TOOLCHAIN=LINARO + TARGET=armeb-linux-gnueabihf + QEMU_ARCH=DISABLED + # Toolchains for big-endian, soft-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems + - os: linux + env: + TOOLCHAIN=LINARO + TARGET=armeb-linux-gnueabi + QEMU_ARCH=DISABLED + - os: linux + env: + TOOLCHAIN=CODESCAPE + TARGET=mips-mti-linux-gnu + QEMU_ARCH=DISABLED + +script: + - cmake --version + - bash -e -x ./scripts/run_integration.sh diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/CMakeLists.txt b/src/crypto/argon2_hasher/hash/cpu/cpu_features/CMakeLists.txt new file mode 100755 index 00000000..591c1164 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/CMakeLists.txt @@ -0,0 +1,165 @@ +cmake_minimum_required(VERSION 3.0) + +project(CpuFeatures VERSION 0.1.0) + +# Default Build Type to be Release +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release" CACHE STRING + "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." + FORCE) +endif(NOT CMAKE_BUILD_TYPE) + +# BUILD_TESTING is a standard CMake variable, but we declare it here to make it +# prominent in the GUI. +option(BUILD_TESTING "Enable test (depends on googletest)." OFF) +# BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to make +# it prominent in the GUI. +option(BUILD_SHARED_LIBS "Build library as shared." 
OFF) + +# +# library : cpu_features +# + +set(_HDRS + include/cpuinfo_aarch64.h + include/cpuinfo_arm.h + include/cpuinfo_mips.h + include/cpuinfo_ppc.h + include/cpuinfo_x86.h + include/cpu_features_macros.h +) + +add_library(cpu_features + ${_HDRS} + include/internal/bit_utils.h + include/internal/linux_features_aggregator.h + include/internal/cpuid_x86.h + include/internal/filesystem.h + include/internal/hwcaps.h + include/internal/stack_line_reader.h + include/internal/string_view.h + include/cpu_features_macros.h + src/linux_features_aggregator.c + src/cpuid_x86_clang_gcc.c + src/cpuid_x86_msvc.c + src/cpuinfo_aarch64.c + src/cpuinfo_arm.c + src/cpuinfo_mips.c + src/cpuinfo_ppc.c + src/cpuinfo_x86.c + src/filesystem.c + src/hwcaps.c + src/stack_line_reader.c + src/string_view.c +) + +target_include_directories(cpu_features + PUBLIC + $ + $ + PRIVATE + include/internal +) +set_target_properties(cpu_features PROPERTIES PUBLIC_HEADER "${_HDRS}") +target_compile_definitions(cpu_features + PUBLIC STACK_LINE_READER_BUFFER_SIZE=1024) +target_link_libraries(cpu_features PUBLIC ${CMAKE_DL_LIBS}) + +# The use of shared libraries is discouraged. +# For API / ABI compatibility reasons, it is recommended to build and use +# cpu_features in a subdirectory of your project or as an embedded dependency. +if(BUILD_SHARED_LIBS) + set_property(TARGET cpu_features PROPERTY POSITION_INDEPENDENT_CODE ON) +endif() +add_library(CpuFeature::cpu_features ALIAS cpu_features) + +# +# program : list_cpu_features +# + +add_executable(list_cpu_features src/utils/list_cpu_features.c) +target_link_libraries(list_cpu_features PRIVATE cpu_features) +add_executable(CpuFeature::list_cpu_features ALIAS list_cpu_features) + +# +# tests +# + +include(CTest) +if(BUILD_TESTING) + # Automatically incorporate googletest into the CMake Project if target not + # found. + if(NOT TARGET gtest OR NOT TARGET gmock_main) + # Download and unpack googletest at configure time. 
+ configure_file( + cmake/googletest.CMakeLists.txt.in + googletest-download/CMakeLists.txt + ) + + execute_process( + COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download) + + if(result) + message(FATAL_ERROR "CMake step for googletest failed: ${result}") + endif() + + execute_process( + COMMAND ${CMAKE_COMMAND} --build . + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download) + + if(result) + message(FATAL_ERROR "Build step for googletest failed: ${result}") + endif() + + # Prevent overriding the parent project's compiler/linker settings on + # Windows. + set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) + + # Add googletest directly to our build. This defines the gtest and + # gtest_main targets. + add_subdirectory(${CMAKE_BINARY_DIR}/googletest-src + ${CMAKE_BINARY_DIR}/googletest-build + EXCLUDE_FROM_ALL) + endif() + + add_subdirectory(test) +endif() + +# +# Install +# + +include(GNUInstallDirs) +install(TARGETS cpu_features list_cpu_features + EXPORT CpuFeaturesTargets + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/cpu_features + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) +install(EXPORT CpuFeaturesTargets + NAMESPACE CpuFeatures:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/CpuFeatures + COMPONENT Devel +) +include(CMakePackageConfigHelpers) +configure_package_config_file(cmake/CpuFeaturesConfig.cmake.in + "${PROJECT_BINARY_DIR}/CpuFeaturesConfig.cmake" + INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/CpuFeatures" + NO_SET_AND_CHECK_MACRO + NO_CHECK_REQUIRED_COMPONENTS_MACRO +) +write_basic_package_version_file( + "${PROJECT_BINARY_DIR}/CpuFeaturesConfigVersion.cmake" + COMPATIBILITY SameMajorVersion +) +install( + FILES + "${PROJECT_BINARY_DIR}/CpuFeaturesConfig.cmake" + "${PROJECT_BINARY_DIR}/CpuFeaturesConfigVersion.cmake" + DESTINATION 
"${CMAKE_INSTALL_LIBDIR}/cmake/CpuFeatures" + COMPONENT Devel +) diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/CONTRIBUTING.md b/src/crypto/argon2_hasher/hash/cpu/cpu_features/CONTRIBUTING.md new file mode 100755 index 00000000..c980350f --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/CONTRIBUTING.md @@ -0,0 +1,23 @@ +# How to Contribute + +We'd love to accept your patches and contributions to this project. There are +just a few small guidelines you need to follow. + +## Contributor License Agreement + +Contributions to this project must be accompanied by a Contributor License +Agreement. You (or your employer) retain the copyright to your contribution; +this simply gives us permission to use and redistribute your contributions as +part of the project. Head over to <https://cla.developers.google.com/> to see +your current agreements on file or to sign a new one. + +You generally only need to submit a CLA once, so if you've already submitted one +(even if it was for a different project), you probably don't need to do it +again. + +## Code reviews + +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. Consult +[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more +information on using pull requests. diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/LICENSE b/src/crypto/argon2_hasher/hash/cpu/cpu_features/LICENSE new file mode 100755 index 00000000..7a4a3ea2 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document.
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/README.md b/src/crypto/argon2_hasher/hash/cpu/cpu_features/README.md new file mode 100755 index 00000000..039175b3 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/README.md @@ -0,0 +1,165 @@ +# cpu_features [![Build Status](https://travis-ci.org/google/cpu_features.svg?branch=master)](https://travis-ci.org/google/cpu_features) [![Build status](https://ci.appveyor.com/api/projects/status/46d1owsj7n8dsylq/branch/master?svg=true)](https://ci.appveyor.com/project/gchatelet/cpu-features/branch/master) + +A cross-platform C library to retrieve CPU features (such as available +instructions) at runtime. 
+ +## Table of Contents + +- [Design Rationale](#rationale) +- [Code samples](#codesample) +- [Running sample code](#usagesample) +- [What's supported](#support) +- [License](#license) +- [Build with cmake](#cmake) + +<a name="rationale"></a> +## Design Rationale + +- **Simple to use.** See the snippets below for examples. +- **Extensible.** Easy to add missing features or architectures. +- **Compatible with old compilers** and available on many architectures so it + can be used widely. To ensure that cpu_features works on as many platforms + as possible, we implemented it in a highly portable version of C: C99. +- **Sandbox-compatible.** The library uses a variety of strategies to cope + with sandboxed environments or when `cpuid` is unavailable. This is useful + when running integration tests in hermetic environments. +- **Thread safe, no memory allocation, and raises no exceptions.** + cpu_features is suitable for implementing fundamental libc functions like + `malloc`, `memcpy`, and `memcmp`. +- **Unit tested.** + +<a name="codesample"></a> +### Checking features at runtime + +Here's a simple example that executes a codepath if the CPU supports both the +AES and the SSE4.2 instruction sets: + +```c +#include "cpuinfo_x86.h" + +static const X86Features features = GetX86Info().features; + +void Compute(void) { + if (features.aes && features.sse4_2) { + // Run optimized code. + } else { + // Run standard code. + } +} +``` + +### Caching for faster evaluation of complex checks + +If you wish, you can read all the features at once into a global variable, and +then query for the specific features you care about. Below, we store all the ARM +features and then check whether AES and NEON are supported. + +```c +#include <stdbool.h> +#include "cpuinfo_arm.h" + +static const ArmFeatures features = GetArmInfo().features; +static const bool has_aes_and_neon = features.aes && features.neon; + +// use has_aes_and_neon.
+``` + +This is a good approach to take if you're checking for combinations of features +when using a compiler that is slow to extract individual bits from bit-packed +structures. + +### Checking compile time flags + +The following code determines whether the compiler was told to use the AVX +instruction set (e.g., `g++ -mavx`) and sets `has_avx` accordingly. + +```c +#include <stdbool.h> +#include "cpuinfo_x86.h" + +static const X86Features features = GetX86Info().features; +static const bool has_avx = CPU_FEATURES_COMPILED_X86_AVX || features.avx; + +// use has_avx. +``` + +`CPU_FEATURES_COMPILED_X86_AVX` is set to 1 if the compiler was instructed to +use AVX and 0 otherwise, combining compile time and runtime knowledge. + +### Rejecting poor hardware implementations based on microarchitecture + +On x86, the first incarnation of a feature in a microarchitecture might not be +the most efficient (e.g. AVX on Sandy Bridge). We provide a function to retrieve +the underlying microarchitecture so you can decide whether to use it. + +Below, `has_fast_avx` is set to 1 if the CPU supports the AVX instruction +set—but only if it's not Sandy Bridge. + +```c +#include <stdbool.h> +#include "cpuinfo_x86.h" + +static const X86Info info = GetX86Info(); +static const X86Microarchitecture uarch = GetX86Microarchitecture(&info); +static const bool has_fast_avx = info.features.avx && uarch != INTEL_SNB; + +// use has_fast_avx. +``` + +This feature is currently available only for x86 microarchitectures. + +<a name="usagesample"></a> +### Running sample code + +Building `cpu_features` brings a small executable to test the library.
+ +```shell + % ./build/list_cpu_features +arch : x86 +brand : Intel(R) Xeon(R) CPU E5-1650 0 @ 3.20GHz +family : 6 (0x06) +model : 45 (0x2D) +stepping : 7 (0x07) +uarch : INTEL_SNB +flags : aes,avx,cx16,smx,sse4_1,sse4_2,ssse3 +``` + +```shell +% ./build/list_cpu_features --json +{"arch":"x86","brand":" Intel(R) Xeon(R) CPU E5-1650 0 @ 3.20GHz","family":6,"model":45,"stepping":7,"uarch":"INTEL_SNB","flags":["aes","avx","cx16","smx","sse4_1","sse4_2","ssse3"]} +``` + +<a name="support"></a> +## What's supported + +| | x86³ | ARM | AArch64 | MIPSel | POWER | +|---------|:----:|:-------:|:-------:|:------:|:-------:| +| Android | yes² | yes¹ | yes¹ | yes¹ | N/A | +| iOS | N/A | not yet | not yet | N/A | N/A | +| Linux | yes² | yes¹ | yes¹ | yes¹ | yes¹ | +| macOS | yes² | N/A | not yet | N/A | no | +| Windows | yes² | not yet | not yet | N/A | N/A | + +1. **Features revealed from Linux.** We gather data from several sources + depending on availability: + + from glibc's + [getauxval](https://www.gnu.org/software/libc/manual/html_node/Auxiliary-Vector.html) + + by parsing `/proc/self/auxv` + + by parsing `/proc/cpuinfo` +2. **Features revealed from CPU.** Features are retrieved by using the `cpuid` + instruction. +3. **Microarchitecture detection.** On x86 some features are not always + implemented efficiently in hardware (e.g. AVX on Sandy Bridge). Exposing the + microarchitecture allows the client to reject particular microarchitectures. + + +<a name="license"></a> +## License + +The cpu_features library is licensed under the terms of the Apache license. +See [LICENSE](LICENSE) for more information. + +<a name="cmake"></a> +## Build with CMake + +Please check the [CMake build instructions](cmake/README.md).
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/WORKSPACE b/src/crypto/argon2_hasher/hash/cpu/cpu_features/WORKSPACE new file mode 100755 index 00000000..8ea8a8b6 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/WORKSPACE @@ -0,0 +1,7 @@ +# ===== googletest ===== + +git_repository( + name = "com_google_googletest", + remote = "https://github.com/google/googletest.git", + commit = "c3f65335b79f47b05629e79a54685d899bc53b93", +) diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/appveyor.yml b/src/crypto/argon2_hasher/hash/cpu/cpu_features/appveyor.yml new file mode 100755 index 00000000..f18635a3 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/appveyor.yml @@ -0,0 +1,24 @@ +version: '{build}' +shallow_clone: true + +platform: x64 + +environment: + matrix: + - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 + CMAKE_GENERATOR: "Visual Studio 15 2017 Win64" + - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 + CMAKE_GENERATOR: "Visual Studio 14 2015 Win64" + +matrix: + fast_finish: true + +before_build: + - cmake --version + - cmake -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTING=ON -H. 
-Bcmake_build -G "%CMAKE_GENERATOR%" + +build_script: + - cmake --build cmake_build --config Debug --target ALL_BUILD + +test_script: + - cmake --build cmake_build --config Debug --target RUN_TESTS diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/CpuFeaturesConfig.cmake.in b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/CpuFeaturesConfig.cmake.in new file mode 100755 index 00000000..e0bf10e4 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/CpuFeaturesConfig.cmake.in @@ -0,0 +1,3 @@ +# CpuFeatures CMake configuration file + +include("${CMAKE_CURRENT_LIST_DIR}/CpuFeaturesTargets.cmake") diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/README.md b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/README.md new file mode 100755 index 00000000..b6baeaa2 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/README.md @@ -0,0 +1,28 @@ +# CMake build instructions + +## Recommended usage : Incorporating cpu_features into a CMake project + + For API / ABI compatibility reasons, it is recommended to build and use + cpu_features in a subdirectory of your project or as an embedded dependency. + + This is similar to the recommended usage of the googletest framework + ( https://github.com/google/googletest/blob/master/googletest/README.md ) + + Build and use step-by-step + + + 1- Download cpu_features and copy it into a sub-directory of your project, + or add cpu_features as a git submodule of your project. + + 2- You can then use the cmake command `add_subdirectory()` to include + cpu_features directly and use the `cpu_features` target in your project. + + 3- Add the `cpu_features` target to the `target_link_libraries()` section of + your executable or of your library. + +## Enabling tests + + By default, cpu_features is configured as a Release build with tests + disabled.
To enable testing set cmake `BUILD_TESTING` variable to `ON`, + [.travis.yml](../.travis.yml) and [appveyor.yml](../appveyor.yml) have up to + date examples. diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/googletest.CMakeLists.txt.in b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/googletest.CMakeLists.txt.in new file mode 100755 index 00000000..d60a33e9 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/googletest.CMakeLists.txt.in @@ -0,0 +1,15 @@ +cmake_minimum_required(VERSION 2.8.2) + +project(googletest-download NONE) + +include(ExternalProject) +ExternalProject_Add(googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG master + SOURCE_DIR "${CMAKE_BINARY_DIR}/googletest-src" + BINARY_DIR "${CMAKE_BINARY_DIR}/googletest-build" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) \ No newline at end of file diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/mips32-linux-gcc.cmake b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/mips32-linux-gcc.cmake new file mode 100755 index 00000000..dcfab7cf --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/cmake/mips32-linux-gcc.cmake @@ -0,0 +1,34 @@ +set(CMAKE_SYSTEM_NAME "Linux") +set(CMAKE_SYSTEM_PROCESSOR "mips32") + +if (ENABLE_DSPR2 AND ENABLE_MSA) + message(FATAL_ERROR "ENABLE_DSPR2 and ENABLE_MSA cannot be combined.") +endif () + +if (ENABLE_DSPR2) + set(HAVE_DSPR2 1 CACHE BOOL "" FORCE) + set(MIPS_CFLAGS "-mdspr2") + set(MIPS_CXXFLAGS "-mdspr2") +elseif (ENABLE_MSA) + set(HAVE_MSA 1 CACHE BOOL "" FORCE) + set(MIPS_CFLAGS "-mmsa") + set(MIPS_CXXFLAGS "-mmsa") +endif () + +if ("${MIPS_CPU}" STREQUAL "") + set(MIPS_CFLAGS "${MIPS_CFLAGS} -mips32r2") + set(MIPS_CXXFLAGS "${MIPS_CXXFLAGS} -mips32r2") +elseif ("${MIPS_CPU}" STREQUAL "p5600") + set(P56_FLAGS "-mips32r5 -mload-store-pairs -msched-weight -mhard-float -mfp64") + set(MIPS_CFLAGS "${MIPS_CFLAGS} ${P56_FLAGS}") + 
set(MIPS_CXXFLAGS "${MIPS_CXXFLAGS} ${P56_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "-mfp64 ${CMAKE_EXE_LINKER_FLAGS}") +endif () + +set(CMAKE_C_COMPILER ${CROSS}gcc) +set(CMAKE_CXX_COMPILER ${CROSS}g++) +set(AS_EXECUTABLE ${CROSS}as) +set(CMAKE_C_COMPILER_ARG1 "-EL ${MIPS_CFLAGS}") +set(CMAKE_CXX_COMPILER_ARG1 "-EL ${MIPS_CXXFLAGS}") + +set(THREADS_PTHREAD_ARG "2" CACHE STRING "Forcibly set by CMakeLists.txt." FORCE) diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpu_features_macros.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpu_features_macros.h new file mode 100755 index 00000000..f8220e1b --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpu_features_macros.h @@ -0,0 +1,187 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#ifndef CPU_FEATURES_INCLUDE_CPU_FEATURES_MACROS_H_ +#define CPU_FEATURES_INCLUDE_CPU_FEATURES_MACROS_H_ + +//////////////////////////////////////////////////////////////////////////////// +// Architectures +//////////////////////////////////////////////////////////////////////////////// + +#if ((defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ + defined(__x86_64__)) && \ + !defined(__pnacl__) && !defined(__CLR_VER)) +#define CPU_FEATURES_ARCH_X86 +#endif + +#if (defined(__arm__) || defined(_M_ARM)) +#define CPU_FEATURES_ARCH_ARM +#endif + +#if defined(__aarch64__) +#define CPU_FEATURES_ARCH_AARCH64 +#endif + +#if (defined(CPU_FEATURES_ARCH_AARCH64) || defined(CPU_FEATURES_ARCH_ARM)) +#define CPU_FEATURES_ARCH_ANY_ARM +#endif + +#if defined(__mips__) +#define CPU_FEATURES_ARCH_MIPS +#endif + +#if defined(__powerpc__) +#define CPU_FEATURES_ARCH_PPC +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Os +//////////////////////////////////////////////////////////////////////////////// + +#if defined(__linux__) +#define CPU_FEATURES_OS_LINUX_OR_ANDROID +#endif + +#if defined(__ANDROID__) +#define CPU_FEATURES_OS_ANDROID +#endif + +#if (defined(_WIN64) || defined(_WIN32)) +#define CPU_FEATURES_OS_WINDOWS +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Compilers +//////////////////////////////////////////////////////////////////////////////// + +#if defined(__clang__) +#define CPU_FEATURES_COMPILER_CLANG +#endif + +#if defined(__GNUC__) && !defined(__clang__) +#define CPU_FEATURES_COMPILER_GCC +#endif + +#if defined(_MSC_VER) +#define CPU_FEATURES_COMPILER_MSC +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Cpp +//////////////////////////////////////////////////////////////////////////////// + +#if defined(__cplusplus) +#define CPU_FEATURES_START_CPP_NAMESPACE \ + namespace cpu_features { \ + extern "C" { +#define
CPU_FEATURES_END_CPP_NAMESPACE \ + } \ + } +#else +#define CPU_FEATURES_START_CPP_NAMESPACE +#define CPU_FEATURES_END_CPP_NAMESPACE +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Compiler flags +//////////////////////////////////////////////////////////////////////////////// + +// Use the following to check if a feature is known to be available at compile +// time. See README.md for an example. +// +// Every macro expands to the literal 1 or 0, so it can be used both in #if +// directives and in ordinary C/C++ expressions. A macro that expands to +// `defined(...)` has undefined behavior in #if and is unusable elsewhere. +#if defined(CPU_FEATURES_ARCH_X86) +#if defined(__AES__) +#define CPU_FEATURES_COMPILED_X86_AES 1 +#else +#define CPU_FEATURES_COMPILED_X86_AES 0 +#endif +#if defined(__F16C__) +#define CPU_FEATURES_COMPILED_X86_F16C 1 +#else +#define CPU_FEATURES_COMPILED_X86_F16C 0 +#endif +#if defined(__BMI__) +#define CPU_FEATURES_COMPILED_X86_BMI 1 +#else +#define CPU_FEATURES_COMPILED_X86_BMI 0 +#endif +#if defined(__BMI2__) +#define CPU_FEATURES_COMPILED_X86_BMI2 1 +#else +#define CPU_FEATURES_COMPILED_X86_BMI2 0 +#endif +#if defined(__SSE__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1)) +#define CPU_FEATURES_COMPILED_X86_SSE 1 +#else +#define CPU_FEATURES_COMPILED_X86_SSE 0 +#endif +#if defined(__SSE2__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) +#define CPU_FEATURES_COMPILED_X86_SSE2 1 +#else +#define CPU_FEATURES_COMPILED_X86_SSE2 0 +#endif +#if defined(__SSE3__) +#define CPU_FEATURES_COMPILED_X86_SSE3 1 +#else +#define CPU_FEATURES_COMPILED_X86_SSE3 0 +#endif +#if defined(__SSSE3__) +#define CPU_FEATURES_COMPILED_X86_SSSE3 1 +#else +#define CPU_FEATURES_COMPILED_X86_SSSE3 0 +#endif +#if defined(__SSE4_1__) +#define CPU_FEATURES_COMPILED_X86_SSE4_1 1 +#else +#define CPU_FEATURES_COMPILED_X86_SSE4_1 0 +#endif +#if defined(__SSE4_2__) +#define CPU_FEATURES_COMPILED_X86_SSE4_2 1 +#else +#define CPU_FEATURES_COMPILED_X86_SSE4_2 0 +#endif +#if defined(__AVX__) +#define CPU_FEATURES_COMPILED_X86_AVX 1 +#else +#define CPU_FEATURES_COMPILED_X86_AVX 0 +#endif +#if defined(__AVX2__) +#define CPU_FEATURES_COMPILED_X86_AVX2 1 +#else +#define CPU_FEATURES_COMPILED_X86_AVX2 0 +#endif +// Historical spelling with a lowercase "x86", kept for source compatibility. +#define CPU_FEATURES_COMPILED_x86_AVX2 CPU_FEATURES_COMPILED_X86_AVX2 +#endif + +#if defined(CPU_FEATURES_ARCH_ANY_ARM) +#if defined(__ARM_NEON__) +#define CPU_FEATURES_COMPILED_ANY_ARM_NEON 1 +#else +#define CPU_FEATURES_COMPILED_ANY_ARM_NEON 0 +#endif +#endif + +#if defined(CPU_FEATURES_ARCH_MIPS) +#if defined(__mips_msa) +#define CPU_FEATURES_COMPILED_MIPS_MSA 1 +#else +#define CPU_FEATURES_COMPILED_MIPS_MSA 0 +#endif +#endif + +#endif // CPU_FEATURES_INCLUDE_CPU_FEATURES_MACROS_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_aarch64.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_aarch64.h new file mode 100755 index 00000000..b8826ed4 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_aarch64.h @@ -0,0 +1,65 @@ +// Copyright 2017 Google Inc.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_ +#define CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_ + +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef struct { + int fp : 1; // Floating-point. + int asimd : 1; // Advanced SIMD. + int aes : 1; // Hardware-accelerated Advanced Encryption Standard. + int pmull : 1; // Polynomial multiply long. + int sha1 : 1; // Hardware-accelerated SHA1. + int sha2 : 1; // Hardware-accelerated SHA2-256. + int crc32 : 1; // Hardware-accelerated CRC-32. + + // Make sure to update Aarch64FeaturesEnum below if you add a field here. 
+} Aarch64Features; + +typedef struct { + Aarch64Features features; + int implementer; + int variant; + int part; + int revision; +} Aarch64Info; + +Aarch64Info GetAarch64Info(void); + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +typedef enum { + AARCH64_FP, + AARCH64_ASIMD, + AARCH64_AES, + AARCH64_PMULL, + AARCH64_SHA1, + AARCH64_SHA2, + AARCH64_CRC32, + AARCH64_LAST_, +} Aarch64FeaturesEnum; + +int GetAarch64FeaturesEnumValue(const Aarch64Features* features, + Aarch64FeaturesEnum value); + +const char* GetAarch64FeaturesEnumName(Aarch64FeaturesEnum); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_arm.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_arm.h new file mode 100755 index 00000000..7a94bb08 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_arm.h @@ -0,0 +1,80 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_CPUINFO_ARM_H_ +#define CPU_FEATURES_INCLUDE_CPUINFO_ARM_H_ + +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef struct { + int vfp : 1; // Vector Floating Point. + int iwmmxt : 1; // Intel Wireless MMX Technology. + int neon : 1; // Advanced SIMD. 
+ int vfpv3 : 1; // VFP version 3 + int vfpv3d16 : 1; // VFP version 3 with 16 D-registers + int vfpv4 : 1; // VFP version 4 with fast context switching + int idiva : 1; // SDIV and UDIV hardware division in ARM mode. + int idivt : 1; // SDIV and UDIV hardware division in Thumb mode. + int aes : 1; // Hardware-accelerated Advanced Encryption Standard. + int pmull : 1; // Polynomial multiply long. + int sha1 : 1; // Hardware-accelerated SHA1. + int sha2 : 1; // Hardware-accelerated SHA2-256. + int crc32 : 1; // Hardware-accelerated CRC-32. + + // Make sure to update ArmFeaturesEnum below if you add a field here. +} ArmFeatures; + +typedef struct { + ArmFeatures features; + int implementer; + int architecture; + int variant; + int part; + int revision; +} ArmInfo; + +// TODO(user): Add macros to know which features are present at compile +// time. + +ArmInfo GetArmInfo(void); + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +typedef enum { + ARM_VFP, + ARM_IWMMXT, + ARM_NEON, + ARM_VFPV3, + ARM_VFPV3D16, + ARM_VFPV4, + ARM_IDIVA, + ARM_IDIVT, + ARM_AES, + ARM_PMULL, + ARM_SHA1, + ARM_SHA2, + ARM_CRC32, + ARM_LAST_, +} ArmFeaturesEnum; + +int GetArmFeaturesEnumValue(const ArmFeatures* features, ArmFeaturesEnum value); + +const char* GetArmFeaturesEnumName(ArmFeaturesEnum); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_CPUINFO_ARM_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_mips.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_mips.h new file mode 100755 index 00000000..48c23a16 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_mips.h @@ -0,0 +1,53 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_CPUINFO_MIPS_H_ +#define CPU_FEATURES_INCLUDE_CPUINFO_MIPS_H_ + +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef struct { + int msa : 1; // MIPS SIMD Architecture + // https://www.mips.com/products/architectures/ase/simd/ + int eva : 1; // Enhanced Virtual Addressing + // https://www.mips.com/products/architectures/mips64/ + + // Make sure to update MipsFeaturesEnum below if you add a field here. +} MipsFeatures; + +typedef struct { + MipsFeatures features; +} MipsInfo; + +MipsInfo GetMipsInfo(void); + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +typedef enum { + MIPS_MSA, + MIPS_EVA, + MIPS_LAST_, +} MipsFeaturesEnum; + +int GetMipsFeaturesEnumValue(const MipsFeatures* features, + MipsFeaturesEnum value); + +const char* GetMipsFeaturesEnumName(MipsFeaturesEnum); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_CPUINFO_MIPS_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_ppc.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_ppc.h new file mode 100755 index 00000000..654155da --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_ppc.h @@ -0,0 +1,141 @@ +// Copyright 2018 IBM +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_CPUINFO_PPC_H_ +#define CPU_FEATURES_INCLUDE_CPUINFO_PPC_H_ + +#include "cpu_features_macros.h" +#include "internal/hwcaps.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef struct { + int ppc32 : 1; + int ppc64 : 1; + int ppc601 : 1; + int altivec : 1; + int fpu : 1; + int mmu : 1; + int mac_4xx : 1; + int unifiedcache : 1; + int spe : 1; + int efpsingle : 1; + int efpdouble : 1; + int no_tb : 1; + int power4 : 1; + int power5 : 1; + int power5plus : 1; + int cell : 1; + int booke : 1; + int smt : 1; + int icachesnoop : 1; + int arch205 : 1; + int pa6t : 1; + int dfp : 1; + int power6ext : 1; + int arch206 : 1; + int vsx : 1; + int pseries_perfmon_compat : 1; + int truele : 1; + int ppcle : 1; + int arch207 : 1; + int htm : 1; + int dscr : 1; + int ebb : 1; + int isel : 1; + int tar : 1; + int vcrypto : 1; + int htm_nosc : 1; + int arch300 : 1; + int ieee128 : 1; + int darn : 1; + int scv : 1; + int htm_no_suspend : 1; + + // Make sure to update PPCFeaturesEnum below if you add a field here. +} PPCFeatures; + +typedef struct { + PPCFeatures features; +} PPCInfo; + +// This function is guaranteed to be malloc, memset and memcpy free. 
+PPCInfo GetPPCInfo(void); + +typedef struct { + char platform[64]; // 0 terminated string + char model[64]; // 0 terminated string + char machine[64]; // 0 terminated string + char cpu[64]; // 0 terminated string + PlatformType type; +} PPCPlatformStrings; + +PPCPlatformStrings GetPPCPlatformStrings(void); + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +typedef enum { + PPC_32, /* 32 bit mode execution */ + PPC_64, /* 64 bit mode execution */ + PPC_601_INSTR, /* Old POWER ISA */ + PPC_HAS_ALTIVEC, /* SIMD Unit */ + PPC_HAS_FPU, /* Floating Point Unit */ + PPC_HAS_MMU, /* Memory management unit */ + PPC_HAS_4xxMAC, + PPC_UNIFIED_CACHE, /* Unified instruction and data cache */ + PPC_HAS_SPE, /* Signal processing extension unit */ + PPC_HAS_EFP_SINGLE, /* SPE single precision fpu */ + PPC_HAS_EFP_DOUBLE, /* SPE double precision fpu */ + PPC_NO_TB, /* No timebase */ + PPC_POWER4, + PPC_POWER5, + PPC_POWER5_PLUS, + PPC_CELL, /* Cell broadband engine */ + PPC_BOOKE, /* Embedded ISA */ + PPC_SMT, /* Simultaneous multi-threading */ + PPC_ICACHE_SNOOP, + PPC_ARCH_2_05, /* ISA 2.05 - POWER6 */ + PPC_PA6T, /* PA Semi 6T core ISA */ + PPC_HAS_DFP, /* Decimal floating point unit */ + PPC_POWER6_EXT, + PPC_ARCH_2_06, /* ISA 2.06 - POWER7 */ + PPC_HAS_VSX, /* Vector-scalar extension */ + PPC_PSERIES_PERFMON_COMPAT, /* Set of backwards compatible performance + monitoring events */ + PPC_TRUE_LE, + PPC_PPC_LE, + PPC_ARCH_2_07, /* ISA 2.07 - POWER8 */ + PPC_HTM, /* Hardware Transactional Memory */ + PPC_DSCR, /* Data stream control register */ + PPC_EBB, /* Event-based branching */ + PPC_ISEL, /* Integer select instructions */ + PPC_TAR, /* Target address register */ + PPC_VEC_CRYPTO, /* Vector cryptography instructions */ + PPC_HTM_NOSC, /* Transactions aborted when syscall made */ + PPC_ARCH_3_00, /* ISA 3.00 - POWER9 */ + PPC_HAS_IEEE128, /* VSX IEEE Binary Float 128-bit */ + PPC_DARN, /* Deliver a random number
instruction */ + PPC_SCV, /* scv syscall */ + PPC_HTM_NO_SUSPEND, /* TM w/out suspended state */ + PPC_LAST_, +} PPCFeaturesEnum; + +int GetPPCFeaturesEnumValue(const PPCFeatures* features, PPCFeaturesEnum value); + +const char* GetPPCFeaturesEnumName(PPCFeaturesEnum); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_CPUINFO_PPC_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_x86.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_x86.h new file mode 100755 index 00000000..0123ddbe --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/cpuinfo_x86.h @@ -0,0 +1,154 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_CPUINFO_X86_H_ +#define CPU_FEATURES_INCLUDE_CPUINFO_X86_H_ + +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +// See https://en.wikipedia.org/wiki/CPUID for a list of x86 cpu features. 
+typedef struct { + int aes : 1; + int erms : 1; + int f16c : 1; + int fma3 : 1; + int vpclmulqdq : 1; + int bmi1 : 1; + int bmi2 : 1; + + int ssse3 : 1; + int sse4_1 : 1; + int sse4_2 : 1; + + int avx : 1; + int avx2 : 1; + + int avx512f : 1; + int avx512cd : 1; + int avx512er : 1; + int avx512pf : 1; + int avx512bw : 1; + int avx512dq : 1; + int avx512vl : 1; + int avx512ifma : 1; + int avx512vbmi : 1; + int avx512vbmi2 : 1; + int avx512vnni : 1; + int avx512bitalg : 1; + int avx512vpopcntdq : 1; + int avx512_4vnniw : 1; + int avx512_4vbmi2 : 1; + + int smx : 1; + int sgx : 1; + int cx16 : 1; // aka. CMPXCHG16B + + // Make sure to update X86FeaturesEnum below if you add a field here. +} X86Features; + +typedef struct { + X86Features features; + int family; + int model; + int stepping; + char vendor[13]; // 0 terminated string +} X86Info; + +// Calls cpuid and returns an initialized X86Info. +// This function is guaranteed to be malloc, memset and memcpy free. +X86Info GetX86Info(void); + +typedef enum { + X86_UNKNOWN, + INTEL_CORE, // CORE + INTEL_PNR, // PENRYN + INTEL_NHM, // NEHALEM + INTEL_ATOM_BNL, // BONNELL + INTEL_WSM, // WESTMERE + INTEL_SNB, // SANDYBRIDGE + INTEL_IVB, // IVYBRIDGE + INTEL_ATOM_SMT, // SILVERMONT + INTEL_HSW, // HASWELL + INTEL_BDW, // BROADWELL + INTEL_SKL, // SKYLAKE + INTEL_ATOM_GMT, // GOLDMONT + INTEL_KBL, // KABY LAKE + INTEL_CFL, // COFFEE LAKE + INTEL_CNL, // CANNON LAKE + AMD_HAMMER, // K8 + AMD_K10, // K10 + AMD_BOBCAT, // K14 + AMD_BULLDOZER, // K15 + AMD_JAGUAR, // K16 + AMD_ZEN, // K17 +} X86Microarchitecture; + +// Returns the underlying microarchitecture by looking at X86Info's vendor, +// family and model. +X86Microarchitecture GetX86Microarchitecture(const X86Info* info); + +// Calls cpuid and fills the brand_string. +// - brand_string *must* be of size 49 (beware of array decaying). +// - brand_string will be zero terminated. +// - This function calls memcpy.
+void FillX86BrandString(char brand_string[49]); + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +typedef enum { + X86_AES, + X86_ERMS, + X86_F16C, + X86_FMA3, + X86_VPCLMULQDQ, + X86_BMI1, + X86_BMI2, + X86_SSSE3, + X86_SSE4_1, + X86_SSE4_2, + X86_AVX, + X86_AVX2, + X86_AVX512F, + X86_AVX512CD, + X86_AVX512ER, + X86_AVX512PF, + X86_AVX512BW, + X86_AVX512DQ, + X86_AVX512VL, + X86_AVX512IFMA, + X86_AVX512VBMI, + X86_AVX512VBMI2, + X86_AVX512VNNI, + X86_AVX512BITALG, + X86_AVX512VPOPCNTDQ, + X86_AVX512_4VNNIW, + X86_AVX512_4VBMI2, + X86_SMX, + X86_SGX, + X86_CX16, + X86_LAST_, +} X86FeaturesEnum; + +int GetX86FeaturesEnumValue(const X86Features* features, X86FeaturesEnum value); + +const char* GetX86FeaturesEnumName(X86FeaturesEnum); + +const char* GetX86MicroarchitectureName(X86Microarchitecture); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_CPUINFO_X86_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/bit_utils.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/bit_utils.h new file mode 100755 index 00000000..75f0cdd5 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/bit_utils.h @@ -0,0 +1,39 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_ + +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +inline static bool IsBitSet(uint32_t reg, uint32_t bit) { + return (reg >> bit) & 0x1; // True iff bit number `bit` of reg is 1. +} + +inline static uint32_t ExtractBitRange(uint32_t reg, uint32_t msb, + uint32_t lsb) { + const uint64_t bits = msb - lsb + 1; // Inclusive range [lsb, msb]. + const uint64_t mask = (1ULL << bits) - 1ULL; // 64-bit so bits == 32 is safe. + assert(msb >= lsb); + return (reg >> lsb) & mask; +} + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/cpuid_x86.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/cpuid_x86.h new file mode 100755 index 00000000..9dcee0de --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/cpuid_x86.h @@ -0,0 +1,37 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_INTERNAL_CPUID_X86_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_CPUID_X86_H_ + +#include <stdint.h> + +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +// A struct to hold the result of a call to cpuid. +typedef struct { + uint32_t eax, ebx, ecx, edx; +} Leaf; + +// Retrieves the leaf for a particular cpuid. +Leaf CpuId(uint32_t leaf_id); + +// Returns the eax value of the XCR0 register.
+uint32_t GetXCR0Eax(void); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_INTERNAL_CPUID_X86_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/filesystem.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/filesystem.h new file mode 100755 index 00000000..33788813 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/filesystem.h @@ -0,0 +1,38 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// An interface for the filesystem that allows mocking the filesystem in +// unittests. +#ifndef CPU_FEATURES_INCLUDE_INTERNAL_FILESYSTEM_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_FILESYSTEM_H_ + +#include <stddef.h> +#include <stdint.h> +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +// Same as linux "open(filename, O_RDONLY)", retries automatically on EINTR. +int CpuFeatures_OpenFile(const char* filename); + +// Same as linux "read(file_descriptor, buffer, buffer_size)", retries +// automatically on EINTR. +int CpuFeatures_ReadFile(int file_descriptor, void* buffer, size_t buffer_size); + +// Same as linux "close(file_descriptor)".
+void CpuFeatures_CloseFile(int file_descriptor); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_INTERNAL_FILESYSTEM_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/hwcaps.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/hwcaps.h new file mode 100755 index 00000000..830cde31 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/hwcaps.h @@ -0,0 +1,131 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Interface to retrieve hardware capabilities. It relies on Linux's getauxval +// or `/proc/self/auxv` under the hood. +#ifndef CPU_FEATURES_INCLUDE_INTERNAL_HWCAPS_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_HWCAPS_H_ + +#include <stdint.h> +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +// To avoid depending on the linux kernel we reproduce the architecture specific +// constants here.
+ +// http://elixir.free-electrons.com/linux/latest/source/arch/arm64/include/uapi/asm/hwcap.h +#define AARCH64_HWCAP_FP (1UL << 0) +#define AARCH64_HWCAP_ASIMD (1UL << 1) +#define AARCH64_HWCAP_AES (1UL << 3) +#define AARCH64_HWCAP_PMULL (1UL << 4) +#define AARCH64_HWCAP_SHA1 (1UL << 5) +#define AARCH64_HWCAP_SHA2 (1UL << 6) +#define AARCH64_HWCAP_CRC32 (1UL << 7) + +// http://elixir.free-electrons.com/linux/latest/source/arch/arm/include/uapi/asm/hwcap.h +#define ARM_HWCAP_VFP (1UL << 6) +#define ARM_HWCAP_IWMMXT (1UL << 9) +#define ARM_HWCAP_NEON (1UL << 12) +#define ARM_HWCAP_VFPV3 (1UL << 13) +#define ARM_HWCAP_VFPV3D16 (1UL << 14) +#define ARM_HWCAP_VFPV4 (1UL << 16) +#define ARM_HWCAP_IDIVA (1UL << 17) +#define ARM_HWCAP_IDIVT (1UL << 18) +#define ARM_HWCAP2_AES (1UL << 0) +#define ARM_HWCAP2_PMULL (1UL << 1) +#define ARM_HWCAP2_SHA1 (1UL << 2) +#define ARM_HWCAP2_SHA2 (1UL << 3) +#define ARM_HWCAP2_CRC32 (1UL << 4) + +// http://elixir.free-electrons.com/linux/latest/source/arch/mips/include/uapi/asm/hwcap.h +#define MIPS_HWCAP_VZ (1UL << 0) +#define MIPS_HWCAP_EVA (1UL << 1) +#define MIPS_HWCAP_HTW (1UL << 2) +#define MIPS_HWCAP_FPU (1UL << 3) +#define MIPS_HWCAP_MIPS32R2 (1UL << 4) +#define MIPS_HWCAP_MIPS32R5 (1UL << 5) +#define MIPS_HWCAP_MIPS64R6 (1UL << 6) +#define MIPS_HWCAP_DSPR1 (1UL << 7) +#define MIPS_HWCAP_DSPR2 (1UL << 8) +#define MIPS_HWCAP_MSA (1UL << 9) + +// http://elixir.free-electrons.com/linux/latest/source/arch/powerpc/include/uapi/asm/cputable.h +#ifndef _UAPI__ASM_POWERPC_CPUTABLE_H +/* in AT_HWCAP */ +#define PPC_FEATURE_32 0x80000000 +#define PPC_FEATURE_64 0x40000000 +#define PPC_FEATURE_601_INSTR 0x20000000 +#define PPC_FEATURE_HAS_ALTIVEC 0x10000000 +#define PPC_FEATURE_HAS_FPU 0x08000000 +#define PPC_FEATURE_HAS_MMU 0x04000000 +#define PPC_FEATURE_HAS_4xxMAC 0x02000000 +#define PPC_FEATURE_UNIFIED_CACHE 0x01000000 +#define PPC_FEATURE_HAS_SPE 0x00800000 +#define PPC_FEATURE_HAS_EFP_SINGLE 0x00400000 +#define 
PPC_FEATURE_HAS_EFP_DOUBLE 0x00200000 +#define PPC_FEATURE_NO_TB 0x00100000 +#define PPC_FEATURE_POWER4 0x00080000 +#define PPC_FEATURE_POWER5 0x00040000 +#define PPC_FEATURE_POWER5_PLUS 0x00020000 +#define PPC_FEATURE_CELL 0x00010000 +#define PPC_FEATURE_BOOKE 0x00008000 +#define PPC_FEATURE_SMT 0x00004000 +#define PPC_FEATURE_ICACHE_SNOOP 0x00002000 +#define PPC_FEATURE_ARCH_2_05 0x00001000 +#define PPC_FEATURE_PA6T 0x00000800 +#define PPC_FEATURE_HAS_DFP 0x00000400 +#define PPC_FEATURE_POWER6_EXT 0x00000200 +#define PPC_FEATURE_ARCH_2_06 0x00000100 +#define PPC_FEATURE_HAS_VSX 0x00000080 + +#define PPC_FEATURE_PSERIES_PERFMON_COMPAT 0x00000040 + +/* Reserved - do not use 0x00000004 */ +#define PPC_FEATURE_TRUE_LE 0x00000002 +#define PPC_FEATURE_PPC_LE 0x00000001 + +/* in AT_HWCAP2 */ +#define PPC_FEATURE2_ARCH_2_07 0x80000000 +#define PPC_FEATURE2_HTM 0x40000000 +#define PPC_FEATURE2_DSCR 0x20000000 +#define PPC_FEATURE2_EBB 0x10000000 +#define PPC_FEATURE2_ISEL 0x08000000 +#define PPC_FEATURE2_TAR 0x04000000 +#define PPC_FEATURE2_VEC_CRYPTO 0x02000000 +#define PPC_FEATURE2_HTM_NOSC 0x01000000 +#define PPC_FEATURE2_ARCH_3_00 0x00800000 +#define PPC_FEATURE2_HAS_IEEE128 0x00400000 +#define PPC_FEATURE2_DARN 0x00200000 +#define PPC_FEATURE2_SCV 0x00100000 +#define PPC_FEATURE2_HTM_NO_SUSPEND 0x00080000 +#endif + +typedef struct { + unsigned long hwcaps; + unsigned long hwcaps2; +} HardwareCapabilities; + +HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void); + +typedef struct { + char platform[64]; // 0 terminated string + char base_platform[64]; // 0 terminated string +} PlatformType; + +PlatformType CpuFeatures_GetPlatformType(void); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_INTERNAL_HWCAPS_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/linux_features_aggregator.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/linux_features_aggregator.h new file mode 100755 index 
00000000..77661d4c --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/linux_features_aggregator.h @@ -0,0 +1,60 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// CapabilityConfig provides a way to map cpu features to hardware caps and +// /proc/cpuinfo flags. We then provide functions to update capabilities from +// either source. +#ifndef CPU_FEATURES_INCLUDE_INTERNAL_LINUX_FEATURES_AGGREGATOR_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_LINUX_FEATURES_AGGREGATOR_H_ + +#include <ctype.h> +#include <stdint.h> +#include "cpu_features_macros.h" +#include "internal/hwcaps.h" +#include "internal/string_view.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +// Use the following macro to declare setter functions to be used in +// CapabilityConfig. +#define DECLARE_SETTER(FeatureType, FeatureName) \ + static void set_##FeatureName(void* const features, bool value) { \ + ((FeatureType*)features)->FeatureName = value; \ + } + +// Describes the relationship between hardware caps and /proc/cpuinfo flags. +typedef struct { + const HardwareCapabilities hwcaps_mask; + const char* const proc_cpuinfo_flag; + void (*set_bit)(void* const, bool); // setter for the corresponding bit. +} CapabilityConfig; + +// For every config, looks into flags_line for the presence of the +// corresponding proc_cpuinfo_flag, calls `set_bit` accordingly. +// Note: features is a pointer to the underlying Feature struct.
+void CpuFeatures_SetFromFlags(const size_t configs_size, + const CapabilityConfig* configs, + const StringView flags_line, + void* const features); + +// For every config, looks into hwcaps for the presence of the feature. Calls +// `set_bit` with true if the hardware capability is found. +// Note: features is a pointer to the underlying Feature struct. +void CpuFeatures_OverrideFromHwCaps(const size_t configs_size, + const CapabilityConfig* configs, + const HardwareCapabilities hwcaps, + void* const features); + +CPU_FEATURES_END_CPP_NAMESPACE +#endif // CPU_FEATURES_INCLUDE_INTERNAL_LINUX_FEATURES_AGGREGATOR_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/stack_line_reader.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/stack_line_reader.h new file mode 100755 index 00000000..c540f6b2 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/stack_line_reader.h @@ -0,0 +1,49 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Reads a file line by line and stores the data on the stack. This allows +// parsing files in one go without allocating. 
+#ifndef CPU_FEATURES_INCLUDE_INTERNAL_STACK_LINE_READER_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_STACK_LINE_READER_H_ + +#include <stdbool.h> + +#include "cpu_features_macros.h" +#include "internal/string_view.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef struct { + char buffer[STACK_LINE_READER_BUFFER_SIZE]; + StringView view; + int fd; + bool skip_mode; +} StackLineReader; + +// Initializes a StackLineReader. +void StackLineReader_Initialize(StackLineReader* reader, int fd); + +typedef struct { + StringView line; // A view of the line. + bool eof; // Nothing more to read, we reached EOF. + bool full_line; // If false the line was truncated to + // STACK_LINE_READER_BUFFER_SIZE. +} LineResult; + +// Reads the file pointed to by fd and tries to read a full line. +LineResult StackLineReader_NextLine(StackLineReader* reader); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_INTERNAL_STACK_LINE_READER_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/string_view.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/string_view.h new file mode 100755 index 00000000..aa3779c4 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/include/internal/string_view.h @@ -0,0 +1,108 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// A view over a piece of string. The view is not 0 terminated.
+#ifndef CPU_FEATURES_INCLUDE_INTERNAL_STRING_VIEW_H_ +#define CPU_FEATURES_INCLUDE_INTERNAL_STRING_VIEW_H_ + +#include <stdbool.h> +#include <stddef.h> +#include <string.h> +#include "cpu_features_macros.h" + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef struct { + const char* ptr; + size_t size; +} StringView; + +#ifdef __cplusplus +static const StringView kEmptyStringView = {NULL, 0}; +#else +static const StringView kEmptyStringView; +#endif + +// Returns a StringView from the provided string. +// Passing NULL is valid only if size is 0. +static inline StringView view(const char* str, const size_t size) { + StringView view; + view.ptr = str; + view.size = size; + return view; +} + +static inline StringView str(const char* str) { return view(str, strlen(str)); } + +// Returns the index of the first occurrence of c in view or -1 if not found. +int CpuFeatures_StringView_IndexOfChar(const StringView view, char c); + +// Returns the index of the first occurrence of sub_view in view or -1 if not +// found. +int CpuFeatures_StringView_IndexOf(const StringView view, + const StringView sub_view); + +// Returns whether a is equal to b (same content). +bool CpuFeatures_StringView_IsEquals(const StringView a, const StringView b); + +// Returns whether a starts with b. +bool CpuFeatures_StringView_StartsWith(const StringView a, const StringView b); + +// Removes count characters from the beginning of view or kEmptyStringView if +// count is greater than view.size. +StringView CpuFeatures_StringView_PopFront(const StringView str_view, + size_t count); + +// Removes count characters from the end of view or kEmptyStringView if count is +// greater than view.size. +StringView CpuFeatures_StringView_PopBack(const StringView str_view, + size_t count); + +// Keeps the count first characters of view or view if count is greater than +// view.size. +StringView CpuFeatures_StringView_KeepFront(const StringView str_view, + size_t count); + +// Retrieves the first character of view.
If view is empty the behavior is +// undefined. +char CpuFeatures_StringView_Front(const StringView view); + +// Retrieves the last character of view. If view is empty the behavior is +// undefined. +char CpuFeatures_StringView_Back(const StringView view); + +// Removes leading and trailing space characters. +StringView CpuFeatures_StringView_TrimWhitespace(StringView view); + +// Convert StringView to positive integer. e.g. "42", "0x2a". +// Returns -1 on error. +int CpuFeatures_StringView_ParsePositiveNumber(const StringView view); + +// Copies src StringView to dst buffer. +void CpuFeatures_StringView_CopyString(const StringView src, char* dst, + size_t dst_size); + +// Checks if line contains the specified whitespace separated word. +bool CpuFeatures_StringView_HasWord(const StringView line, + const char* const word); + +// Get key/value from line. key and value are separated by ": ". +// key and value are cleaned up from leading and trailing whitespaces. +bool CpuFeatures_StringView_GetAttributeKeyValue(const StringView line, + StringView* key, + StringView* value); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_INTERNAL_STRING_VIEW_H_ diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/run_integration.sh b/src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/run_integration.sh new file mode 100755 index 00000000..a1de0d1e --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/run_integration.sh @@ -0,0 +1,173 @@ +#!/bin/bash + +readonly SCRIPT_FOLDER=$(cd -P -- "$(dirname -- "$0")" && pwd -P) +readonly PROJECT_FOLDER="${SCRIPT_FOLDER}/.." +readonly ARCHIVE_FOLDER=~/cpu_features_archives +readonly QEMU_INSTALL=${ARCHIVE_FOLDER}/qemu +readonly DEFAULT_CMAKE_ARGS=" -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTING=ON" + +function extract() { + case $1 in + *.tar.bz2) tar xjf "$1" ;; + *.tar.xz) tar xJf "$1" ;; + *.tar.gz) tar xzf "$1" ;; + *) + echo "don't know how to extract '$1'..."
+ exit 1 + esac +} + +function unpackifnotexists() { + mkdir -p "${ARCHIVE_FOLDER}" + cd "${ARCHIVE_FOLDER}" || exit + local URL=$1 + local RELATIVE_FOLDER=$2 + local DESTINATION="${ARCHIVE_FOLDER}/${RELATIVE_FOLDER}" + if [[ ! -d "${DESTINATION}" ]] ; then + local ARCHIVE_NAME=$(echo ${URL} | sed 's/.*\///') + test -f "${ARCHIVE_NAME}" || wget -q "${URL}" + extract "${ARCHIVE_NAME}" + fi +} + +function installqemuifneeded() { + local VERSION=${QEMU_VERSION:=2.11.1} + local ARCHES=${QEMU_ARCHES:=arm aarch64 i386 x86_64 mips mipsel} + local TARGETS=${QEMU_TARGETS:=$(echo "$ARCHES" | sed 's#$# #;s#\([^ ]*\) #\1-linux-user #g')} + + if echo "${VERSION} ${TARGETS}" | cmp --silent ${QEMU_INSTALL}/.build -; then + echo "qemu ${VERSION} up to date!" + return 0 + fi + + echo "VERSION: ${VERSION}" + echo "TARGETS: ${TARGETS}" + + rm -rf ${QEMU_INSTALL} + + # Checking for a tarball before downloading makes testing easier :-) + local QEMU_URL="http://wiki.qemu-project.org/download/qemu-${VERSION}.tar.xz" + local QEMU_FOLDER="qemu-${VERSION}" + unpackifnotexists ${QEMU_URL} ${QEMU_FOLDER} + cd ${QEMU_FOLDER} || exit + + ./configure \ + --prefix="${QEMU_INSTALL}" \ + --target-list="${TARGETS}" \ + --disable-docs \ + --disable-sdl \ + --disable-gtk \ + --disable-gnutls \ + --disable-gcrypt \ + --disable-nettle \ + --disable-curses \ + --static + + make -j4 + make install + + echo "$VERSION $TARGETS" > ${QEMU_INSTALL}/.build +} + +function assert_defined(){ + local NAME=${1} # $1 is the NAME of the variable to check, not its value. + : "${!NAME?"${NAME} needs to be defined"}" # Indirect expansion: fails if the variable named by NAME is unset. +} + +function integrate() { + cd "${PROJECT_FOLDER}" || exit + cmake -H.
# Prepares CMAKE_ADDITIONAL_ARGS and QEMU_ARGS for the Codescape MIPS toolchain.
#
# Reads:   TARGET, QEMU_ARCH, ARCHIVE_FOLDER
# Appends: CMAKE_ADDITIONAL_ARGS, QEMU_ARGS
#
# The toolchain and its sysroot come from the same archive, and the sysroot
# lives below the toolchain folder, so a single unpackifnotexists call is
# enough. (A second call keyed on SYSROOT_RELATIVE_FOLDER, a variable that is
# never defined in this function, used to be issued here; it always resolved
# to "${ARCHIVE_FOLDER}/" and therefore never did anything.)
function expand_codescape_config() {
  assert_defined TARGET
  # NOTE(review): FLAVOUR is derived from QEMU_ARCH, so it is only meaningful
  # when QEMU_ARCH names a real architecture; it is only consumed through
  # QEMU_ARGS below.
  local FLAVOUR=${QEMU_ARCH}-r2-hard
  local DATE=2016.05-03
  local CODESCAPE_URL=http://codescape-mips-sdk.imgtec.com/components/toolchain/${DATE}/Codescape.GNU.Tools.Package.${DATE}.for.MIPS.MTI.Linux.CentOS-5.x86_64.tar.gz
  local GCC_RELATIVE_FOLDER=${TARGET}/${DATE}
  local GCC_FOLDER=${ARCHIVE_FOLDER}/${GCC_RELATIVE_FOLDER}
  unpackifnotexists "${CODESCAPE_URL}" "${GCC_RELATIVE_FOLDER}"

  local SYSROOT_FOLDER=${GCC_FOLDER}/sysroot/${FLAVOUR}

  CMAKE_ADDITIONAL_ARGS+=" -DENABLE_MSA=1"
  CMAKE_ADDITIONAL_ARGS+=" -DMIPS_CPU=p5600"
  CMAKE_ADDITIONAL_ARGS+=" -DCMAKE_TOOLCHAIN_FILE=cmake/mips32-linux-gcc.cmake"
  CMAKE_ADDITIONAL_ARGS+=" -DCROSS=${TARGET}-"
  CMAKE_ADDITIONAL_ARGS+=" -DCMAKE_FIND_ROOT_PATH=${GCC_FOLDER}"

  QEMU_ARGS+=" -L ${SYSROOT_FOLDER}"
  QEMU_ARGS+=" -E LD_LIBRARY_PATH=/lib"
  QEMU_ARGS+=" -cpu P5600"
}
+ exit 1 + esac + integrate +} + +if [ "${CONTINUOUS_INTEGRATION}" = "true" ]; then + QEMU_ARCHES=${QEMU_ARCH} + expand_environment_and_integrate +fi diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/test_integration.sh b/src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/test_integration.sh new file mode 100755 index 00000000..53d1d3b8 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/scripts/test_integration.sh @@ -0,0 +1,80 @@ +source "$(dirname -- "$0")"/run_integration.sh + +# Toolchains for little-endian, 64-bit ARMv8 for GNU/Linux systems +function set_aarch64-linux-gnu() { + TOOLCHAIN=LINARO + TARGET=aarch64-linux-gnu + QEMU_ARCH=aarch64 +} + +# Toolchains for little-endian, hard-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems +function set_arm-linux-gnueabihf() { + TOOLCHAIN=LINARO + TARGET=arm-linux-gnueabihf + QEMU_ARCH=arm +} + +# Toolchains for little-endian, 32-bit ARMv8 for GNU/Linux systems +function set_armv8l-linux-gnueabihf() { + TOOLCHAIN=LINARO + TARGET=armv8l-linux-gnueabihf + QEMU_ARCH=arm +} + +# Toolchains for little-endian, soft-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems +function set_arm-linux-gnueabi() { + TOOLCHAIN=LINARO + TARGET=arm-linux-gnueabi + QEMU_ARCH=arm +} + +# Toolchains for big-endian, 64-bit ARMv8 for GNU/Linux systems +function set_aarch64_be-linux-gnu() { + TOOLCHAIN=LINARO + TARGET=aarch64_be-linux-gnu + QEMU_ARCH="DISABLED" +} + +# Toolchains for big-endian, hard-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems +function set_armeb-linux-gnueabihf() { + TOOLCHAIN=LINARO + TARGET=armeb-linux-gnueabihf + QEMU_ARCH="DISABLED" +} + +# Toolchains for big-endian, soft-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems +function set_armeb-linux-gnueabi() { + TOOLCHAIN=LINARO + TARGET=armeb-linux-gnueabi + QEMU_ARCH="DISABLED" +} + + +function set_mips() { + TOOLCHAIN=CODESCAPE + TARGET=mips-mti-linux-gnu + QEMU_ARCH="DISABLED" +} + +function set_native() { 
+ TOOLCHAIN=NATIVE + TARGET=native + QEMU_ARCH="" +} + +ENVIRONMENTS=" + set_aarch64-linux-gnu + set_arm-linux-gnueabihf + set_armv8l-linux-gnueabihf + set_arm-linux-gnueabi + set_aarch64_be-linux-gnu + set_armeb-linux-gnueabihf + set_armeb-linux-gnueabi + set_native + set_mips +" + +for SET_ENVIRONMENT in ${ENVIRONMENTS}; do + ${SET_ENVIRONMENT} + expand_environment_and_integrate +done diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_clang_gcc.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_clang_gcc.c new file mode 100755 index 00000000..472e7125 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_clang_gcc.c @@ -0,0 +1,36 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "internal/cpuid_x86.h" + +#if defined(CPU_FEATURES_ARCH_X86) +#if defined(CPU_FEATURES_COMPILER_CLANG) || defined(CPU_FEATURES_COMPILER_GCC) + +#include + +Leaf CpuId(uint32_t leaf_id) { + Leaf leaf; + __cpuid_count(leaf_id, 0, leaf.eax, leaf.ebx, leaf.ecx, leaf.edx); + return leaf; +} + +uint32_t GetXCR0Eax(void) { + uint32_t eax, edx; + __asm("XGETBV" : "=a"(eax), "=d"(edx) : "c"(0)); + return eax; +} + +#endif // defined(CPU_FEATURES_COMPILER_CLANG) || + // defined(CPU_FEATURES_COMPILER_GCC) +#endif // defined(CPU_FEATURES_ARCH_X86) diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_msvc.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_msvc.c new file mode 100755 index 00000000..cd8f19f2 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuid_x86_msvc.c @@ -0,0 +1,34 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "internal/cpuid_x86.h" + +#if defined(CPU_FEATURES_ARCH_X86) && defined(CPU_FEATURES_COMPILER_MSC) +#include +#include // For __cpuidex() + +Leaf CpuId(uint32_t leaf_id) { + Leaf leaf; + int data[4]; + __cpuid(data, leaf_id); + leaf.eax = data[0]; + leaf.ebx = data[1]; + leaf.ecx = data[2]; + leaf.edx = data[3]; + return leaf; +} + +uint32_t GetXCR0Eax(void) { return _xgetbv(0); } + +#endif // defined(CPU_FEATURES_ARCH_X86) && defined(CPU_FEATURES_COMPILER_MSC) diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_aarch64.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_aarch64.c new file mode 100755 index 00000000..0d111ff9 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_aarch64.c @@ -0,0 +1,141 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "cpuinfo_aarch64.h" + +#include "internal/filesystem.h" +#include "internal/hwcaps.h" +#include "internal/linux_features_aggregator.h" +#include "internal/stack_line_reader.h" +#include "internal/string_view.h" + +#include + +DECLARE_SETTER(Aarch64Features, fp) +DECLARE_SETTER(Aarch64Features, asimd) +DECLARE_SETTER(Aarch64Features, aes) +DECLARE_SETTER(Aarch64Features, pmull) +DECLARE_SETTER(Aarch64Features, sha1) +DECLARE_SETTER(Aarch64Features, sha2) +DECLARE_SETTER(Aarch64Features, crc32) + +static const CapabilityConfig kConfigs[] = { + {{AARCH64_HWCAP_FP, 0}, "fp", &set_fp}, // + {{AARCH64_HWCAP_ASIMD, 0}, "asimd", &set_asimd}, // + {{AARCH64_HWCAP_AES, 0}, "aes", &set_aes}, // + {{AARCH64_HWCAP_PMULL, 0}, "pmull", &set_pmull}, // + {{AARCH64_HWCAP_SHA1, 0}, "sha1", &set_sha1}, // + {{AARCH64_HWCAP_SHA2, 0}, "sha2", &set_sha2}, // + {{AARCH64_HWCAP_CRC32, 0}, "crc32", &set_crc32}, // +}; + +static const size_t kConfigsSize = sizeof(kConfigs) / sizeof(CapabilityConfig); + +static bool HandleAarch64Line(const LineResult result, + Aarch64Info* const info) { + StringView line = result.line; + StringView key, value; + if (CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)) { + if (CpuFeatures_StringView_IsEquals(key, str("Features"))) { + CpuFeatures_SetFromFlags(kConfigsSize, kConfigs, value, &info->features); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU implementer"))) { + info->implementer = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU variant"))) { + info->variant = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU part"))) { + info->part = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU revision"))) { + info->revision = CpuFeatures_StringView_ParsePositiveNumber(value); + } + } + return !result.eof; +} + +static void 
FillProcCpuInfoData(Aarch64Info* const info) { + const int fd = CpuFeatures_OpenFile("/proc/cpuinfo"); + if (fd >= 0) { + StackLineReader reader; + StackLineReader_Initialize(&reader, fd); + for (;;) { + if (!HandleAarch64Line(StackLineReader_NextLine(&reader), info)) { + break; + } + } + CpuFeatures_CloseFile(fd); + } +} + +static const Aarch64Info kEmptyAarch64Info; + +Aarch64Info GetAarch64Info(void) { + // capabilities are fetched from both getauxval and /proc/cpuinfo so we can + // have some information if the executable is sandboxed (aka no access to + // /proc/cpuinfo). + Aarch64Info info = kEmptyAarch64Info; + + FillProcCpuInfoData(&info); + CpuFeatures_OverrideFromHwCaps(kConfigsSize, kConfigs, + CpuFeatures_GetHardwareCapabilities(), + &info.features); + + return info; +} + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +int GetAarch64FeaturesEnumValue(const Aarch64Features* features, + Aarch64FeaturesEnum value) { + switch (value) { + case AARCH64_FP: + return features->fp; + case AARCH64_ASIMD: + return features->asimd; + case AARCH64_AES: + return features->aes; + case AARCH64_PMULL: + return features->pmull; + case AARCH64_SHA1: + return features->sha1; + case AARCH64_SHA2: + return features->sha2; + case AARCH64_CRC32: + return features->crc32; + case AARCH64_LAST_: + break; + } + return false; +} + +const char* GetAarch64FeaturesEnumName(Aarch64FeaturesEnum value) { + switch (value) { + case AARCH64_FP: + return "fp"; + case AARCH64_ASIMD: + return "asimd"; + case AARCH64_AES: + return "aes"; + case AARCH64_PMULL: + return "pmull"; + case AARCH64_SHA1: + return "sha1"; + case AARCH64_SHA2: + return "sha2"; + case AARCH64_CRC32: + return "crc32"; + case AARCH64_LAST_: + break; + } + return "unknown feature"; +} diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_arm.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_arm.c new file mode 100755 index 
00000000..3ea06419 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_arm.c @@ -0,0 +1,259 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpuinfo_arm.h" + +#include "internal/bit_utils.h" +#include "internal/filesystem.h" +#include "internal/hwcaps.h" +#include "internal/linux_features_aggregator.h" +#include "internal/stack_line_reader.h" +#include "internal/string_view.h" + +#include + +DECLARE_SETTER(ArmFeatures, vfp) +DECLARE_SETTER(ArmFeatures, iwmmxt) +DECLARE_SETTER(ArmFeatures, neon) +DECLARE_SETTER(ArmFeatures, vfpv3) +DECLARE_SETTER(ArmFeatures, vfpv3d16) +DECLARE_SETTER(ArmFeatures, vfpv4) +DECLARE_SETTER(ArmFeatures, idiva) +DECLARE_SETTER(ArmFeatures, idivt) +DECLARE_SETTER(ArmFeatures, aes) +DECLARE_SETTER(ArmFeatures, pmull) +DECLARE_SETTER(ArmFeatures, sha1) +DECLARE_SETTER(ArmFeatures, sha2) +DECLARE_SETTER(ArmFeatures, crc32) + +static const CapabilityConfig kConfigs[] = { + {{ARM_HWCAP_VFP, 0}, "vfp", &set_vfp}, // + {{ARM_HWCAP_IWMMXT, 0}, "iwmmxt", &set_iwmmxt}, // + {{ARM_HWCAP_NEON, 0}, "neon", &set_neon}, // + {{ARM_HWCAP_VFPV3, 0}, "vfpv3", &set_vfpv3}, // + {{ARM_HWCAP_VFPV3D16, 0}, "vfpv3d16", &set_vfpv3d16}, // + {{ARM_HWCAP_VFPV4, 0}, "vfpv4", &set_vfpv4}, // + {{ARM_HWCAP_IDIVA, 0}, "idiva", &set_idiva}, // + {{ARM_HWCAP_IDIVT, 0}, "idivt", &set_idivt}, // + {{0, ARM_HWCAP2_AES}, "aes", &set_aes}, // + {{0, ARM_HWCAP2_PMULL}, "pmull", &set_pmull}, // + 
{{0, ARM_HWCAP2_SHA1}, "sha1", &set_sha1}, // + {{0, ARM_HWCAP2_SHA2}, "sha2", &set_sha2}, // + {{0, ARM_HWCAP2_CRC32}, "crc32", &set_crc32}, // +}; + +static const size_t kConfigsSize = sizeof(kConfigs) / sizeof(CapabilityConfig); + +typedef struct { + bool processor_reports_armv6; + bool hardware_reports_goldfish; +} ProcCpuInfoData; + +static int IndexOfNonDigit(StringView str) { + size_t index = 0; + while (str.size && isdigit(CpuFeatures_StringView_Front(str))) { + str = CpuFeatures_StringView_PopFront(str, 1); + ++index; + } + return index; +} + +static bool HandleArmLine(const LineResult result, ArmInfo* const info, + ProcCpuInfoData* const proc_info) { + StringView line = result.line; + StringView key, value; + if (CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)) { + if (CpuFeatures_StringView_IsEquals(key, str("Features"))) { + CpuFeatures_SetFromFlags(kConfigsSize, kConfigs, value, &info->features); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU implementer"))) { + info->implementer = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU variant"))) { + info->variant = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU part"))) { + info->part = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU revision"))) { + info->revision = CpuFeatures_StringView_ParsePositiveNumber(value); + } else if (CpuFeatures_StringView_IsEquals(key, str("CPU architecture"))) { + // CPU architecture is a number that may be followed by letters. e.g. + // "6TEJ", "7". 
+ const StringView digits = + CpuFeatures_StringView_KeepFront(value, IndexOfNonDigit(value)); + info->architecture = CpuFeatures_StringView_ParsePositiveNumber(digits); + } else if (CpuFeatures_StringView_IsEquals(key, str("Processor"))) { + proc_info->processor_reports_armv6 = + CpuFeatures_StringView_IndexOf(value, str("(v6l)")) >= 0; + } else if (CpuFeatures_StringView_IsEquals(key, str("Hardware"))) { + proc_info->hardware_reports_goldfish = + CpuFeatures_StringView_IsEquals(value, str("Goldfish")); + } + } + return !result.eof; +} + +static uint32_t GetCpuId(const ArmInfo* const info) { + return (ExtractBitRange(info->implementer, 7, 0) << 24) | + (ExtractBitRange(info->variant, 3, 0) << 20) | + (ExtractBitRange(info->part, 11, 0) << 4) | + (ExtractBitRange(info->revision, 3, 0) << 0); +} + +static void FixErrors(ArmInfo* const info, + ProcCpuInfoData* const proc_cpu_info_data) { + // Fixing Samsung kernel reporting invalid cpu architecture. + // http://code.google.com/p/android/issues/detail?id=10812 + if (proc_cpu_info_data->processor_reports_armv6 && info->architecture >= 7) { + info->architecture = 6; + } + + // Handle kernel configuration bugs that prevent the correct reporting of CPU + // features. + switch (GetCpuId(info)) { + case 0x4100C080: + // Special case: The emulator-specific Android 4.2 kernel fails to report + // support for the 32-bit ARM IDIV instruction. Technically, this is a + // feature of the virtual CPU implemented by the emulator. Note that it + // could also support Thumb IDIV in the future, and this will have to be + // slightly updated. + if (info->architecture >= 7 && + proc_cpu_info_data->hardware_reports_goldfish) { + info->features.idiva = true; + } + break; + case 0x511004D0: + // https://crbug.com/341598. + info->features.neon = false; + break; + case 0x510006F2: + case 0x510006F3: + // The Nexus 4 (Qualcomm Krait) kernel configuration forgets to report + // IDIV support. 
+ info->features.idiva = true; + info->features.idivt = true; + break; + } + + // Propagate cpu features. + if (info->features.vfpv4) info->features.vfpv3 = true; + if (info->features.neon) info->features.vfpv3 = true; + if (info->features.vfpv3) info->features.vfp = true; +} + +static void FillProcCpuInfoData(ArmInfo* const info, + ProcCpuInfoData* proc_cpu_info_data) { + const int fd = CpuFeatures_OpenFile("/proc/cpuinfo"); + if (fd >= 0) { + StackLineReader reader; + StackLineReader_Initialize(&reader, fd); + for (;;) { + if (!HandleArmLine(StackLineReader_NextLine(&reader), info, + proc_cpu_info_data)) { + break; + } + } + CpuFeatures_CloseFile(fd); + } +} + +static const ArmInfo kEmptyArmInfo; + +static const ProcCpuInfoData kEmptyProcCpuInfoData; + +ArmInfo GetArmInfo(void) { + // capabilities are fetched from both getauxval and /proc/cpuinfo so we can + // have some information if the executable is sandboxed (aka no access to + // /proc/cpuinfo). + ArmInfo info = kEmptyArmInfo; + ProcCpuInfoData proc_cpu_info_data = kEmptyProcCpuInfoData; + + FillProcCpuInfoData(&info, &proc_cpu_info_data); + CpuFeatures_OverrideFromHwCaps(kConfigsSize, kConfigs, + CpuFeatures_GetHardwareCapabilities(), + &info.features); + + FixErrors(&info, &proc_cpu_info_data); + + return info; +} + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +int GetArmFeaturesEnumValue(const ArmFeatures* features, + ArmFeaturesEnum value) { + switch (value) { + case ARM_VFP: + return features->vfp; + case ARM_IWMMXT: + return features->iwmmxt; + case ARM_NEON: + return features->neon; + case ARM_VFPV3: + return features->vfpv3; + case ARM_VFPV3D16: + return features->vfpv3d16; + case ARM_VFPV4: + return features->vfpv4; + case ARM_IDIVA: + return features->idiva; + case ARM_IDIVT: + return features->idivt; + case ARM_AES: + return features->aes; + case ARM_PMULL: + return features->pmull; + case ARM_SHA1: + return features->sha1; + case 
ARM_SHA2: + return features->sha2; + case ARM_CRC32: + return features->crc32; + case ARM_LAST_: + break; + } + return false; +} + +const char* GetArmFeaturesEnumName(ArmFeaturesEnum value) { + switch (value) { + case ARM_VFP: + return "vfp"; + case ARM_IWMMXT: + return "iwmmxt"; + case ARM_NEON: + return "neon"; + case ARM_VFPV3: + return "vfpv3"; + case ARM_VFPV3D16: + return "vfpv3d16"; + case ARM_VFPV4: + return "vfpv4"; + case ARM_IDIVA: + return "idiva"; + case ARM_IDIVT: + return "idivt"; + case ARM_AES: + return "aes"; + case ARM_PMULL: + return "pmull"; + case ARM_SHA1: + return "sha1"; + case ARM_SHA2: + return "sha2"; + case ARM_CRC32: + return "crc32"; + case ARM_LAST_: + break; + } + return "unknown feature"; +} diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_mips.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_mips.c new file mode 100755 index 00000000..a61cdd81 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_mips.c @@ -0,0 +1,98 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "cpuinfo_mips.h" + +#include "internal/filesystem.h" +#include "internal/linux_features_aggregator.h" +#include "internal/stack_line_reader.h" +#include "internal/string_view.h" + +DECLARE_SETTER(MipsFeatures, msa) +DECLARE_SETTER(MipsFeatures, eva) + +static const CapabilityConfig kConfigs[] = { + {{MIPS_HWCAP_MSA, 0}, "msa", &set_msa}, // + {{MIPS_HWCAP_EVA, 0}, "eva", &set_eva}, // +}; +static const size_t kConfigsSize = sizeof(kConfigs) / sizeof(CapabilityConfig); + +static bool HandleMipsLine(const LineResult result, + MipsFeatures* const features) { + StringView key, value; + // See tests for an example. + if (CpuFeatures_StringView_GetAttributeKeyValue(result.line, &key, &value)) { + if (CpuFeatures_StringView_IsEquals(key, str("ASEs implemented"))) { + CpuFeatures_SetFromFlags(kConfigsSize, kConfigs, value, features); + } + } + return !result.eof; +} + +static void FillProcCpuInfoData(MipsFeatures* const features) { + const int fd = CpuFeatures_OpenFile("/proc/cpuinfo"); + if (fd >= 0) { + StackLineReader reader; + StackLineReader_Initialize(&reader, fd); + for (;;) { + if (!HandleMipsLine(StackLineReader_NextLine(&reader), features)) { + break; + } + } + CpuFeatures_CloseFile(fd); + } +} + +static const MipsInfo kEmptyMipsInfo; + +MipsInfo GetMipsInfo(void) { + // capabilities are fetched from both getauxval and /proc/cpuinfo so we can + // have some information if the executable is sandboxed (aka no access to + // /proc/cpuinfo). 
+ MipsInfo info = kEmptyMipsInfo; + + FillProcCpuInfoData(&info.features); + CpuFeatures_OverrideFromHwCaps(kConfigsSize, kConfigs, + CpuFeatures_GetHardwareCapabilities(), + &info.features); + return info; +} + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +int GetMipsFeaturesEnumValue(const MipsFeatures* features, + MipsFeaturesEnum value) { + switch (value) { + case MIPS_MSA: + return features->msa; + case MIPS_EVA: + return features->eva; + case MIPS_LAST_: + break; + } + return false; +} + +const char* GetMipsFeaturesEnumName(MipsFeaturesEnum value) { + switch (value) { + case MIPS_MSA: + return "msa"; + case MIPS_EVA: + return "eva"; + case MIPS_LAST_: + break; + } + return "unknown feature"; +} diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_ppc.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_ppc.c new file mode 100755 index 00000000..59b9ecca --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_ppc.c @@ -0,0 +1,358 @@ +// Copyright 2018 IBM. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +#include "cpuinfo_ppc.h" +#include "internal/bit_utils.h" +#include "internal/filesystem.h" +#include "internal/linux_features_aggregator.h" +#include "internal/stack_line_reader.h" +#include "internal/string_view.h" + +DECLARE_SETTER(PPCFeatures, ppc32) +DECLARE_SETTER(PPCFeatures, ppc64) +DECLARE_SETTER(PPCFeatures, ppc601) +DECLARE_SETTER(PPCFeatures, altivec) +DECLARE_SETTER(PPCFeatures, fpu) +DECLARE_SETTER(PPCFeatures, mmu) +DECLARE_SETTER(PPCFeatures, mac_4xx) +DECLARE_SETTER(PPCFeatures, unifiedcache) +DECLARE_SETTER(PPCFeatures, spe) +DECLARE_SETTER(PPCFeatures, efpsingle) +DECLARE_SETTER(PPCFeatures, efpdouble) +DECLARE_SETTER(PPCFeatures, no_tb) +DECLARE_SETTER(PPCFeatures, power4) +DECLARE_SETTER(PPCFeatures, power5) +DECLARE_SETTER(PPCFeatures, power5plus) +DECLARE_SETTER(PPCFeatures, cell) +DECLARE_SETTER(PPCFeatures, booke) +DECLARE_SETTER(PPCFeatures, smt) +DECLARE_SETTER(PPCFeatures, icachesnoop) +DECLARE_SETTER(PPCFeatures, arch205) +DECLARE_SETTER(PPCFeatures, pa6t) +DECLARE_SETTER(PPCFeatures, dfp) +DECLARE_SETTER(PPCFeatures, power6ext) +DECLARE_SETTER(PPCFeatures, arch206) +DECLARE_SETTER(PPCFeatures, vsx) +DECLARE_SETTER(PPCFeatures, pseries_perfmon_compat) +DECLARE_SETTER(PPCFeatures, truele) +DECLARE_SETTER(PPCFeatures, ppcle) +DECLARE_SETTER(PPCFeatures, arch207) +DECLARE_SETTER(PPCFeatures, htm) +DECLARE_SETTER(PPCFeatures, dscr) +DECLARE_SETTER(PPCFeatures, ebb) +DECLARE_SETTER(PPCFeatures, isel) +DECLARE_SETTER(PPCFeatures, tar) +DECLARE_SETTER(PPCFeatures, vcrypto) +DECLARE_SETTER(PPCFeatures, htm_nosc) +DECLARE_SETTER(PPCFeatures, arch300) +DECLARE_SETTER(PPCFeatures, ieee128) +DECLARE_SETTER(PPCFeatures, darn) +DECLARE_SETTER(PPCFeatures, scv) +DECLARE_SETTER(PPCFeatures, htm_no_suspend) + +static const CapabilityConfig kConfigs[] = { + {{PPC_FEATURE_32, 0}, "ppc32", &set_ppc32}, + {{PPC_FEATURE_64, 0}, "ppc64", &set_ppc64}, + {{PPC_FEATURE_601_INSTR, 0}, "ppc601", &set_ppc601}, + {{PPC_FEATURE_HAS_ALTIVEC, 
0}, "altivec", &set_altivec}, + {{PPC_FEATURE_HAS_FPU, 0}, "fpu", &set_fpu}, + {{PPC_FEATURE_HAS_MMU, 0}, "mmu", &set_mmu}, + {{PPC_FEATURE_HAS_4xxMAC, 0}, "4xxmac", &set_mac_4xx}, + {{PPC_FEATURE_UNIFIED_CACHE, 0}, "ucache", &set_unifiedcache}, + {{PPC_FEATURE_HAS_SPE, 0}, "spe", &set_spe}, + {{PPC_FEATURE_HAS_EFP_SINGLE, 0}, "efpsingle", &set_efpsingle}, + {{PPC_FEATURE_HAS_EFP_DOUBLE, 0}, "efpdouble", &set_efpdouble}, + {{PPC_FEATURE_NO_TB, 0}, "notb", &set_no_tb}, + {{PPC_FEATURE_POWER4, 0}, "power4", &set_power4}, + {{PPC_FEATURE_POWER5, 0}, "power5", &set_power5}, + {{PPC_FEATURE_POWER5_PLUS, 0}, "power5+", &set_power5plus}, + {{PPC_FEATURE_CELL, 0}, "cellbe", &set_cell}, + {{PPC_FEATURE_BOOKE, 0}, "booke", &set_booke}, + {{PPC_FEATURE_SMT, 0}, "smt", &set_smt}, + {{PPC_FEATURE_ICACHE_SNOOP, 0}, "ic_snoop", &set_icachesnoop}, + {{PPC_FEATURE_ARCH_2_05, 0}, "arch_2_05", &set_arch205}, + {{PPC_FEATURE_PA6T, 0}, "pa6t", &set_pa6t}, + {{PPC_FEATURE_HAS_DFP, 0}, "dfp", &set_dfp}, + {{PPC_FEATURE_POWER6_EXT, 0}, "power6x", &set_power6ext}, + {{PPC_FEATURE_ARCH_2_06, 0}, "arch_2_06", &set_arch206}, + {{PPC_FEATURE_HAS_VSX, 0}, "vsx", &set_vsx}, + {{PPC_FEATURE_PSERIES_PERFMON_COMPAT, 0}, + "archpmu", + &set_pseries_perfmon_compat}, + {{PPC_FEATURE_TRUE_LE, 0}, "true_le", &set_truele}, + {{PPC_FEATURE_PPC_LE, 0}, "ppcle", &set_ppcle}, + {{0, PPC_FEATURE2_ARCH_2_07}, "arch_2_07", &set_arch207}, + {{0, PPC_FEATURE2_HTM}, "htm", &set_htm}, + {{0, PPC_FEATURE2_DSCR}, "dscr", &set_dscr}, + {{0, PPC_FEATURE2_EBB}, "ebb", &set_ebb}, + {{0, PPC_FEATURE2_ISEL}, "isel", &set_isel}, + {{0, PPC_FEATURE2_TAR}, "tar", &set_tar}, + {{0, PPC_FEATURE2_VEC_CRYPTO}, "vcrypto", &set_vcrypto}, + {{0, PPC_FEATURE2_HTM_NOSC}, "htm-nosc", &set_htm_nosc}, + {{0, PPC_FEATURE2_ARCH_3_00}, "arch_3_00", &set_arch300}, + {{0, PPC_FEATURE2_HAS_IEEE128}, "ieee128", &set_ieee128}, + {{0, PPC_FEATURE2_DARN}, "darn", &set_darn}, + {{0, PPC_FEATURE2_SCV}, "scv", &set_scv}, + {{0, 
PPC_FEATURE2_HTM_NO_SUSPEND}, "htm-no-suspend", &set_htm_no_suspend}, +}; +static const size_t kConfigsSize = sizeof(kConfigs) / sizeof(CapabilityConfig); + +static bool HandlePPCLine(const LineResult result, + PPCPlatformStrings* const strings) { + StringView line = result.line; + StringView key, value; + if (CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)) { + if (CpuFeatures_StringView_HasWord(key, "platform")) { + CpuFeatures_StringView_CopyString(value, strings->platform, + sizeof(strings->platform)); + } else if (CpuFeatures_StringView_IsEquals(key, str("model"))) { + CpuFeatures_StringView_CopyString(value, strings->model, + sizeof(strings->platform)); + } else if (CpuFeatures_StringView_IsEquals(key, str("machine"))) { + CpuFeatures_StringView_CopyString(value, strings->machine, + sizeof(strings->platform)); + } else if (CpuFeatures_StringView_IsEquals(key, str("cpu"))) { + CpuFeatures_StringView_CopyString(value, strings->cpu, + sizeof(strings->platform)); + } + } + return !result.eof; +} + +static void FillProcCpuInfoData(PPCPlatformStrings* const strings) { + const int fd = CpuFeatures_OpenFile("/proc/cpuinfo"); + if (fd >= 0) { + StackLineReader reader; + StackLineReader_Initialize(&reader, fd); + for (;;) { + if (!HandlePPCLine(StackLineReader_NextLine(&reader), strings)) { + break; + } + } + CpuFeatures_CloseFile(fd); + } +} + +static const PPCInfo kEmptyPPCInfo; + +PPCInfo GetPPCInfo(void) { + /* + * On Power feature flags aren't currently in cpuinfo so we only look at + * the auxilary vector. 
+ */ + PPCInfo info = kEmptyPPCInfo; + + CpuFeatures_OverrideFromHwCaps(kConfigsSize, kConfigs, + CpuFeatures_GetHardwareCapabilities(), + &info.features); + return info; +} + +static const PPCPlatformStrings kEmptyPPCPlatformStrings; + +PPCPlatformStrings GetPPCPlatformStrings(void) { + PPCPlatformStrings strings = kEmptyPPCPlatformStrings; + + FillProcCpuInfoData(&strings); + strings.type = CpuFeatures_GetPlatformType(); + return strings; +} + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +int GetPPCFeaturesEnumValue(const PPCFeatures* features, + PPCFeaturesEnum value) { + switch (value) { + case PPC_32: + return features->ppc32; + case PPC_64: + return features->ppc64; + case PPC_601_INSTR: + return features->ppc601; + case PPC_HAS_ALTIVEC: + return features->altivec; + case PPC_HAS_FPU: + return features->fpu; + case PPC_HAS_MMU: + return features->mmu; + case PPC_HAS_4xxMAC: + return features->mac_4xx; + case PPC_UNIFIED_CACHE: + return features->unifiedcache; + case PPC_HAS_SPE: + return features->spe; + case PPC_HAS_EFP_SINGLE: + return features->efpsingle; + case PPC_HAS_EFP_DOUBLE: + return features->efpdouble; + case PPC_NO_TB: + return features->no_tb; + case PPC_POWER4: + return features->power4; + case PPC_POWER5: + return features->power5; + case PPC_POWER5_PLUS: + return features->power5plus; + case PPC_CELL: + return features->cell; + case PPC_BOOKE: + return features->booke; + case PPC_SMT: + return features->smt; + case PPC_ICACHE_SNOOP: + return features->icachesnoop; + case PPC_ARCH_2_05: + return features->arch205; + case PPC_PA6T: + return features->pa6t; + case PPC_HAS_DFP: + return features->dfp; + case PPC_POWER6_EXT: + return features->power6ext; + case PPC_ARCH_2_06: + return features->arch206; + case PPC_HAS_VSX: + return features->vsx; + case PPC_PSERIES_PERFMON_COMPAT: + return features->pseries_perfmon_compat; + case PPC_TRUE_LE: + return features->truele; + case 
PPC_PPC_LE: + return features->ppcle; + case PPC_ARCH_2_07: + return features->arch207; + case PPC_HTM: + return features->htm; + case PPC_DSCR: + return features->dscr; + case PPC_EBB: + return features->ebb; + case PPC_ISEL: + return features->isel; + case PPC_TAR: + return features->tar; + case PPC_VEC_CRYPTO: + return features->vcrypto; + case PPC_HTM_NOSC: + return features->htm_nosc; + case PPC_ARCH_3_00: + return features->arch300; + case PPC_HAS_IEEE128: + return features->ieee128; + case PPC_DARN: + return features->darn; + case PPC_SCV: + return features->scv; + case PPC_HTM_NO_SUSPEND: + return features->htm_no_suspend; + case PPC_LAST_: + break; + } + return false; +} + +/* Have used the same names as glibc */ +const char* GetPPCFeaturesEnumName(PPCFeaturesEnum value) { + switch (value) { + case PPC_32: + return "ppc32"; + case PPC_64: + return "ppc64"; + case PPC_601_INSTR: + return "ppc601"; + case PPC_HAS_ALTIVEC: + return "altivec"; + case PPC_HAS_FPU: + return "fpu"; + case PPC_HAS_MMU: + return "mmu"; + case PPC_HAS_4xxMAC: + return "4xxmac"; + case PPC_UNIFIED_CACHE: + return "ucache"; + case PPC_HAS_SPE: + return "spe"; + case PPC_HAS_EFP_SINGLE: + return "efpsingle"; + case PPC_HAS_EFP_DOUBLE: + return "efpdouble"; + case PPC_NO_TB: + return "notb"; + case PPC_POWER4: + return "power4"; + case PPC_POWER5: + return "power5"; + case PPC_POWER5_PLUS: + return "power5+"; + case PPC_CELL: + return "cellbe"; + case PPC_BOOKE: + return "booke"; + case PPC_SMT: + return "smt"; + case PPC_ICACHE_SNOOP: + return "ic_snoop"; + case PPC_ARCH_2_05: + return "arch_2_05"; + case PPC_PA6T: + return "pa6t"; + case PPC_HAS_DFP: + return "dfp"; + case PPC_POWER6_EXT: + return "power6x"; + case PPC_ARCH_2_06: + return "arch_2_06"; + case PPC_HAS_VSX: + return "vsx"; + case PPC_PSERIES_PERFMON_COMPAT: + return "archpmu"; + case PPC_TRUE_LE: + return "true_le"; + case PPC_PPC_LE: + return "ppcle"; + case PPC_ARCH_2_07: + return "arch_2_07"; + case PPC_HTM: + return 
"htm"; + case PPC_DSCR: + return "dscr"; + case PPC_EBB: + return "ebb"; + case PPC_ISEL: + return "isel"; + case PPC_TAR: + return "tar"; + case PPC_VEC_CRYPTO: + return "vcrypto"; + case PPC_HTM_NOSC: + return "htm-nosc"; + case PPC_ARCH_3_00: + return "arch_3_00"; + case PPC_HAS_IEEE128: + return "ieee128"; + case PPC_DARN: + return "darn"; + case PPC_SCV: + return "scv"; + case PPC_HTM_NO_SUSPEND: + return "htm-no-suspend"; + case PPC_LAST_: + break; + } + return "unknown_feature"; +} diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_x86.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_x86.c new file mode 100755 index 00000000..390e8c92 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/cpuinfo_x86.c @@ -0,0 +1,447 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "cpuinfo_x86.h" +#include "internal/bit_utils.h" +#include "internal/cpuid_x86.h" + +#include +#include + +static const Leaf kEmptyLeaf; + +static Leaf SafeCpuId(uint32_t max_cpuid_leaf, uint32_t leaf_id) { + if (leaf_id <= max_cpuid_leaf) { + return CpuId(leaf_id); + } else { + return kEmptyLeaf; + } +} + +#define MASK_XMM 0x2 +#define MASK_YMM 0x4 +#define MASK_MASKREG 0x20 +#define MASK_ZMM0_15 0x40 +#define MASK_ZMM16_31 0x80 + +static bool HasMask(uint32_t value, uint32_t mask) { + return (value & mask) == mask; +} + +// Checks that operating system saves and restores xmm registers during context +// switches. +static bool HasXmmOsXSave(uint32_t xcr0_eax) { + return HasMask(xcr0_eax, MASK_XMM); +} + +// Checks that operating system saves and restores ymm registers during context +// switches. +static bool HasYmmOsXSave(uint32_t xcr0_eax) { + return HasMask(xcr0_eax, MASK_XMM | MASK_YMM); +} + +// Checks that operating system saves and restores zmm registers during context +// switches. +static bool HasZmmOsXSave(uint32_t xcr0_eax) { + return HasMask(xcr0_eax, MASK_XMM | MASK_YMM | MASK_MASKREG | MASK_ZMM0_15 | + MASK_ZMM16_31); +} + +static void SetVendor(const Leaf leaf, char* const vendor) { + *(uint32_t*)(vendor) = leaf.ebx; + *(uint32_t*)(vendor + 4) = leaf.edx; + *(uint32_t*)(vendor + 8) = leaf.ecx; + vendor[12] = '\0'; +} + +static int IsVendor(const Leaf leaf, const char* const name) { + const uint32_t ebx = *(const uint32_t*)(name); + const uint32_t edx = *(const uint32_t*)(name + 4); + const uint32_t ecx = *(const uint32_t*)(name + 8); + return leaf.ebx == ebx && leaf.ecx == ecx && leaf.edx == edx; +} + +// Reference https://en.wikipedia.org/wiki/CPUID. 
+static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info) { + const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1); + const Leaf leaf_7 = SafeCpuId(max_cpuid_leaf, 7); + + const bool have_xsave = IsBitSet(leaf_1.ecx, 26); + const bool have_osxsave = IsBitSet(leaf_1.ecx, 27); + const uint32_t xcr0_eax = (have_xsave && have_osxsave) ? GetXCR0Eax() : 0; + const bool have_sse_os_support = HasXmmOsXSave(xcr0_eax); + const bool have_avx_os_support = HasYmmOsXSave(xcr0_eax); + const bool have_avx512_os_support = HasZmmOsXSave(xcr0_eax); + + const uint32_t family = ExtractBitRange(leaf_1.eax, 11, 8); + const uint32_t extended_family = ExtractBitRange(leaf_1.eax, 27, 20); + const uint32_t model = ExtractBitRange(leaf_1.eax, 7, 4); + const uint32_t extended_model = ExtractBitRange(leaf_1.eax, 19, 16); + + X86Features* const features = &info->features; + + info->family = extended_family + family; + info->model = (extended_model << 4) + model; + info->stepping = ExtractBitRange(leaf_1.eax, 3, 0); + + features->smx = IsBitSet(leaf_1.ecx, 6); + features->cx16 = IsBitSet(leaf_1.ecx, 13); + features->aes = IsBitSet(leaf_1.ecx, 25); + features->f16c = IsBitSet(leaf_1.ecx, 29); + features->sgx = IsBitSet(leaf_7.ebx, 2); + features->bmi1 = IsBitSet(leaf_7.ebx, 3); + features->bmi2 = IsBitSet(leaf_7.ebx, 8); + features->erms = IsBitSet(leaf_7.ebx, 9); + features->vpclmulqdq = IsBitSet(leaf_7.ecx, 10); + + if (have_sse_os_support) { + features->ssse3 = IsBitSet(leaf_1.ecx, 9); + features->sse4_1 = IsBitSet(leaf_1.ecx, 19); + features->sse4_2 = IsBitSet(leaf_1.ecx, 20); + } + + if (have_avx_os_support) { + features->fma3 = IsBitSet(leaf_1.ecx, 12); + features->avx = IsBitSet(leaf_1.ecx, 28); + features->avx2 = IsBitSet(leaf_7.ebx, 5); + } + + if (have_avx512_os_support) { + features->avx512f = IsBitSet(leaf_7.ebx, 16); + features->avx512cd = IsBitSet(leaf_7.ebx, 28); + features->avx512er = IsBitSet(leaf_7.ebx, 27); + features->avx512pf = IsBitSet(leaf_7.ebx, 26); + 
features->avx512bw = IsBitSet(leaf_7.ebx, 30); + features->avx512dq = IsBitSet(leaf_7.ebx, 17); + features->avx512vl = IsBitSet(leaf_7.ebx, 31); + features->avx512ifma = IsBitSet(leaf_7.ebx, 21); + features->avx512vbmi = IsBitSet(leaf_7.ecx, 1); + features->avx512vbmi2 = IsBitSet(leaf_7.ecx, 6); + features->avx512vnni = IsBitSet(leaf_7.ecx, 11); + features->avx512bitalg = IsBitSet(leaf_7.ecx, 12); + features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14); + features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2); + features->avx512_4vbmi2 = IsBitSet(leaf_7.edx, 3); + } +} + +static const X86Info kEmptyX86Info; + +X86Info GetX86Info(void) { + X86Info info = kEmptyX86Info; + const Leaf leaf_0 = CpuId(0); + const uint32_t max_cpuid_leaf = leaf_0.eax; + SetVendor(leaf_0, info.vendor); + if (IsVendor(leaf_0, "GenuineIntel") || IsVendor(leaf_0, "AuthenticAMD")) { + ParseCpuId(max_cpuid_leaf, &info); + } + return info; +} + +#define CPUID(FAMILY, MODEL) (((FAMILY & 0xFF) << 8) | (MODEL & 0xFF)) + +X86Microarchitecture GetX86Microarchitecture(const X86Info* info) { + if (memcmp(info->vendor, "GenuineIntel", sizeof(info->vendor)) == 0) { + switch (CPUID(info->family, info->model)) { + case CPUID(0x06, 0x35): + case CPUID(0x06, 0x36): + // https://en.wikipedia.org/wiki/Bonnell_(microarchitecture) + return INTEL_ATOM_BNL; + case CPUID(0x06, 0x37): + case CPUID(0x06, 0x4C): + // https://en.wikipedia.org/wiki/Silvermont + return INTEL_ATOM_SMT; + case CPUID(0x06, 0x5C): + // https://en.wikipedia.org/wiki/Goldmont + return INTEL_ATOM_GMT; + case CPUID(0x06, 0x0F): + case CPUID(0x06, 0x16): + // https://en.wikipedia.org/wiki/Intel_Core_(microarchitecture) + return INTEL_CORE; + case CPUID(0x06, 0x17): + case CPUID(0x06, 0x1D): + // https://en.wikipedia.org/wiki/Penryn_(microarchitecture) + return INTEL_PNR; + case CPUID(0x06, 0x1A): + case CPUID(0x06, 0x1E): + case CPUID(0x06, 0x1F): + case CPUID(0x06, 0x2E): + // https://en.wikipedia.org/wiki/Nehalem_(microarchitecture) + return 
INTEL_NHM; + case CPUID(0x06, 0x25): + case CPUID(0x06, 0x2C): + case CPUID(0x06, 0x2F): + // https://en.wikipedia.org/wiki/Westmere_(microarchitecture) + return INTEL_WSM; + case CPUID(0x06, 0x2A): + case CPUID(0x06, 0x2D): + // https://en.wikipedia.org/wiki/Sandy_Bridge#Models_and_steppings + return INTEL_SNB; + case CPUID(0x06, 0x3A): + case CPUID(0x06, 0x3E): + // https://en.wikipedia.org/wiki/Ivy_Bridge_(microarchitecture)#Models_and_steppings + return INTEL_IVB; + case CPUID(0x06, 0x3C): + case CPUID(0x06, 0x3F): + case CPUID(0x06, 0x45): + case CPUID(0x06, 0x46): + // https://en.wikipedia.org/wiki/Haswell_(microarchitecture) + return INTEL_HSW; + case CPUID(0x06, 0x3D): + case CPUID(0x06, 0x47): + case CPUID(0x06, 0x4F): + case CPUID(0x06, 0x56): + // https://en.wikipedia.org/wiki/Broadwell_(microarchitecture) + return INTEL_BDW; + case CPUID(0x06, 0x4E): + case CPUID(0x06, 0x55): + case CPUID(0x06, 0x5E): + // https://en.wikipedia.org/wiki/Skylake_(microarchitecture) + return INTEL_SKL; + case CPUID(0x06, 0x8E): + case CPUID(0x06, 0x9E): + // https://en.wikipedia.org/wiki/Kaby_Lake + return INTEL_KBL; + default: + return X86_UNKNOWN; + } + } + if (memcmp(info->vendor, "AuthenticAMD", sizeof(info->vendor)) == 0) { + switch (info->family) { + // https://en.wikipedia.org/wiki/List_of_AMD_CPU_microarchitectures + case 0x0F: + return AMD_HAMMER; + case 0x10: + return AMD_K10; + case 0x14: + return AMD_BOBCAT; + case 0x15: + return AMD_BULLDOZER; + case 0x16: + return AMD_JAGUAR; + case 0x17: + return AMD_ZEN; + default: + return X86_UNKNOWN; + } + } + return X86_UNKNOWN; +} + +static void SetString(const uint32_t max_cpuid_ext_leaf, const uint32_t leaf_id, + char* buffer) { + const Leaf leaf = SafeCpuId(max_cpuid_ext_leaf, leaf_id); + // We allow calling memcpy from SetString which is only called when requesting + // X86BrandString. 
+ memcpy(buffer, &leaf, sizeof(Leaf)); +} + +void FillX86BrandString(char brand_string[49]) { + const Leaf leaf_ext_0 = CpuId(0x80000000); + const uint32_t max_cpuid_leaf_ext = leaf_ext_0.eax; + SetString(max_cpuid_leaf_ext, 0x80000002, brand_string); + SetString(max_cpuid_leaf_ext, 0x80000003, brand_string + 16); + SetString(max_cpuid_leaf_ext, 0x80000004, brand_string + 32); + brand_string[48] = '\0'; +} + +//////////////////////////////////////////////////////////////////////////////// +// Introspection functions + +int GetX86FeaturesEnumValue(const X86Features* features, + X86FeaturesEnum value) { + switch (value) { + case X86_AES: + return features->aes; + case X86_ERMS: + return features->erms; + case X86_F16C: + return features->f16c; + case X86_FMA3: + return features->fma3; + case X86_VPCLMULQDQ: + return features->vpclmulqdq; + case X86_BMI1: + return features->bmi1; + case X86_BMI2: + return features->bmi2; + case X86_SSSE3: + return features->ssse3; + case X86_SSE4_1: + return features->sse4_1; + case X86_SSE4_2: + return features->sse4_2; + case X86_AVX: + return features->avx; + case X86_AVX2: + return features->avx2; + case X86_AVX512F: + return features->avx512f; + case X86_AVX512CD: + return features->avx512cd; + case X86_AVX512ER: + return features->avx512er; + case X86_AVX512PF: + return features->avx512pf; + case X86_AVX512BW: + return features->avx512bw; + case X86_AVX512DQ: + return features->avx512dq; + case X86_AVX512VL: + return features->avx512vl; + case X86_AVX512IFMA: + return features->avx512ifma; + case X86_AVX512VBMI: + return features->avx512vbmi; + case X86_AVX512VBMI2: + return features->avx512vbmi2; + case X86_AVX512VNNI: + return features->avx512vnni; + case X86_AVX512BITALG: + return features->avx512bitalg; + case X86_AVX512VPOPCNTDQ: + return features->avx512vpopcntdq; + case X86_AVX512_4VNNIW: + return features->avx512_4vnniw; + case X86_AVX512_4VBMI2: + return features->avx512_4vbmi2; + case X86_SMX: + return features->smx; + 
case X86_SGX: + return features->sgx; + case X86_CX16: + return features->cx16; + case X86_LAST_: + break; + } + return false; +} + +const char* GetX86FeaturesEnumName(X86FeaturesEnum value) { + switch (value) { + case X86_AES: + return "aes"; + case X86_ERMS: + return "erms"; + case X86_F16C: + return "f16c"; + case X86_FMA3: + return "fma3"; + case X86_VPCLMULQDQ: + return "vpclmulqdq"; + case X86_BMI1: + return "bmi1"; + case X86_BMI2: + return "bmi2"; + case X86_SSSE3: + return "ssse3"; + case X86_SSE4_1: + return "sse4_1"; + case X86_SSE4_2: + return "sse4_2"; + case X86_AVX: + return "avx"; + case X86_AVX2: + return "avx2"; + case X86_AVX512F: + return "avx512f"; + case X86_AVX512CD: + return "avx512cd"; + case X86_AVX512ER: + return "avx512er"; + case X86_AVX512PF: + return "avx512pf"; + case X86_AVX512BW: + return "avx512bw"; + case X86_AVX512DQ: + return "avx512dq"; + case X86_AVX512VL: + return "avx512vl"; + case X86_AVX512IFMA: + return "avx512ifma"; + case X86_AVX512VBMI: + return "avx512vbmi"; + case X86_AVX512VBMI2: + return "avx512vbmi2"; + case X86_AVX512VNNI: + return "avx512vnni"; + case X86_AVX512BITALG: + return "avx512bitalg"; + case X86_AVX512VPOPCNTDQ: + return "avx512vpopcntdq"; + case X86_AVX512_4VNNIW: + return "avx512_4vnniw"; + case X86_AVX512_4VBMI2: + return "avx512_4vbmi2"; + case X86_SMX: + return "smx"; + case X86_SGX: + return "sgx"; + case X86_CX16: + return "cx16"; + case X86_LAST_: + break; + } + return "unknown_feature"; +} + +const char* GetX86MicroarchitectureName(X86Microarchitecture uarch) { + switch (uarch) { + case X86_UNKNOWN: + return "X86_UNKNOWN"; + case INTEL_CORE: + return "INTEL_CORE"; + case INTEL_PNR: + return "INTEL_PNR"; + case INTEL_NHM: + return "INTEL_NHM"; + case INTEL_ATOM_BNL: + return "INTEL_ATOM_BNL"; + case INTEL_WSM: + return "INTEL_WSM"; + case INTEL_SNB: + return "INTEL_SNB"; + case INTEL_IVB: + return "INTEL_IVB"; + case INTEL_ATOM_SMT: + return "INTEL_ATOM_SMT"; + case INTEL_HSW: + return 
"INTEL_HSW"; + case INTEL_BDW: + return "INTEL_BDW"; + case INTEL_SKL: + return "INTEL_SKL"; + case INTEL_ATOM_GMT: + return "INTEL_ATOM_GMT"; + case INTEL_KBL: + return "INTEL_KBL"; + case INTEL_CFL: + return "INTEL_CFL"; + case INTEL_CNL: + return "INTEL_CNL"; + case AMD_HAMMER: + return "AMD_HAMMER"; + case AMD_K10: + return "AMD_K10"; + case AMD_BOBCAT: + return "AMD_BOBCAT"; + case AMD_BULLDOZER: + return "AMD_BULLDOZER"; + case AMD_JAGUAR: + return "AMD_JAGUAR"; + case AMD_ZEN: + return "AMD_ZEN"; + } + return "unknown microarchitecture"; +} diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/filesystem.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/filesystem.c new file mode 100755 index 00000000..286a9ccb --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/filesystem.c @@ -0,0 +1,57 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "internal/filesystem.h" + +#include +#include +#include +#include + +#if defined(_MSC_VER) +#include +int CpuFeatures_OpenFile(const char* filename) { + return _open(filename, _O_RDONLY); +} + +void CpuFeatures_CloseFile(int file_descriptor) { _close(file_descriptor); } + +int CpuFeatures_ReadFile(int file_descriptor, void* buffer, + size_t buffer_size) { + return _read(file_descriptor, buffer, buffer_size); +} + +#else +#include + +int CpuFeatures_OpenFile(const char* filename) { + int result; + do { + result = open(filename, O_RDONLY); + } while (result == -1L && errno == EINTR); + return result; +} + +void CpuFeatures_CloseFile(int file_descriptor) { close(file_descriptor); } + +int CpuFeatures_ReadFile(int file_descriptor, void* buffer, + size_t buffer_size) { + int result; + do { + result = read(file_descriptor, buffer, buffer_size); + } while (result == -1L && errno == EINTR); + return result; +} + +#endif diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/hwcaps.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/hwcaps.c new file mode 100755 index 00000000..99ea74b5 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/hwcaps.c @@ -0,0 +1,194 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +#include "cpu_features_macros.h" +#include "internal/filesystem.h" +#include "internal/hwcaps.h" +#include "internal/string_view.h" + +#if defined(NDEBUG) +#define D(...) +#else +#include +#define D(...) \ + do { \ + printf(__VA_ARGS__); \ + fflush(stdout); \ + } while (0) +#endif + +#if defined(CPU_FEATURES_ARCH_MIPS) || defined(CPU_FEATURES_ARCH_ANY_ARM) +#define HWCAPS_ANDROID_MIPS_OR_ARM +#endif + +#if defined(CPU_FEATURES_OS_LINUX_OR_ANDROID) && \ + !defined(HWCAPS_ANDROID_MIPS_OR_ARM) +#define HWCAPS_REGULAR_LINUX +#endif + +#if defined(HWCAPS_ANDROID_MIPS_OR_ARM) || defined(HWCAPS_REGULAR_LINUX) +#define HWCAPS_SUPPORTED +#endif + +//////////////////////////////////////////////////////////////////////////////// +// Implementation of GetElfHwcapFromGetauxval +//////////////////////////////////////////////////////////////////////////////// + +// On Linux we simply use getauxval. +#if defined(HWCAPS_REGULAR_LINUX) +#include +#include +static unsigned long GetElfHwcapFromGetauxval(uint32_t hwcap_type) { + return getauxval(hwcap_type); +} +#endif // defined(HWCAPS_REGULAR_LINUX) + +// On Android we probe the system's C library for a 'getauxval' function and +// call it if it exits, or return 0 for failure. This function is available +// since API level 20. +// +// This code does *NOT* check for '__ANDROID_API__ >= 20' to support the edge +// case where some NDK developers use headers for a platform that is newer than +// the one really targetted by their application. This is typically done to use +// newer native APIs only when running on more recent Android versions, and +// requires careful symbol management. +// +// Note that getauxval() can't really be re-implemented here, because its +// implementation does not parse /proc/self/auxv. Instead it depends on values +// that are passed by the kernel at process-init time to the C runtime +// initialization layer. 
+#if defined(HWCAPS_ANDROID_MIPS_OR_ARM) +#include +#define AT_HWCAP 16 +#define AT_HWCAP2 26 +#define AT_PLATFORM 15 +#define AT_BASE_PLATFORM 24 + +typedef unsigned long getauxval_func_t(unsigned long); + +static uint32_t GetElfHwcapFromGetauxval(uint32_t hwcap_type) { + uint32_t ret = 0; + void* libc_handle = NULL; + getauxval_func_t* func = NULL; + + dlerror(); // Cleaning error state before calling dlopen. + libc_handle = dlopen("libc.so", RTLD_NOW); + if (!libc_handle) { + D("Could not dlopen() C library: %s\n", dlerror()); + return 0; + } + func = (getauxval_func_t*)dlsym(libc_handle, "getauxval"); + if (!func) { + D("Could not find getauxval() in C library\n"); + } else { + // Note: getauxval() returns 0 on failure. Doesn't touch errno. + ret = (uint32_t)(*func)(hwcap_type); + } + dlclose(libc_handle); + return ret; +} +#endif // defined(HWCAPS_ANDROID_MIPS_OR_ARM) + +#if defined(HWCAPS_SUPPORTED) +//////////////////////////////////////////////////////////////////////////////// +// Implementation of GetHardwareCapabilities for Android and Linux +//////////////////////////////////////////////////////////////////////////////// + +// Fallback when getauxval is not available, retrieves hwcaps from +// "/proc/self/auxv". +static uint32_t GetElfHwcapFromProcSelfAuxv(uint32_t hwcap_type) { + struct { + uint32_t tag; + uint32_t value; + } entry; + uint32_t result = 0; + const char filepath[] = "/proc/self/auxv"; + const int fd = CpuFeatures_OpenFile(filepath); + if (fd < 0) { + D("Could not open %s\n", filepath); + return 0; + } + for (;;) { + const int ret = CpuFeatures_ReadFile(fd, (char*)&entry, sizeof entry); + if (ret < 0) { + D("Error while reading %s\n", filepath); + break; + } + // Detect end of list. 
+ if (ret == 0 || (entry.tag == 0 && entry.value == 0)) { + break; + } + if (entry.tag == hwcap_type) { + result = entry.value; + break; + } + } + CpuFeatures_CloseFile(fd); + return result; +} + +// Retrieves hardware capabilities by first trying to call getauxval, if not +// available falls back to reading "/proc/self/auxv". +static unsigned long GetHardwareCapabilitiesFor(uint32_t type) { + unsigned long hwcaps = GetElfHwcapFromGetauxval(type); + if (!hwcaps) { + D("Parsing /proc/self/auxv to extract ELF hwcaps!\n"); + hwcaps = GetElfHwcapFromProcSelfAuxv(type); + } + return hwcaps; +} + +HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void) { + HardwareCapabilities capabilities; + capabilities.hwcaps = GetHardwareCapabilitiesFor(AT_HWCAP); + capabilities.hwcaps2 = GetHardwareCapabilitiesFor(AT_HWCAP2); + return capabilities; +} + +PlatformType kEmptyPlatformType; + +PlatformType CpuFeatures_GetPlatformType(void) { + PlatformType type = kEmptyPlatformType; + char *platform = (char *)GetHardwareCapabilitiesFor(AT_PLATFORM); + char *base_platform = (char *)GetHardwareCapabilitiesFor(AT_BASE_PLATFORM); + + if (platform != NULL) + CpuFeatures_StringView_CopyString(str(platform), type.platform, + sizeof(type.platform)); + if (base_platform != NULL) + CpuFeatures_StringView_CopyString(str(base_platform), type.base_platform, + sizeof(type.base_platform)); + return type; +} +#else // (defined(HWCAPS_SUPPORTED) + +PlatformType kEmptyPlatformType; + +PlatformType CpuFeatures_GetPlatformType(void) { + PlatformType type = kEmptyPlatformType; + return type; +} + +//////////////////////////////////////////////////////////////////////////////// +// Implementation of GetHardwareCapabilities for unsupported platforms. 
+//////////////////////////////////////////////////////////////////////////////// + +const HardwareCapabilities kEmptyHardwareCapabilities; +HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void) { + return kEmptyHardwareCapabilities; +} +#endif diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/linux_features_aggregator.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/linux_features_aggregator.c new file mode 100755 index 00000000..b7f8f3d9 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/linux_features_aggregator.c @@ -0,0 +1,51 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "internal/linux_features_aggregator.h" +#include "internal/string_view.h" + +void CpuFeatures_SetFromFlags(const size_t configs_size, + const CapabilityConfig* configs, + const StringView flags_line, + void* const features) { + size_t i = 0; + for (; i < configs_size; ++i) { + const CapabilityConfig config = configs[i]; + config.set_bit(features, CpuFeatures_StringView_HasWord( + flags_line, config.proc_cpuinfo_flag)); + } +} + +static bool IsSet(const uint32_t mask, const uint32_t value) { + return (value & mask) == mask; +} + +static bool IsHwCapsSet(const HardwareCapabilities hwcaps_mask, + const HardwareCapabilities hwcaps) { + return IsSet(hwcaps_mask.hwcaps, hwcaps.hwcaps) && + IsSet(hwcaps_mask.hwcaps2, hwcaps.hwcaps2); +} + +void CpuFeatures_OverrideFromHwCaps(const size_t configs_size, + const CapabilityConfig* configs, + const HardwareCapabilities hwcaps, + void* const features) { + size_t i = 0; + for (; i < configs_size; ++i) { + const CapabilityConfig* config = &configs[i]; + if (IsHwCapsSet(config->hwcaps_mask, hwcaps)) { + config->set_bit(features, true); + } + } +} diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/stack_line_reader.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/stack_line_reader.c new file mode 100755 index 00000000..b2c48ba6 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/stack_line_reader.c @@ -0,0 +1,131 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "internal/stack_line_reader.h" +#include "internal/filesystem.h" + +#include +#include +#include + +void StackLineReader_Initialize(StackLineReader* reader, int fd) { + reader->view.ptr = reader->buffer; + reader->view.size = 0; + reader->skip_mode = false; + reader->fd = fd; +} + +// Replaces the content of buffer with bytes from the file. +static int LoadFullBuffer(StackLineReader* reader) { + const int read = CpuFeatures_ReadFile(reader->fd, reader->buffer, + STACK_LINE_READER_BUFFER_SIZE); + assert(read >= 0); + reader->view.ptr = reader->buffer; + reader->view.size = read; + return read; +} + +// Appends with bytes from the file to buffer, filling the remaining space. +static int LoadMore(StackLineReader* reader) { + char* const ptr = reader->buffer + reader->view.size; + const size_t size_to_read = STACK_LINE_READER_BUFFER_SIZE - reader->view.size; + const int read = CpuFeatures_ReadFile(reader->fd, ptr, size_to_read); + assert(read >= 0); + assert(read <= (int)size_to_read); + reader->view.size += read; + return read; +} + +static int IndexOfEol(StackLineReader* reader) { + return CpuFeatures_StringView_IndexOfChar(reader->view, '\n'); +} + +// Relocate buffer's pending bytes at the beginning of the array and fills the +// remaining space with bytes from the file. +static int BringToFrontAndLoadMore(StackLineReader* reader) { + if (reader->view.size && reader->view.ptr != reader->buffer) { + memmove(reader->buffer, reader->view.ptr, reader->view.size); + } + reader->view.ptr = reader->buffer; + return LoadMore(reader); +} + +// Loads chunks of buffer size from disks until it contains a newline character +// or end of file. 
+// Drops the rest of the current (over-long) line: keeps reloading the buffer
+// until an end-of-line is found or nothing more can be read.
+static void SkipToNextLine(StackLineReader* reader) {
+  for (;;) {
+    const int read = LoadFullBuffer(reader);
+    if (read == 0) {
+      break;
+    } else {
+      const int eol_index = IndexOfEol(reader);
+      if (eol_index >= 0) {
+        reader->view =
+            CpuFeatures_StringView_PopFront(reader->view, eol_index + 1);
+        break;
+      }
+    }
+  }
+}
+
+static LineResult CreateLineResult(bool eof, bool full_line, StringView view) {
+  LineResult result;
+  result.eof = eof;
+  result.full_line = full_line;
+  result.line = view;
+  return result;
+}
+
+// Helper methods to provide clearer semantic in StackLineReader_NextLine.
+static LineResult CreateEOFLineResult(StringView view) {
+  return CreateLineResult(true, true, view);
+}
+
+static LineResult CreateTruncatedLineResult(StringView view) {
+  return CreateLineResult(false, false, view);
+}
+
+static LineResult CreateValidLineResult(StringView view) {
+  return CreateLineResult(false, true, view);
+}
+
+// Returns the next line held by `reader`.
+// - If no end-of-line is buffered and the view is smaller than
+//   STACK_LINE_READER_BUFFER_SIZE, more data is loaded first; when nothing
+//   more could be read the remaining view is returned as an EOF result.
+// - If there is still no end-of-line, the view is returned as a truncated
+//   result and skip_mode is set so the rest of that line is discarded on the
+//   next call.
+LineResult StackLineReader_NextLine(StackLineReader* reader) {
+  if (reader->skip_mode) {
+    SkipToNextLine(reader);
+    reader->skip_mode = false;
+  }
+  {
+    const bool can_load_more =
+        reader->view.size < STACK_LINE_READER_BUFFER_SIZE;
+    int eol_index = IndexOfEol(reader);
+    if (eol_index < 0 && can_load_more) {
+      const int read = BringToFrontAndLoadMore(reader);
+      if (read == 0) {
+        return CreateEOFLineResult(reader->view);
+      }
+      eol_index = IndexOfEol(reader);
+    }
+    if (eol_index < 0) {
+      reader->skip_mode = true;
+      return CreateTruncatedLineResult(reader->view);
+    }
+    {
+      StringView line =
+          CpuFeatures_StringView_KeepFront(reader->view, eol_index);
+      reader->view =
+          CpuFeatures_StringView_PopFront(reader->view, eol_index + 1);
+      return CreateValidLineResult(line);
+    }
+  }
+}
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/string_view.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/string_view.c
new file mode 100755
index 00000000..4f27cbdb
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/string_view.c
@@ -0,0 +1,182 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "internal/string_view.h"
+
+#include <assert.h>
+#include <ctype.h>
+#include <string.h>
+
+// Returns the index of the first occurrence of `c` in `view`, or -1.
+int CpuFeatures_StringView_IndexOfChar(const StringView view, char c) {
+  if (view.ptr && view.size) {
+    const char* const found = (const char*)memchr(view.ptr, c, view.size);
+    if (found) {
+      return (int)(found - view.ptr);
+    }
+  }
+  return -1;
+}
+
+// Returns the index of the first occurrence of `sub_view` in `view`, or -1.
+// An empty `sub_view` is never found.
+int CpuFeatures_StringView_IndexOf(const StringView view,
+                                   const StringView sub_view) {
+  if (sub_view.size) {
+    StringView remainder = view;
+    while (remainder.size >= sub_view.size) {
+      const int found_index =
+          CpuFeatures_StringView_IndexOfChar(remainder, sub_view.ptr[0]);
+      if (found_index < 0) break;
+      remainder = CpuFeatures_StringView_PopFront(remainder, found_index);
+      if (CpuFeatures_StringView_StartsWith(remainder, sub_view)) {
+        return remainder.ptr - view.ptr;
+      }
+      remainder = CpuFeatures_StringView_PopFront(remainder, 1);
+    }
+  }
+  return -1;
+}
+
+// Two views are equal when they have the same size and the same bytes.
+// Empty views compare equal without touching their (possibly NULL) pointers.
+bool CpuFeatures_StringView_IsEquals(const StringView a, const StringView b) {
+  if (a.size == b.size) {
+    return a.size == 0 || a.ptr == b.ptr ||
+           memcmp(a.ptr, b.ptr, b.size) == 0;
+  }
+  return false;
+}
+
+bool CpuFeatures_StringView_StartsWith(const StringView a, const StringView b) {
+  return a.ptr && b.ptr && b.size && a.size >= b.size
+             ? memcmp(a.ptr, b.ptr, b.size) == 0
+             : false;
+}
+
+StringView CpuFeatures_StringView_PopFront(const StringView str_view,
+                                           size_t count) {
+  if (count > str_view.size) {
+    return kEmptyStringView;
+  }
+  return view(str_view.ptr + count, str_view.size - count);
+}
+
+StringView CpuFeatures_StringView_PopBack(const StringView str_view,
+                                          size_t count) {
+  if (count > str_view.size) {
+    return kEmptyStringView;
+  }
+  return view(str_view.ptr, str_view.size - count);
+}
+
+StringView CpuFeatures_StringView_KeepFront(const StringView str_view,
+                                            size_t count) {
+  return count <= str_view.size ? view(str_view.ptr, count) : str_view;
+}
+
+char CpuFeatures_StringView_Front(const StringView view) {
+  assert(view.size);
+  assert(view.ptr);
+  return view.ptr[0];
+}
+
+char CpuFeatures_StringView_Back(const StringView view) {
+  assert(view.size);
+  assert(view.ptr);
+  return view.ptr[view.size - 1];
+}
+
+// Removes leading and trailing whitespace. The character is converted to
+// unsigned char first because isspace() is undefined for negative values.
+StringView CpuFeatures_StringView_TrimWhitespace(StringView view) {
+  while (view.size &&
+         isspace((unsigned char)CpuFeatures_StringView_Front(view)))
+    view = CpuFeatures_StringView_PopFront(view, 1);
+  while (view.size &&
+         isspace((unsigned char)CpuFeatures_StringView_Back(view)))
+    view = CpuFeatures_StringView_PopBack(view, 1);
+  return view;
+}
+
+static int HexValue(const char c) {
+  if (c >= '0' && c <= '9') return c - '0';
+  if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+  if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+  return -1;
+}
+
+// Returns -1 if view contains non digits.
+// Also returns -1 when the value does not fit in an int.
+static int ParsePositiveNumberWithBase(const StringView view, int base) {
+  const int max_int = (int)(~0u >> 1);  // largest int, without <limits.h>
+  int result = 0;
+  StringView remainder = view;
+  for (; remainder.size;
+       remainder = CpuFeatures_StringView_PopFront(remainder, 1)) {
+    const int value = HexValue(CpuFeatures_StringView_Front(remainder));
+    if (value < 0 || value >= base) return -1;
+    // Reject instead of overflowing: signed overflow is undefined behavior.
+    if (result > (max_int - value) / base) return -1;
+    result = (result * base) + value;
+  }
+  return result;
+}
+
+// Parses a decimal number, or a hexadecimal one when prefixed with "0x".
+// Returns -1 for an empty view or on any parse failure.
+int CpuFeatures_StringView_ParsePositiveNumber(const StringView view) {
+  if (view.size) {
+    const StringView hex_prefix = str("0x");
+    if (CpuFeatures_StringView_StartsWith(view, hex_prefix)) {
+      const StringView span_no_prefix =
+          CpuFeatures_StringView_PopFront(view, hex_prefix.size);
+      return ParsePositiveNumberWithBase(span_no_prefix, 16);
+    }
+    return ParsePositiveNumberWithBase(view, 10);
+  }
+  return -1;
+}
+
+// Copies at most dst_size - 1 bytes of `src` into `dst` and always
+// null-terminates. Does nothing when dst_size is 0.
+void CpuFeatures_StringView_CopyString(const StringView src, char* dst,
+                                       size_t dst_size) {
+  if (dst_size > 0) {
+    const size_t max_copy_size = dst_size - 1;
+    const size_t copy_size =
+        src.size > max_copy_size ? max_copy_size : src.size;
+    // An empty view may carry a NULL pointer, which memcpy must not receive.
+    if (copy_size > 0) memcpy(dst, src.ptr, copy_size);
+    dst[copy_size] = '\0';
+  }
+}
+
+// Returns true if `word_str` occurs in `line` delimited on both sides by a
+// space or by the start/end of the line.
+bool CpuFeatures_StringView_HasWord(const StringView line,
+                                    const char* const word_str) {
+  const StringView word = str(word_str);
+  StringView remainder = line;
+  for (;;) {
+    const int index_of_word = CpuFeatures_StringView_IndexOf(remainder, word);
+    if (index_of_word < 0) {
+      return false;
+    } else {
+      // index_of_word is relative to `remainder`; rebase it onto `line` so
+      // the characters around the match are inspected at the right place.
+      const size_t offset =
+          (size_t)(remainder.ptr - line.ptr) + (size_t)index_of_word;
+      const StringView before = CpuFeatures_StringView_KeepFront(line, offset);
+      const StringView after =
+          CpuFeatures_StringView_PopFront(line, offset + word.size);
+      const bool valid_before =
+          before.size == 0 || CpuFeatures_StringView_Back(before) == ' ';
+      const bool valid_after =
+          after.size == 0 || CpuFeatures_StringView_Front(after) == ' ';
+      if (valid_before && valid_after) return true;
+      // Continue searching right after the rejected occurrence.
+      remainder = after;
+    }
+  }
+  return false;
+}
+
+// Splits "key: value" at the first ": ". On success stores the
+// whitespace-trimmed key and value and returns true.
+bool CpuFeatures_StringView_GetAttributeKeyValue(const StringView line,
+                                                 StringView* key,
+                                                 StringView* value) {
+  const StringView sep = str(": ");
+  const int index_of_separator = CpuFeatures_StringView_IndexOf(line, sep);
+  if (index_of_separator < 0) return false;
+  *value = CpuFeatures_StringView_TrimWhitespace(
+      CpuFeatures_StringView_PopFront(line, index_of_separator + sep.size));
+  *key = CpuFeatures_StringView_TrimWhitespace(
+      CpuFeatures_StringView_KeepFront(line, index_of_separator));
+  return true;
+}
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/utils/list_cpu_features.c b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/utils/list_cpu_features.c
new file mode 100755
index 00000000..a5f7f8ce
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/src/utils/list_cpu_features.c
@@ -0,0 +1,237 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cpu_features_macros.h"
+#include "cpuinfo_aarch64.h"
+#include "cpuinfo_arm.h"
+#include "cpuinfo_mips.h"
+#include "cpuinfo_ppc.h"
+#include "cpuinfo_x86.h"
+
+// Prints `str` as a double-quoted JSON string. Quote, backslash and slash are
+// backslash-escaped; control characters are written with their JSON escape
+// sequence (a backslash followed by the raw control byte is not valid JSON).
+// A NULL `str` prints an empty string.
+static void PrintEscapedAscii(const char* str) {
+  putchar('"');
+  for (; str && *str; ++str) {
+    switch (*str) {
+      case '\"':
+      case '\\':
+      case '/':
+        putchar('\\');
+        putchar(*str);
+        break;
+      case '\b':
+        printf("\\b");
+        break;
+      case '\f':
+        printf("\\f");
+        break;
+      case '\n':
+        printf("\\n");
+        break;
+      case '\r':
+        printf("\\r");
+        break;
+      case '\t':
+        printf("\\t");
+        break;
+      default:
+        if ((unsigned char)*str < 0x20) {
+          // Remaining control characters have no short escape.
+          printf("\\u%04x", (unsigned)(unsigned char)*str);
+        } else {
+          putchar(*str);
+        }
+        break;
+    }
+  }
+  putchar('"');
+}
+
+static void PrintVoid(void) {}
+static void PrintComma(void) { putchar(','); }
+static void PrintLineFeed(void) { putchar('\n'); }
+static void PrintOpenBrace(void) { putchar('{'); }
+static void PrintCloseBrace(void) { putchar('}'); }
+static void PrintOpenBracket(void) { putchar('['); }
+static void PrintCloseBracket(void) { putchar(']'); }
+static void PrintString(const char* field) { printf("%s", field); }
+static void PrintAlignedHeader(const char* field) { printf("%-15s : ", field); }
+static void PrintIntValue(int value) { printf("%d", value); }
+static void PrintDecHexValue(int value) {
+  printf("%3d (0x%02X)", value, value);
+}
+static void PrintJsonHeader(const char* field) {
+  PrintEscapedAscii(field);
+  putchar(':');
+}
+
+// Table of output callbacks; one instance per output format (JSON or text).
+typedef struct {
+  void (*Start)(void);
+  void (*ArrayStart)(void);
+  void (*ArraySeparator)(void);
+  void (*ArrayEnd)(void);
+  void (*PrintString)(const char* value);
+  void (*PrintValue)(int value);
+  void (*EndField)(void);
+  void (*StartField)(const char* field);
+  void (*End)(void);
+} Printer;
+
+static Printer getJsonPrinter(void) {
+  return (Printer){
+      .Start = &PrintOpenBrace,
+      .ArrayStart = &PrintOpenBracket,
+      .ArraySeparator = &PrintComma,
+      .ArrayEnd = &PrintCloseBracket,
+      .PrintString = &PrintEscapedAscii,
+      .PrintValue = &PrintIntValue,
+      .EndField = &PrintComma,
+      .StartField = &PrintJsonHeader,
+      .End = &PrintCloseBrace,
+  };
+}
+
+static Printer getTextPrinter(void) {
+  return (Printer){
+      .Start = &PrintVoid,
+      .ArrayStart = &PrintVoid,
+      .ArraySeparator = &PrintComma,
+      .ArrayEnd = &PrintVoid,
+      .PrintString = &PrintString,
+      .PrintValue = &PrintDecHexValue,
+      .EndField = &PrintLineFeed,
+      .StartField = &PrintAlignedHeader,
+      .End = &PrintVoid,
+  };
+}
+
+// Prints a named numeric value in both decimal and hexadecimal.
+static void PrintN(const Printer p, const char* field, int value) {
+  p.StartField(field);
+  p.PrintValue(value);
+  p.EndField();
+}
+
+// Prints a named string.
+static void PrintS(const Printer p, const char* field, const char* value) {
+  p.StartField(field);
+  p.PrintString(value);
+  p.EndField();
+}
+
+// qsort comparator for an array of C strings.
+static int cmp(const void* p1, const void* p2) {
+  return strcmp(*(const char* const*)p1, *(const char* const*)p2);
+}
+
+// Defines PrintFlags(): collects the names of all enabled features, sorts
+// them and prints them as the "flags" array. EndField is intentionally not
+// called, so "flags" has to stay the last field printed.
+#define DEFINE_PRINT_FLAGS(HasFeature, FeatureName, FeatureType, LastEnum) \
+  static void PrintFlags(const Printer p, const FeatureType* features) {   \
+    size_t i;                                                              \
+    const char* ptrs[LastEnum] = {0};                                      \
+    size_t count = 0;                                                      \
+    for (i = 0; i < LastEnum; ++i) {                                       \
+      if (HasFeature(features, i)) {                                       \
+        ptrs[count] = FeatureName(i);                                      \
+        ++count;                                                           \
+      }                                                                    \
+    }                                                                      \
+    qsort(ptrs, count, sizeof(char*), cmp);                                \
+    p.StartField("flags");                                                 \
+    p.ArrayStart();                                                        \
+    for (i = 0; i < count; ++i) {                                          \
+      if (i > 0) p.ArraySeparator();                                       \
+      p.PrintString(ptrs[i]);                                              \
+    }                                                                      \
+    p.ArrayEnd();                                                          \
+  }
+
+#if defined(CPU_FEATURES_ARCH_X86)
+DEFINE_PRINT_FLAGS(GetX86FeaturesEnumValue, GetX86FeaturesEnumName, X86Features,
+                   X86_LAST_)
+#elif defined(CPU_FEATURES_ARCH_ARM)
+DEFINE_PRINT_FLAGS(GetArmFeaturesEnumValue, GetArmFeaturesEnumName, ArmFeatures,
+                   ARM_LAST_)
+#elif defined(CPU_FEATURES_ARCH_AARCH64)
+DEFINE_PRINT_FLAGS(GetAarch64FeaturesEnumValue, GetAarch64FeaturesEnumName,
+                   Aarch64Features, AARCH64_LAST_)
+#elif defined(CPU_FEATURES_ARCH_MIPS)
+DEFINE_PRINT_FLAGS(GetMipsFeaturesEnumValue, GetMipsFeaturesEnumName,
+                   MipsFeatures, MIPS_LAST_)
+#elif defined(CPU_FEATURES_ARCH_PPC)
+DEFINE_PRINT_FLAGS(GetPPCFeaturesEnumValue, GetPPCFeaturesEnumName, PPCFeatures,
+                   PPC_LAST_)
+#endif
+
+// Prints the architecture-specific fields followed by the feature flags.
+static void PrintFeatures(const Printer printer) {
+#if defined(CPU_FEATURES_ARCH_X86)
+  char brand_string[49];
+  const X86Info info = GetX86Info();
+  FillX86BrandString(brand_string);
+  PrintS(printer, "arch", "x86");
+  PrintS(printer, "brand", brand_string);
+  PrintN(printer, "family", info.family);
+  PrintN(printer, "model", info.model);
+  PrintN(printer, "stepping", info.stepping);
+  PrintS(printer, "uarch",
+         GetX86MicroarchitectureName(GetX86Microarchitecture(&info)));
+  PrintFlags(printer, &info.features);
+#elif defined(CPU_FEATURES_ARCH_ARM)
+  const ArmInfo info = GetArmInfo();
+  PrintS(printer, "arch", "ARM");
+  PrintN(printer, "implementer", info.implementer);
+  PrintN(printer, "architecture", info.architecture);
+  PrintN(printer, "variant", info.variant);
+  PrintN(printer, "part", info.part);
+  PrintN(printer, "revision", info.revision);
+  PrintFlags(printer, &info.features);
+#elif defined(CPU_FEATURES_ARCH_AARCH64)
+  const Aarch64Info info = GetAarch64Info();
+  PrintS(printer, "arch", "aarch64");
+  PrintN(printer, "implementer", info.implementer);
+  PrintN(printer, "variant", info.variant);
+  PrintN(printer, "part", info.part);
+  PrintN(printer, "revision", info.revision);
+  PrintFlags(printer, &info.features);
+#elif defined(CPU_FEATURES_ARCH_MIPS)
+  const MipsInfo info = GetMipsInfo();
+  PrintS(printer, "arch", "mips");
+  PrintFlags(printer, &info.features);
+#elif defined(CPU_FEATURES_ARCH_PPC)
+  const PPCInfo info = GetPPCInfo();
+  const PPCPlatformStrings strings = GetPPCPlatformStrings();
+  PrintS(printer, "arch", "ppc");
+  PrintS(printer, "platform", strings.platform);
+  PrintS(printer, "model", strings.model);
+  PrintS(printer, "machine", strings.machine);
+  PrintS(printer, "cpu", strings.cpu);
+  PrintS(printer, "instruction set", strings.type.platform);
+  PrintS(printer, "microarchitecture", strings.type.base_platform);
+  PrintFlags(printer, &info.features);
+#endif
+}
+
+static void showUsage(const char* name) {
+  printf(
+      "\n"
+      "Usage: %s [options]\n"
+      " Options:\n"
+      " -h | --help Show help message.\n"
+      " -j | --json Format output as json instead of plain text.\n"
+      "\n",
+      name);
+}
+
+// Text output by default; -j/--json switches to JSON. Any other argument
+// prints the usage and exits (successfully only for -h/--help).
+int main(int argc, char** argv) {
+  Printer printer = getTextPrinter();
+  int i = 1;
+  for (; i < argc; ++i) {
+    const char* arg = argv[i];
+    if (strcmp(arg, "-j") == 0 || strcmp(arg, "--json") == 0) {
+      printer = getJsonPrinter();
+    } else {
+      showUsage(argv[0]);
+      if (strcmp(arg, "-h") == 0 || strcmp(arg, "--help") == 0)
+        return EXIT_SUCCESS;
+      return EXIT_FAILURE;
+    }
+  }
+  printer.Start();
+  PrintFeatures(printer);
+  printer.End();
+  PrintLineFeed();
+  return EXIT_SUCCESS;
+}
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/CMakeLists.txt b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/CMakeLists.txt
new file mode 100755
index 00000000..794ef04b
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/CMakeLists.txt
@@ -0,0 +1,79 @@
+#
+# libraries for tests
+#
+
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF) # prefer -std=c++11 over -std=gnu++11
+
+include_directories(../include)
+add_definitions(-DCPU_FEATURES_TEST)
+
+##------------------------------------------------------------------------------
+add_library(string_view ../src/string_view.c)
+##------------------------------------------------------------------------------
+add_library(filesystem_for_testing filesystem_for_testing.cc)
+##------------------------------------------------------------------------------
+add_library(hwcaps_for_testing hwcaps_for_testing.cc)
+target_link_libraries(hwcaps_for_testing filesystem_for_testing)
+##------------------------------------------------------------------------------
+# stack_line_reader.c is built twice: once with a 1024-byte buffer and once
+# with a 16-byte buffer (the variant linked by stack_line_reader_test).
+add_library(stack_line_reader ../src/stack_line_reader.c)
+target_compile_definitions(stack_line_reader PUBLIC STACK_LINE_READER_BUFFER_SIZE=1024)
+target_link_libraries(stack_line_reader string_view)
+##------------------------------------------------------------------------------
+add_library(stack_line_reader_for_test ../src/stack_line_reader.c)
+target_compile_definitions(stack_line_reader_for_test PUBLIC STACK_LINE_READER_BUFFER_SIZE=16)
+target_link_libraries(stack_line_reader_for_test string_view filesystem_for_testing)
+##------------------------------------------------------------------------------
+# Library linked by the linux_features_aggregator and cpuinfo_* tests below.
+add_library(all_libraries ../src/stack_line_reader.c ../src/linux_features_aggregator.c)
+target_link_libraries(all_libraries hwcaps_for_testing stack_line_reader string_view)
+
+#
+# tests
+#
+# Every target declared after this point links gtest and gmock_main.
+link_libraries(gtest gmock_main)
+
+## bit_utils_test
+add_executable(bit_utils_test bit_utils_test.cc)
+target_link_libraries(bit_utils_test)
+add_test(NAME bit_utils_test COMMAND bit_utils_test)
+##------------------------------------------------------------------------------
+## string_view_test
+add_executable(string_view_test string_view_test.cc ../src/string_view.c)
+target_link_libraries(string_view_test string_view)
+add_test(NAME string_view_test COMMAND string_view_test)
+##------------------------------------------------------------------------------
+## stack_line_reader_test
+add_executable(stack_line_reader_test stack_line_reader_test.cc)
+target_link_libraries(stack_line_reader_test stack_line_reader_for_test)
+add_test(NAME stack_line_reader_test COMMAND stack_line_reader_test)
+##------------------------------------------------------------------------------
+## linux_features_aggregator_test
+add_executable(linux_features_aggregator_test linux_features_aggregator_test.cc)
+target_link_libraries(linux_features_aggregator_test all_libraries)
+add_test(NAME linux_features_aggregator_test COMMAND linux_features_aggregator_test)
+##------------------------------------------------------------------------------
+## cpuinfo_x86_test
+add_executable(cpuinfo_x86_test cpuinfo_x86_test.cc ../src/cpuinfo_x86.c)
+target_link_libraries(cpuinfo_x86_test all_libraries)
+add_test(NAME cpuinfo_x86_test COMMAND cpuinfo_x86_test)
+##------------------------------------------------------------------------------
+## cpuinfo_arm_test
+add_executable(cpuinfo_arm_test cpuinfo_arm_test.cc ../src/cpuinfo_arm.c)
+target_link_libraries(cpuinfo_arm_test all_libraries)
+add_test(NAME cpuinfo_arm_test COMMAND cpuinfo_arm_test)
+##------------------------------------------------------------------------------
+## cpuinfo_aarch64_test
+add_executable(cpuinfo_aarch64_test cpuinfo_aarch64_test.cc ../src/cpuinfo_aarch64.c)
+target_link_libraries(cpuinfo_aarch64_test all_libraries)
+add_test(NAME cpuinfo_aarch64_test COMMAND cpuinfo_aarch64_test)
+##------------------------------------------------------------------------------
+## cpuinfo_mips_test
+add_executable(cpuinfo_mips_test cpuinfo_mips_test.cc ../src/cpuinfo_mips.c)
+target_link_libraries(cpuinfo_mips_test all_libraries)
+add_test(NAME cpuinfo_mips_test COMMAND cpuinfo_mips_test)
+##------------------------------------------------------------------------------
+## cpuinfo_ppc_test
+add_executable(cpuinfo_ppc_test cpuinfo_ppc_test.cc ../src/cpuinfo_ppc.c)
+target_link_libraries(cpuinfo_ppc_test all_libraries)
+add_test(NAME cpuinfo_ppc_test COMMAND cpuinfo_ppc_test)
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/bit_utils_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/bit_utils_test.cc
new file mode 100755
index 00000000..8937cbc2
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/bit_utils_test.cc
@@ -0,0 +1,53 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "internal/bit_utils.h"
+
+#include "gtest/gtest.h"
+
+namespace cpu_features {
+namespace {
+
+TEST(UtilsTest, IsBitSet) {
+  // A value with exactly one bit set must report only that bit.
+  for (size_t bit_set = 0; bit_set < 32; ++bit_set) {
+    const uint32_t value = 1UL << bit_set;
+    for (size_t i = 0; i < 32; ++i) {
+      EXPECT_EQ(IsBitSet(value, i), i == bit_set);
+    }
+  }
+
+  // testing 0, all bits should be 0.
+  for (size_t i = 0; i < 32; ++i) {
+    EXPECT_FALSE(IsBitSet(0, i));
+  }
+
+  // testing ~0, all bits should be 1.
+  for (size_t i = 0; i < 32; ++i) {
+    EXPECT_TRUE(IsBitSet(-1, i));
+  }
+}
+
+TEST(UtilsTest, ExtractBitRange) {
+  // Extracting all bits gives the same number.
+  EXPECT_EQ(ExtractBitRange(123, 31, 0), 123);
+  // Extracting 1 bit gives parity.
+  EXPECT_EQ(ExtractBitRange(123, 0, 0), 1);
+  EXPECT_EQ(ExtractBitRange(122, 0, 0), 0);
+
+  // The arguments after the value are the most and least significant bit.
+  EXPECT_EQ(ExtractBitRange(0xF0, 7, 4), 0xF);
+  EXPECT_EQ(ExtractBitRange(0x42 << 2, 10, 2), 0x42);
+}
+
+}  // namespace
+}  // namespace cpu_features
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_aarch64_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_aarch64_test.cc
new file mode 100755
index 00000000..bdb4d17c
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_aarch64_test.cc
@@ -0,0 +1,74 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "cpuinfo_aarch64.h"
+#include "filesystem_for_testing.h"
+#include "hwcaps_for_testing.h"
+
+#include "gtest/gtest.h"
+
+namespace cpu_features {
+namespace {
+
+// Clears both hardware capability words so that only /proc/cpuinfo is left as
+// a source of features.
+void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); }
+
+// Only the features whose hardware capability bits are set may be reported.
+TEST(CpuinfoAarch64Test, FromHardwareCap) {
+  SetHardwareCapabilities(AARCH64_HWCAP_FP | AARCH64_HWCAP_AES, 0);
+  GetEmptyFilesystem();  // disabling /proc/cpuinfo
+  const auto info = GetAarch64Info();
+  EXPECT_TRUE(info.features.fp);
+  EXPECT_FALSE(info.features.asimd);
+  EXPECT_TRUE(info.features.aes);
+  EXPECT_FALSE(info.features.pmull);
+  EXPECT_FALSE(info.features.sha1);
+  EXPECT_FALSE(info.features.sha2);
+  EXPECT_FALSE(info.features.crc32);
+}
+
+// With hardware capabilities cleared, identification fields and features are
+// read from the fake /proc/cpuinfo.
+TEST(CpuinfoAarch64Test, ARMCortexA53) {
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo",
+                R"(Processor : AArch64 Processor rev 3 (aarch64)
+processor : 0
+processor : 1
+processor : 2
+processor : 3
+processor : 4
+processor : 5
+processor : 6
+processor : 7
+Features : fp asimd evtstrm aes pmull sha1 sha2 crc32
+CPU implementer : 0x41
+CPU architecture: AArch64
+CPU variant : 0x0
+CPU part : 0xd03
+CPU revision : 3)");
+  const auto info = GetAarch64Info();
+  EXPECT_EQ(info.implementer, 0x41);
+  EXPECT_EQ(info.variant, 0x0);
+  EXPECT_EQ(info.part, 0xd03);
+  EXPECT_EQ(info.revision, 3);
+
+  EXPECT_TRUE(info.features.fp);
+  EXPECT_TRUE(info.features.asimd);
+  EXPECT_TRUE(info.features.aes);
+  EXPECT_TRUE(info.features.pmull);
+  EXPECT_TRUE(info.features.sha1);
+  EXPECT_TRUE(info.features.sha2);
+  EXPECT_TRUE(info.features.crc32);
+}
+
+}  // namespace
+}  // namespace cpu_features
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_arm_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_arm_test.cc
new file mode 100755
index 00000000..a72c5662
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_arm_test.cc
@@ -0,0 +1,182 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "cpuinfo_arm.h"
+#include "filesystem_for_testing.h"
+#include "hwcaps_for_testing.h"
+
+#include "gtest/gtest.h"
+
+namespace cpu_features {
+namespace {
+
+// Clears both hardware capability words so that only /proc/cpuinfo is left as
+// a source of features.
+void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); }
+
+TEST(CpuinfoArmTest, FromHardwareCap) {
+  SetHardwareCapabilities(ARM_HWCAP_NEON, ARM_HWCAP2_AES | ARM_HWCAP2_CRC32);
+  GetEmptyFilesystem();  // disabling /proc/cpuinfo
+  const auto info = GetArmInfo();
+  EXPECT_TRUE(info.features.vfp);    // triggered by vfpv3
+  EXPECT_TRUE(info.features.vfpv3);  // triggered by neon
+  EXPECT_TRUE(info.features.neon);
+  EXPECT_TRUE(info.features.aes);
+  EXPECT_TRUE(info.features.crc32);
+
+  EXPECT_FALSE(info.features.vfpv4);
+  EXPECT_FALSE(info.features.iwmmxt);
+  EXPECT_FALSE(info.features.vfpv3d16);
+  EXPECT_FALSE(info.features.idiva);
+  EXPECT_FALSE(info.features.idivt);
+  EXPECT_FALSE(info.features.pmull);
+  EXPECT_FALSE(info.features.sha1);
+  EXPECT_FALSE(info.features.sha2);
+}
+
+TEST(CpuinfoArmTest, ODroidFromCpuInfo) {
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo", R"(processor : 0
+model name : ARMv7 Processor rev 3 (v71)
+BogoMIPS : 120.00
+Features : half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt vfpd32 lpae
+CPU implementer : 0x41
+CPU architecture: 7
+CPU variant : 0x2
+CPU part : 0xc0f
+CPU revision : 3)");
+  const auto info = GetArmInfo();
+  EXPECT_EQ(info.implementer, 0x41);
+  EXPECT_EQ(info.variant, 0x2);
+  EXPECT_EQ(info.part, 0xc0f);
+  EXPECT_EQ(info.revision, 3);
+  EXPECT_EQ(info.architecture, 7);
+
+  EXPECT_TRUE(info.features.vfp);
+  EXPECT_FALSE(info.features.iwmmxt);
+  EXPECT_TRUE(info.features.neon);
+  EXPECT_TRUE(info.features.vfpv3);
+  EXPECT_FALSE(info.features.vfpv3d16);
+  EXPECT_TRUE(info.features.vfpv4);
+  EXPECT_TRUE(info.features.idiva);
+  EXPECT_TRUE(info.features.idivt);
+  EXPECT_FALSE(info.features.aes);
+  EXPECT_FALSE(info.features.pmull);
+  EXPECT_FALSE(info.features.sha1);
+  EXPECT_FALSE(info.features.sha2);
+  EXPECT_FALSE(info.features.crc32);
+}
+
+// http://code.google.com/p/android/issues/detail?id=10812
+TEST(CpuinfoArmTest, InvalidArmv7) {
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo",
+                R"(Processor : ARMv6-compatible processor rev 6 (v6l)
+BogoMIPS : 199.47
+Features : swp half thumb fastmult vfp edsp java
+CPU implementer : 0x41
+CPU architecture: 7
+CPU variant : 0x0
+CPU part : 0xb76
+CPU revision : 6
+
+Hardware : SPICA
+Revision : 0020
+Serial : 33323613546d00ec )");
+  const auto info = GetArmInfo();
+  EXPECT_EQ(info.architecture, 6);
+}
+
+// https://crbug.com/341598.
+TEST(CpuinfoArmTest, InvalidNeon) {
+  // Reset the hardware capabilities like every other cpuinfo-driven test here,
+  // so the result does not depend on what a previously run test left set.
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo",
+                R"(Processor: ARMv7 Processory rev 0 (v71)
+processor: 0
+BogoMIPS: 13.50
+
+Processor: 1
+BogoMIPS: 13.50
+
+Features: swp half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt
+CPU implementer : 0x51
+CPU architecture: 7
+CPU variant: 0x1
+CPU part: 0x04d
+CPU revision: 0
+
+Hardware: SAMSUNG M2
+Revision: 0010
+Serial: 00001e030000354e)");
+  const auto info = GetArmInfo();
+  EXPECT_FALSE(info.features.neon);
+}
+
+// The Nexus 4 (Qualcomm Krait) kernel configuration forgets to report IDIV
+// support.
+TEST(CpuinfoArmTest, Nexus4_0x510006f2) {
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo",
+                R"(CPU implementer : 0x51
+CPU architecture: 7
+CPU variant : 0x0
+CPU part : 0x6f
+CPU revision : 2)");
+  const auto info = GetArmInfo();
+  EXPECT_TRUE(info.features.idiva);
+  EXPECT_TRUE(info.features.idivt);
+}
+
+// The Nexus 4 (Qualcomm Krait) kernel configuration forgets to report IDIV
+// support.
+TEST(CpuinfoArmTest, Nexus4_0x510006f3) {
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  // No "Features" line: idiva/idivt are expected from the CPU id fields alone.
+  fs.CreateFile("/proc/cpuinfo",
+                R"(CPU implementer : 0x51
+CPU architecture: 7
+CPU variant : 0x0
+CPU part : 0x6f
+CPU revision : 3)");
+  const auto info = GetArmInfo();
+  EXPECT_TRUE(info.features.idiva);
+  EXPECT_TRUE(info.features.idivt);
+}
+
+// The emulator-specific Android 4.2 kernel fails to report support for the
+// 32-bit ARM IDIV instruction. Technically, this is a feature of the virtual
+// CPU implemented by the emulator.
+TEST(CpuinfoArmTest, EmulatorSpecificIdiv) {
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  // The "Features" line below does not list idiva.
+  fs.CreateFile("/proc/cpuinfo",
+                R"(Processor : ARMv7 Processor rev 0 (v7l)
+BogoMIPS : 629.14
+Features : swp half thumb fastmult vfp edsp neon vfpv3
+CPU implementer : 0x41
+CPU architecture: 7
+CPU variant : 0x0
+CPU part : 0xc08
+CPU revision : 0
+
+Hardware : Goldfish
+Revision : 0000
+Serial : 0000000000000000)");
+  const auto info = GetArmInfo();
+  EXPECT_TRUE(info.features.idiva);
+}
+
+}  // namespace
+}  // namespace cpu_features
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_mips_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_mips_test.cc
new file mode 100755
index 00000000..7c5a6752
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_mips_test.cc
@@ -0,0 +1,125 @@
+// Copyright 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "cpuinfo_mips.h"
+#include "filesystem_for_testing.h"
+#include "hwcaps_for_testing.h"
+#include "internal/stack_line_reader.h"
+#include "internal/string_view.h"
+
+#include "gtest/gtest.h"
+
+namespace cpu_features {
+
+namespace {
+
+// Clears both hardware capability words so that only /proc/cpuinfo is left as
+// a source of features.
+void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); }
+
+TEST(CpuinfoMipsTest, FromHardwareCapBoth) {
+  SetHardwareCapabilities(MIPS_HWCAP_EVA | MIPS_HWCAP_MSA, 0);
+  GetEmptyFilesystem();  // disabling /proc/cpuinfo
+  const auto info = GetMipsInfo();
+  EXPECT_TRUE(info.features.msa);
+  EXPECT_TRUE(info.features.eva);
+}
+
+TEST(CpuinfoMipsTest, FromHardwareCapOnlyOne) {
+  SetHardwareCapabilities(MIPS_HWCAP_MSA, 0);
+  GetEmptyFilesystem();  // disabling /proc/cpuinfo
+  const auto info = GetMipsInfo();
+  EXPECT_TRUE(info.features.msa);
+  EXPECT_FALSE(info.features.eva);
+}
+
+// "ASEs implemented" lists eva but not msa.
+TEST(CpuinfoMipsTest, Ci40) {
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo", R"(system type : IMG Pistachio SoC (B0)
+machine : IMG Marduk – Ci40 with cc2520
+processor : 0
+cpu model : MIPS interAptiv (multi) V2.0 FPU V0.0
+BogoMIPS : 363.72
+wait instruction : yes
+microsecond timers : yes
+tlb_entries : 64
+extra interrupt vector : yes
+hardware watchpoint : yes, count: 4, address/irw mask: [0x0ffc, 0x0ffc, 0x0ffb, 0x0ffb]
+isa : mips1 mips2 mips32r1 mips32r2
+ASEs implemented : mips16 dsp mt eva
+shadow register sets : 1
+kscratch registers : 0
+package : 0
+core : 0
+VCED exceptions : not available
+VCEI exceptions : not available
+VPE : 0
+)");
+  const auto info = GetMipsInfo();
+  EXPECT_FALSE(info.features.msa);
+  EXPECT_TRUE(info.features.eva);
+}
+
+// "ASEs implemented" lists neither msa nor eva.
+TEST(CpuinfoMipsTest, AR7161) {
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo",
+                R"(system type : Atheros AR7161 rev 2
+machine : NETGEAR WNDR3700/WNDR3800/WNDRMAC
+processor : 0
+cpu model : MIPS 24Kc V7.4
+BogoMIPS : 452.19
+wait instruction : yes
+microsecond timers : yes
+tlb_entries : 16
+extra interrupt vector : yes
+hardware watchpoint : yes, count: 4, address/irw mask: [0x0000, 0x0f98, 0x0f78, 0x0df8]
+ASEs implemented : mips16
+shadow register sets : 1
+kscratch registers : 0
+core : 0
+VCED exceptions : not available
+VCEI exceptions : not available
+)");
+  const auto info = GetMipsInfo();
+  EXPECT_FALSE(info.features.msa);
+  EXPECT_FALSE(info.features.eva);
+}
+
+// "ASEs implemented" is present but empty.
+TEST(CpuinfoMipsTest, Goldfish) {
+  DisableHardwareCapabilities();
+  auto& fs = GetEmptyFilesystem();
+  fs.CreateFile("/proc/cpuinfo", R"(system type : MIPS-Goldfish
+Hardware : goldfish
+Revison : 1
+processor : 0
+cpu model : MIPS 24Kc V0.0 FPU V0.0
+BogoMIPS : 1042.02
+wait instruction : yes
+microsecond timers : yes
+tlb_entries : 16
+extra interrupt vector : yes
+hardware watchpoint : yes, count: 1, address/irw mask: [0x0ff8]
+ASEs implemented :
+shadow register sets : 1
+core : 0
+VCED exceptions : not available
+VCEI exceptions : not available
+)");
+  const auto info = GetMipsInfo();
+  EXPECT_FALSE(info.features.msa);
+  EXPECT_FALSE(info.features.eva);
+}
+
+}  // namespace
+}  // namespace cpu_features
diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_ppc_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_ppc_test.cc
new file mode 100755
index 00000000..5d5e7980
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/cpuinfo_ppc_test.cc
@@ -0,0 +1,119 @@
+// Copyright 2018 IBM.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpuinfo_ppc.h" +#include "filesystem_for_testing.h" +#include "hwcaps_for_testing.h" +#include "internal/string_view.h" + +#include "gtest/gtest.h" + +namespace cpu_features { +namespace { + +void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); } + +TEST(CpustringsPPCTest, FromHardwareCap) { + SetHardwareCapabilities(PPC_FEATURE_HAS_FPU | PPC_FEATURE_HAS_VSX, + PPC_FEATURE2_ARCH_3_00); + GetEmptyFilesystem(); // disabling /proc/cpuinfo + const auto info = GetPPCInfo(); + EXPECT_TRUE(info.features.fpu); + EXPECT_FALSE(info.features.mmu); + EXPECT_TRUE(info.features.vsx); + EXPECT_TRUE(info.features.arch300); + EXPECT_FALSE(info.features.power4); + EXPECT_FALSE(info.features.altivec); + EXPECT_FALSE(info.features.vcrypto); + EXPECT_FALSE(info.features.htm); +} + +TEST(CpustringsPPCTest, Blade) { + DisableHardwareCapabilities(); + auto& fs = GetEmptyFilesystem(); + fs.CreateFile("/proc/cpuinfo", + R"(processor : 14 +cpu : POWER7 (architected), altivec supported +clock : 3000.000000MHz +revision : 2.1 (pvr 003f 0201) + +processor : 15 +cpu : POWER7 (architected), altivec supported +clock : 3000.000000MHz +revision : 2.1 (pvr 003f 0201) + +timebase : 512000000 +platform : pSeries +model : IBM,8406-70Y +machine : CHRP IBM,8406-70Y)"); + SetPlatformTypes("power7", "power8"); + const auto strings = GetPPCPlatformStrings(); + ASSERT_STREQ(strings.platform, "pSeries"); + ASSERT_STREQ(strings.model, "IBM,8406-70Y"); + ASSERT_STREQ(strings.machine, "CHRP IBM,8406-70Y"); + ASSERT_STREQ(strings.cpu, "POWER7 (architected), altivec 
// Parses a two-processor POWER8 / PowerNV /proc/cpuinfo excerpt and checks the
// platform, model, machine and cpu strings extracted from it.
TEST(CpustringsPPCTest, Firestone) {
  // Hwcaps are zeroed; the strings under test come from the file below.
  DisableHardwareCapabilities();
  auto& fs = GetEmptyFilesystem();
  fs.CreateFile("/proc/cpuinfo",
                R"(processor : 126
cpu : POWER8 (raw), altivec supported
clock : 2061.000000MHz
revision : 2.0 (pvr 004d 0200)

processor : 127
cpu : POWER8 (raw), altivec supported
clock : 2061.000000MHz
revision : 2.0 (pvr 004d 0200)

timebase : 512000000
platform : PowerNV
model : 8335-GTA
machine : PowerNV 8335-GTA
firmware : OPAL v3)");
  const auto strings = GetPPCPlatformStrings();
  // ASSERT_* (fatal) is used, so the first mismatch ends the test.
  ASSERT_STREQ(strings.platform, "PowerNV");
  ASSERT_STREQ(strings.model, "8335-GTA");
  ASSERT_STREQ(strings.machine, "PowerNV 8335-GTA");
  ASSERT_STREQ(strings.cpu, "POWER8 (raw), altivec supported");
}
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "gtest/gtest.h" + +#include "cpuinfo_x86.h" +#include "internal/cpuid_x86.h" + +namespace cpu_features { + +class FakeCpu { + public: + Leaf CpuId(uint32_t leaf_id) const { + const auto itr = cpuid_leaves_.find(leaf_id); + EXPECT_TRUE(itr != cpuid_leaves_.end()) << "Missing leaf " << leaf_id; + return itr->second; + } + + uint32_t GetXCR0Eax() const { return xcr0_eax_; } + + void SetLeaves(std::map configuration) { + cpuid_leaves_ = std::move(configuration); + } + + void SetOsBackupsExtendedRegisters(bool os_backups_extended_registers) { + xcr0_eax_ = os_backups_extended_registers ? 
-1 : 0; + } + + private: + std::map cpuid_leaves_; + uint32_t xcr0_eax_; +}; + +auto* g_fake_cpu = new FakeCpu(); + +extern "C" Leaf CpuId(uint32_t leaf_id) { return g_fake_cpu->CpuId(leaf_id); } +extern "C" uint32_t GetXCR0Eax(void) { return g_fake_cpu->GetXCR0Eax(); } + +namespace { + +TEST(CpuidX86Test, SandyBridge) { + g_fake_cpu->SetOsBackupsExtendedRegisters(true); + g_fake_cpu->SetLeaves({ + {0x00000000, Leaf{0x0000000D, 0x756E6547, 0x6C65746E, 0x49656E69}}, + {0x00000001, Leaf{0x000206A6, 0x00100800, 0x1F9AE3BF, 0xBFEBFBFF}}, + {0x00000007, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}}, + }); + const auto info = GetX86Info(); + EXPECT_STREQ(info.vendor, "GenuineIntel"); + EXPECT_EQ(info.family, 0x06); + EXPECT_EQ(info.model, 0x02A); + EXPECT_EQ(info.stepping, 0x06); + // Leaf 7 is zeroed out so none of the Leaf 7 flags are set. + const auto features = info.features; + EXPECT_FALSE(features.erms); + EXPECT_FALSE(features.avx2); + EXPECT_FALSE(features.avx512f); + EXPECT_FALSE(features.avx512cd); + EXPECT_FALSE(features.avx512er); + EXPECT_FALSE(features.avx512pf); + EXPECT_FALSE(features.avx512bw); + EXPECT_FALSE(features.avx512dq); + EXPECT_FALSE(features.avx512vl); + EXPECT_FALSE(features.avx512ifma); + EXPECT_FALSE(features.avx512vbmi); + EXPECT_FALSE(features.avx512vbmi2); + EXPECT_FALSE(features.avx512vnni); + EXPECT_FALSE(features.avx512bitalg); + EXPECT_FALSE(features.avx512vpopcntdq); + EXPECT_FALSE(features.avx512_4vnniw); + EXPECT_FALSE(features.avx512_4vbmi2); + // All old cpu features should be set. 
// Uses one fixed set of CPUID leaves and toggles only the fake XCR0 value, to
// show that the reported AVX flag follows OS support for extended registers.
TEST(CpuidX86Test, SandyBridgeTestOsSupport) {
  g_fake_cpu->SetLeaves({
      {0x00000000, Leaf{0x0000000D, 0x756E6547, 0x6C65746E, 0x49656E69}},
      {0x00000001, Leaf{0x000206A6, 0x00100800, 0x1F9AE3BF, 0xBFEBFBFF}},
      {0x00000007, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}},
  });
  // avx is disabled if os does not support backing up ymm registers.
  g_fake_cpu->SetOsBackupsExtendedRegisters(false);
  EXPECT_FALSE(GetX86Info().features.avx);
  // avx is enabled once the os supports backing up ymm registers.
  g_fake_cpu->SetOsBackupsExtendedRegisters(true);
  EXPECT_TRUE(GetX86Info().features.avx);
}
// http://users.atw.hu/instlatx64/AuthenticAMD0630F81_K15_Godavari_CPUID.txt
// Replays the CPUID dump referenced above and checks vendor, family/model/
// stepping decoding, microarchitecture classification and the brand string.
TEST(CpuidX86Test, AMD_K15) {
  // NOTE(review): XCR0 is not set here, so the fake keeps whatever value an
  // earlier test left behind; nothing asserted below appears to depend on it.
  g_fake_cpu->SetLeaves({
      {0x00000000, Leaf{0x0000000D, 0x68747541, 0x444D4163, 0x69746E65}},
      {0x00000001, Leaf{0x00630F81, 0x00040800, 0x3E98320B, 0x178BFBFF}},
      {0x00000007, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}},
      {0x80000000, Leaf{0x8000001E, 0x68747541, 0x444D4163, 0x69746E65}},
      {0x80000001, Leaf{0x00630F81, 0x10000000, 0x0FEBBFFF, 0x2FD3FBFF}},
      {0x80000002, Leaf{0x20444D41, 0x372D3841, 0x4B303736, 0x64615220}},
      {0x80000003, Leaf{0x206E6F65, 0x202C3752, 0x43203031, 0x75706D6F}},
      {0x80000004, Leaf{0x43206574, 0x7365726F, 0x2B433420, 0x00204736}},
      {0x80000005, Leaf{0xFF40FF18, 0xFF40FF30, 0x10040140, 0x60030140}},
  });
  const auto info = GetX86Info();

  EXPECT_STREQ(info.vendor, "AuthenticAMD");
  // Leaf 1 eax = 0x00630F81: the expected family 0x15 and model 0x38 combine
  // the base fields (0xF, 0x8) with the extended ones (0x06, 0x3).
  EXPECT_EQ(info.family, 0x15);
  EXPECT_EQ(info.model, 0x38);
  EXPECT_EQ(info.stepping, 0x01);
  EXPECT_EQ(GetX86Microarchitecture(&info),
            X86Microarchitecture::AMD_BULLDOZER);

  // Leaves 0x80000002..4 carry 3 x 16 = 48 brand bytes; 49 leaves room for a
  // terminating NUL.
  char brand_string[49];
  FillX86BrandString(brand_string);
  EXPECT_STREQ(brand_string, "AMD A8-7670K Radeon R7, 10 Compute Cores 4C+6G ");
}
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "filesystem_for_testing.h" + +#include +#include +#include +#include +#include + +namespace cpu_features { + +FakeFile::FakeFile(int file_descriptor, const char* content) + : file_descriptor_(file_descriptor), content_(content) {} + +FakeFile::~FakeFile() { assert(!opened_); } + +void FakeFile::Open() { + assert(!opened_); + opened_ = true; +} + +void FakeFile::Close() { + assert(opened_); + opened_ = false; +} + +int FakeFile::Read(int fd, void* buf, size_t count) { + assert(count < INT_MAX); + assert(fd == file_descriptor_); + const size_t remainder = content_.size() - head_index_; + const size_t read = count > remainder ? remainder : count; + memcpy(buf, content_.data() + head_index_, read); + head_index_ += read; + assert(read < INT_MAX); + return read; +} + +void FakeFilesystem::Reset() { files_.clear(); } + +FakeFile* FakeFilesystem::CreateFile(const std::string& filename, + const char* content) { + auto& file = files_[filename]; + file = + std::unique_ptr(new FakeFile(next_file_descriptor_++, content)); + return file.get(); +} + +FakeFile* FakeFilesystem::FindFileOrNull(const std::string& filename) const { + const auto itr = files_.find(filename); + return itr == files_.end() ? 
nullptr : itr->second.get(); +} + +FakeFile* FakeFilesystem::FindFileOrDie(const int file_descriptor) const { + for (const auto& filename_file_pair : files_) { + FakeFile* const file_ptr = filename_file_pair.second.get(); + if (file_ptr->GetFileDescriptor() == file_descriptor) { + return file_ptr; + } + } + assert(false); + return nullptr; +} + +static FakeFilesystem* kFilesystem = new FakeFilesystem(); + +FakeFilesystem& GetEmptyFilesystem() { + kFilesystem->Reset(); + return *kFilesystem; +} + +extern "C" int CpuFeatures_OpenFile(const char* filename) { + auto* const file = kFilesystem->FindFileOrNull(filename); + if (file) { + file->Open(); + return file->GetFileDescriptor(); + } + return -1; +} + +extern "C" void CpuFeatures_CloseFile(int file_descriptor) { + kFilesystem->FindFileOrDie(file_descriptor)->Close(); +} + +extern "C" int CpuFeatures_ReadFile(int file_descriptor, void* buffer, + size_t buffer_size) { + return kFilesystem->FindFileOrDie(file_descriptor) + ->Read(file_descriptor, buffer, buffer_size); +} + +} // namespace cpu_features diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/filesystem_for_testing.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/filesystem_for_testing.h new file mode 100755 index 00000000..ca269e52 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/filesystem_for_testing.h @@ -0,0 +1,61 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
namespace cpu_features {

// In-memory stand-in for a single file. The content is copied at construction
// and consumed sequentially by Read(). Open()/Close() only track state and
// assert on misuse; the destructor asserts that the file is closed.
class FakeFile {
 public:
  explicit FakeFile(int file_descriptor, const char* content);
  ~FakeFile();

  void Open();
  void Close();
  // Copies up to `count` bytes from the current read position into `buf`,
  // advances the position and returns the number of bytes copied. `fd` must
  // equal this file's descriptor.
  int Read(int fd, void* buf, size_t count);

  int GetFileDescriptor() const { return file_descriptor_; }

 private:
  const int file_descriptor_;
  const std::string content_;
  bool opened_ = false;
  size_t head_index_ = 0;  // Offset of the next byte Read() will return.
};

// Registry of FakeFile objects keyed by path; owns the files it creates.
class FakeFilesystem {
 public:
  // Drops every registered file.
  void Reset();
  // Creates (or replaces) the file at `filename`; the returned pointer stays
  // owned by the filesystem.
  FakeFile* CreateFile(const std::string& filename, const char* content);
  // Looks a file up by descriptor; asserts if there is none.
  FakeFile* FindFileOrDie(const int file_descriptor) const;
  // Looks a file up by path; returns nullptr if there is none.
  FakeFile* FindFileOrNull(const std::string& filename) const;

 private:
  // Kept as `int` because descriptors are handed to FakeFile and to the
  // CpuFeatures_* file hooks as `int`.
  int next_file_descriptor_ = 0;
  std::unordered_map<std::string, std::unique_ptr<FakeFile>> files_;
};

// Resets the process-wide fake filesystem and returns it.
FakeFilesystem& GetEmptyFilesystem();

}  // namespace cpu_features
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "hwcaps_for_testing.h" +#include "internal/string_view.h" + +namespace cpu_features { + +namespace { +static auto* const g_hardware_capabilities = new HardwareCapabilities(); +static auto* const g_platform_types = new PlatformType(); +} // namespace + +void SetHardwareCapabilities(uint32_t hwcaps, uint32_t hwcaps2) { + g_hardware_capabilities->hwcaps = hwcaps; + g_hardware_capabilities->hwcaps2 = hwcaps2; +} + +HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void) { + return *g_hardware_capabilities; +} + +void SetPlatformTypes(const char* platform, const char* base_platform) { + CpuFeatures_StringView_CopyString(str(platform), g_platform_types->platform, + sizeof(g_platform_types->platform)); + CpuFeatures_StringView_CopyString(str(base_platform), + g_platform_types->base_platform, + sizeof(g_platform_types->base_platform)); +} + +PlatformType CpuFeatures_GetPlatformType(void) { return *g_platform_types; } +} // namespace cpu_features diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/hwcaps_for_testing.h b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/hwcaps_for_testing.h new file mode 100755 index 00000000..0d037772 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/hwcaps_for_testing.h @@ -0,0 +1,27 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
namespace cpu_features {

// Sets the two capability words that the test implementation of
// CpuFeatures_GetHardwareCapabilities() returns.
void SetHardwareCapabilities(uint32_t hwcaps, uint32_t hwcaps2);
// Sets the platform / base-platform strings that the test implementation of
// CpuFeatures_GetPlatformType() returns.
void SetPlatformTypes(const char *platform, const char *base_platform);

}  // namespace cpu_features
+ +#include + +#include "internal/linux_features_aggregator.h" + +#include "gtest/gtest.h" + +namespace cpu_features { + +namespace { + +struct Features { + bool a = false; + bool b = false; + bool c = false; +}; + +DECLARE_SETTER(Features, a) +DECLARE_SETTER(Features, b) +DECLARE_SETTER(Features, c) + +class LinuxFeatureAggregatorTest : public testing::Test { + public: + const std::array kConfigs = { + {{{0b0001, 0b0000}, "a", &set_a}, + {{0b0010, 0b0000}, "b", &set_b}, + {{0b0000, 0b1100}, "c", &set_c}}}; +}; + +TEST_F(LinuxFeatureAggregatorTest, FromFlagsEmpty) { + Features features; + CpuFeatures_SetFromFlags(kConfigs.size(), kConfigs.data(), str(""), + &features); + EXPECT_FALSE(features.a); + EXPECT_FALSE(features.b); + EXPECT_FALSE(features.c); +} + +TEST_F(LinuxFeatureAggregatorTest, FromFlagsAllSet) { + Features features; + CpuFeatures_SetFromFlags(kConfigs.size(), kConfigs.data(), str("a c b"), + &features); + EXPECT_TRUE(features.a); + EXPECT_TRUE(features.b); + EXPECT_TRUE(features.c); +} + +TEST_F(LinuxFeatureAggregatorTest, FromFlagsOnlyA) { + Features features; + CpuFeatures_SetFromFlags(kConfigs.size(), kConfigs.data(), str("a"), + &features); + EXPECT_TRUE(features.a); + EXPECT_FALSE(features.b); + EXPECT_FALSE(features.c); +} + +TEST_F(LinuxFeatureAggregatorTest, FromHwcapsNone) { + HardwareCapabilities capability; + capability.hwcaps = 0; // matches none + capability.hwcaps2 = 0; // matches none + Features features; + CpuFeatures_OverrideFromHwCaps(kConfigs.size(), kConfigs.data(), capability, + &features); + EXPECT_FALSE(features.a); + EXPECT_FALSE(features.b); + EXPECT_FALSE(features.c); +} + +TEST_F(LinuxFeatureAggregatorTest, FromHwcapsSet) { + HardwareCapabilities capability; + capability.hwcaps = 0b0010; // matches b but not a + capability.hwcaps2 = 0b1111; // matches c + Features features; + CpuFeatures_OverrideFromHwCaps(kConfigs.size(), kConfigs.data(), capability, + &features); + EXPECT_FALSE(features.a); + EXPECT_TRUE(features.b); + 
EXPECT_TRUE(features.c); +} + +} // namespace +} // namespace cpu_features diff --git a/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/stack_line_reader_test.cc b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/stack_line_reader_test.cc new file mode 100755 index 00000000..c8f96910 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/cpu/cpu_features/test/stack_line_reader_test.cc @@ -0,0 +1,132 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "internal/stack_line_reader.h" +#include "filesystem_for_testing.h" + +#include "gtest/gtest.h" + +namespace cpu_features { + +bool operator==(const StringView& a, const StringView& b) { + return CpuFeatures_StringView_IsEquals(a, b); +} + +namespace { + +std::string ToString(StringView view) { return {view.ptr, view.size}; } + +TEST(StackLineReaderTest, Empty) { + auto& fs = GetEmptyFilesystem(); + auto* file = fs.CreateFile("/proc/cpuinfo", ""); + StackLineReader reader; + StackLineReader_Initialize(&reader, file->GetFileDescriptor()); + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_TRUE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("")); + } +} + +TEST(StackLineReaderTest, ManySmallLines) { + auto& fs = GetEmptyFilesystem(); + auto* file = fs.CreateFile("/proc/cpuinfo", "a\nb\nc"); + + StackLineReader reader; + StackLineReader_Initialize(&reader, file->GetFileDescriptor()); + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_FALSE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("a")); + } + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_FALSE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("b")); + } + { + const auto result = StackLineReader_NextLine(&reader); + EXPECT_TRUE(result.eof); + EXPECT_TRUE(result.full_line); + EXPECT_EQ(result.line, str("c")); + } +} + +TEST(StackLineReaderTest, TruncatedLine) { + auto& fs = GetEmptyFilesystem(); + auto* file = fs.CreateFile("/proc/cpuinfo", R"(First +Second +More than 16 characters, this will be truncated. 
// Two consecutive over-long lines: each is returned cut to its first 16
// characters with full_line == false, followed by an empty final line at eof.
TEST(StackLineReaderTest, TruncatedLines) {
  auto& fs = GetEmptyFilesystem();
  auto* file = fs.CreateFile("/proc/cpuinfo", R"(More than 16 characters
Another line that is too long)");

  StackLineReader reader;
  StackLineReader_Initialize(&reader, file->GetFileDescriptor());
  {
    // First line: truncated, more input follows.
    const auto result = StackLineReader_NextLine(&reader);
    EXPECT_FALSE(result.eof);
    EXPECT_FALSE(result.full_line);
    EXPECT_EQ(result.line, str("More than 16 cha"));
  }
  {
    // Second line: also truncated; eof is not reported yet even though the
    // input has no trailing newline.
    const auto result = StackLineReader_NextLine(&reader);
    EXPECT_FALSE(result.eof);
    EXPECT_FALSE(result.full_line);
    EXPECT_EQ(result.line, str("Another line tha"));
  }
  {
    // Third read: nothing left, reported as a complete empty line at eof.
    const auto result = StackLineReader_NextLine(&reader);
    EXPECT_TRUE(result.eof);
    EXPECT_TRUE(result.full_line);
    EXPECT_EQ(result.line, str(""));
  }
}
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "internal/string_view.h" + +#include "gtest/gtest.h" + +namespace cpu_features { + +bool operator==(const StringView& a, const StringView& b) { + return CpuFeatures_StringView_IsEquals(a, b); +} + +namespace { + +TEST(StringViewTest, Empty) { + EXPECT_EQ(kEmptyStringView.ptr, nullptr); + EXPECT_EQ(kEmptyStringView.size, 0); +} + +TEST(StringViewTest, Build) { + const auto view = str("test"); + EXPECT_EQ(view.ptr[0], 't'); + EXPECT_EQ(view.size, 4); +} + +TEST(StringViewTest, CpuFeatures_StringView_IndexOfChar) { + // Found. + EXPECT_EQ(CpuFeatures_StringView_IndexOfChar(str("test"), 'e'), 1); + // Not found. + EXPECT_EQ(CpuFeatures_StringView_IndexOfChar(str("test"), 'z'), -1); + // Empty. + EXPECT_EQ(CpuFeatures_StringView_IndexOfChar(kEmptyStringView, 'z'), -1); +} + +TEST(StringViewTest, CpuFeatures_StringView_IndexOf) { + // Found. + EXPECT_EQ(CpuFeatures_StringView_IndexOf(str("test"), str("es")), 1); + // Not found. + EXPECT_EQ(CpuFeatures_StringView_IndexOf(str("test"), str("aa")), -1); + // Empty. 
+ EXPECT_EQ(CpuFeatures_StringView_IndexOf(kEmptyStringView, str("aa")), -1); + EXPECT_EQ(CpuFeatures_StringView_IndexOf(str("aa"), kEmptyStringView), -1); +} + +TEST(StringViewTest, CpuFeatures_StringView_StartsWith) { + EXPECT_TRUE(CpuFeatures_StringView_StartsWith(str("test"), str("te"))); + EXPECT_FALSE(CpuFeatures_StringView_StartsWith(str("test"), str(""))); + EXPECT_FALSE( + CpuFeatures_StringView_StartsWith(str("test"), kEmptyStringView)); + EXPECT_FALSE( + CpuFeatures_StringView_StartsWith(kEmptyStringView, str("test"))); +} + +TEST(StringViewTest, CpuFeatures_StringView_IsEquals) { + EXPECT_TRUE( + CpuFeatures_StringView_IsEquals(kEmptyStringView, kEmptyStringView)); + EXPECT_TRUE(CpuFeatures_StringView_IsEquals(kEmptyStringView, str(""))); + EXPECT_TRUE(CpuFeatures_StringView_IsEquals(str(""), kEmptyStringView)); + EXPECT_TRUE(CpuFeatures_StringView_IsEquals(str("a"), str("a"))); + EXPECT_FALSE(CpuFeatures_StringView_IsEquals(str("a"), str("b"))); + EXPECT_FALSE(CpuFeatures_StringView_IsEquals(str("a"), kEmptyStringView)); + EXPECT_FALSE(CpuFeatures_StringView_IsEquals(kEmptyStringView, str("a"))); +} + +TEST(StringViewTest, CpuFeatures_StringView_PopFront) { + EXPECT_EQ(CpuFeatures_StringView_PopFront(str("test"), 2), str("st")); + EXPECT_EQ(CpuFeatures_StringView_PopFront(str("test"), 0), str("test")); + EXPECT_EQ(CpuFeatures_StringView_PopFront(str("test"), 4), str("")); + EXPECT_EQ(CpuFeatures_StringView_PopFront(str("test"), 100), str("")); +} + +TEST(StringViewTest, CpuFeatures_StringView_ParsePositiveNumber) { + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("42")), 42); + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("0x2a")), 42); + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("0x2A")), 42); + + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("-0x2A")), -1); + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("abc")), -1); + EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("")), -1); +} + 
// Copies views of increasing length into a 4-byte buffer and checks that the
// result is always NUL-terminated and holds at most sizeof(buf) - 1 chars.
TEST(StringViewTest, CpuFeatures_StringView_CopyString) {
  char buf[4];
  // Pre-fill with a non-NUL byte so the empty-copy case below can only pass
  // if the copy itself writes the terminator.
  buf[0] = 'X';

  // Empty
  CpuFeatures_StringView_CopyString(str(""), buf, sizeof(buf));
  EXPECT_STREQ(buf, "");

  // Less
  CpuFeatures_StringView_CopyString(str("a"), buf, sizeof(buf));
  EXPECT_STREQ(buf, "a");

  // exact
  CpuFeatures_StringView_CopyString(str("abc"), buf, sizeof(buf));
  EXPECT_STREQ(buf, "abc");

  // More: the fourth character is dropped to leave room for the terminator.
  CpuFeatures_StringView_CopyString(str("abcd"), buf, sizeof(buf));
  EXPECT_STREQ(buf, "abc");
}
// ROUND(m, t, r): one BLAKE2b round executed cooperatively by a group of four
// lanes (shuffle width 4), where `t` is the lane's index within its group.
//
// Requires uint64_t v0, v1, v2, v3 in the caller's scope; they are read and
// overwritten. `m` is the message block and `r` selects the blake2b_sigma row.
//
// Steps:
//   1. G with schedule slot `t` on the lane's own v0..v3.
//   2. v1/v2/v3 are fetched from the lanes at offsets +1/+2/+3. Source lanes
//      past the group wrap around within the 4-lane width, per the documented
//      __shfl_sync width semantics.
//   3. G with schedule slot `t + 4` on the rotated values.
//   4. v1/v2/v3 are fetched at offsets +3/+2/+1, undoing the rotation of
//      step 2.
//
// NOTE(review): the shuffle mask is 0xFFFFFFFF, i.e. every lane of the warp is
// named as a participant -- confirm all call sites run with a full warp.
#define ROUND(m, t, r) \
do { \
	G(m, r, t, v0, v1, v2, v3); \
	v1 = __shfl_sync(0xFFFFFFFF, v1, t + 1, 4); \
	v2 = __shfl_sync(0xFFFFFFFF, v2, t + 2, 4); \
	v3 = __shfl_sync(0xFFFFFFFF, v3, t + 3, 4); \
	G(m, r, (t + 4), v0, v1, v2, v3); \
	v1 = __shfl_sync(0xFFFFFFFF, v1, t + 3, 4); \
	v2 = __shfl_sync(0xFFFFFFFF, v2, t + 2, 4); \
	v3 = __shfl_sync(0xFFFFFFFF, v3, t + 1, 4); \
} while ((void)0, 0)
// Rotates the 64-bit value `x` right by `n` bit positions.
//
// `n` is reduced modulo 64 and the left-shift count is masked as well, so a
// rotation by 0 (or any multiple of 64) returns `x` unchanged instead of
// evaluating the undefined shift `x << 64`. Results for n in 1..63 are
// identical to the unmasked formula.
__device__ uint64_t rotr64(uint64_t x, uint32_t n)
{
	n &= 63;
	return (x >> n) | (x << ((64 - n) & 63));
}
blake2b_IV[thr_id * 2 + 1]; + + if(thr_id == 0) { + h[8] = h[9] = 0; + h[0] = 0x6A09E667F3BCC908 ^ ((out_len * 4) | (1 << 16) | (1 << 24)); + } + + return 0; +} + +__device__ __forceinline__ void blake2b_incrementCounter(uint64_t *h, int inc) +{ + h[8] += (inc * 4); + h[9] += (h[8] < (inc * 4)); +} + +__device__ __forceinline__ int blake2b_update(uint32_t *in, int in_len, uint64_t *h, uint32_t *buf, int buf_len, int thr_id) +{ + uint32_t *cursor_in = in; + uint32_t *cursor_out = buf + buf_len; + + if (buf_len + in_len > BLOCK_BYTES) { + int left = BLOCK_BYTES - buf_len; + + for(int i=0; i < (left >> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (left % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + blake2b_incrementCounter(h, BLOCK_BYTES); + } + + blake2b_compress(h, (uint64_t*)buf, 0, thr_id); + + buf_len = 0; + + in_len -= left; + in += left; + + while (in_len > BLOCK_BYTES) { + if(thr_id == 0) + blake2b_incrementCounter(h, BLOCK_BYTES); + + cursor_in = in; + cursor_out = buf; + + for(int i=0; i < (BLOCK_BYTES / 4); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + blake2b_compress(h, (uint64_t *)buf, 0, thr_id); + + in_len -= BLOCK_BYTES; + in += BLOCK_BYTES; + } + } + + cursor_in = in; + cursor_out = buf + buf_len; + + for(int i=0; i < (in_len >> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (in_len % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + } + + return buf_len + in_len; +} + +__device__ __forceinline__ int blake2b_update_static(uint32_t in, int in_len, uint64_t *h, uint32_t *buf, int buf_len, int thr_id) +{ + uint64_t in64 = in; + in64 = in64 << 32; + in64 = in64 | in; + + uint32_t *cursor_out = buf + buf_len; + + if (buf_len + in_len > BLOCK_BYTES) { + int left = BLOCK_BYTES - buf_len; + + for(int i=0; i < (left >> 2); i++, cursor_out 
+= 4) { + cursor_out[thr_id] = in; + } + + if(thr_id == 0) { + for (int i = 0; i < (left % 4); i++) { + cursor_out[i] = in; + } + blake2b_incrementCounter(h, BLOCK_BYTES); + } + + blake2b_compress(h, (uint64_t*)buf, 0, thr_id); + + buf_len = 0; + + in_len -= left; + + while (in_len > BLOCK_BYTES) { + if(thr_id == 0) + blake2b_incrementCounter(h, BLOCK_BYTES); + + blake2b_compress_static(h, in64, 0, thr_id); + + in_len -= BLOCK_BYTES; + } + } + + cursor_out = buf + buf_len; + + for(int i=0; i < (in_len >> 2); i++, cursor_out += 4) { + cursor_out[thr_id] = in; + } + + if(thr_id == 0) { + for (int i = 0; i < (in_len % 4); i++) { + cursor_out[i] = in; + } + } + + return buf_len + in_len; +} + +__device__ __forceinline__ void blake2b_final(uint32_t *out, int out_len, uint64_t *h, uint32_t *buf, int buf_len, int thr_id) +{ + int left = BLOCK_BYTES - buf_len; + uint32_t *cursor_out = buf + buf_len; + + for(int i=0; i < (left >> 2); i++, cursor_out += 4) { + cursor_out[thr_id] = 0; + } + + if(thr_id == 0) { + for (int i = 0; i < (left % 4); i++) { + cursor_out[i] = 0; + } + blake2b_incrementCounter(h, buf_len); + } + + blake2b_compress(h, (uint64_t*)buf, 0xFFFFFFFFFFFFFFFF, thr_id); + + uint32_t *cursor_in = (uint32_t *)h; + cursor_out = out; + + for(int i=0; i < (out_len >> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (out_len % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + } +} + +__device__ void blake2b_digestLong(uint32_t *out, int out_len, uint32_t *in, int in_len, int thr_id, uint32_t *shared) +{ + uint64_t *h = (uint64_t*)shared; + uint32_t *buf = (uint32_t*)&h[10]; + uint32_t *out_buffer = &buf[32]; + int buf_len; + + if(thr_id == 0) buf[0] = (out_len * 4); + buf_len = 1; + + if (out_len <= OUT_BYTES) { + blake2b_init(h, out_len, thr_id); + buf_len = blake2b_update(in, in_len, h, buf, buf_len, thr_id); + blake2b_final(out, out_len, h, buf, buf_len, thr_id); + } else { + 
uint32_t *cursor_in = out_buffer; + uint32_t *cursor_out = out; + + blake2b_init(h, OUT_BYTES, thr_id); + buf_len = blake2b_update(in, in_len, h, buf, buf_len, thr_id); + blake2b_final(out_buffer, OUT_BYTES, h, buf, buf_len, thr_id); + + for(int i=0; i < (OUT_BYTES / 8); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + out += OUT_BYTES / 2; + + int to_produce = out_len - OUT_BYTES / 2; + while (to_produce > OUT_BYTES) { + buf_len = blake2b_init(h, OUT_BYTES, thr_id); + buf_len = blake2b_update(out_buffer, OUT_BYTES, h, buf, buf_len, thr_id); + blake2b_final(out_buffer, OUT_BYTES, h, buf, buf_len, thr_id); + + cursor_out = out; + cursor_in = out_buffer; + for(int i=0; i < (OUT_BYTES / 8); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + out += OUT_BYTES / 2; + to_produce -= OUT_BYTES / 2; + } + + buf_len = blake2b_init(h, to_produce, thr_id); + buf_len = blake2b_update(out_buffer, OUT_BYTES, h, buf, buf_len, thr_id); + blake2b_final(out, to_produce, h, buf, buf_len, thr_id); + } +} \ No newline at end of file diff --git a/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.cpp b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.cpp new file mode 100644 index 00000000..2046a321 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.cpp @@ -0,0 +1,340 @@ +// +// Created by Haifa Bogdan Adnan on 03/08/2018. 
+// + +#include + +#include "../../../common/common.h" + +#include "crypto/argon2_hasher/hash/Hasher.h" +#include "crypto/argon2_hasher/hash/argon2/Argon2.h" + +#if defined(WITH_CUDA) + +#include +#include + +#include "cuda_hasher.h" +#include "../../../common/DLLExport.h" + +cuda_hasher::cuda_hasher() { + m_type = "GPU"; + m_subType = "CUDA"; + m_shortSubType = "NVD"; + m_intensity = 0; + m_description = ""; + m_computingThreads = 0; +} + + +cuda_hasher::~cuda_hasher() { + this->cleanup(); +} + +bool cuda_hasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) { + cudaError_t error = cudaSuccess; + string error_message; + + m_profile = getArgon2Profile(algorithm, variant); + + __devices = __query_cuda_devices(error, error_message); + + if(error != cudaSuccess) { + m_description = "No compatible GPU detected: " + error_message; + return false; + } + + if (__devices.empty()) { + m_description = "No compatible GPU detected."; + return false; + } + + return true; +} + +vector cuda_hasher::__query_cuda_devices(cudaError_t &error, string &error_message) { + vector devices; + int devCount = 0; + error = cudaGetDeviceCount(&devCount); + + if(error != cudaSuccess) { + error_message = "Error querying CUDA device count."; + return devices; + } + + if(devCount == 0) + return devices; + + for (int i = 0; i < devCount; ++i) + { + cuda_device_info *dev = __get_device_info(i); + if(dev == NULL) + continue; + if(dev->error != cudaSuccess) { + error = dev->error; + error_message = dev->error_message; + continue; + } + devices.push_back(dev); + } + return devices; +} + +cuda_device_info *cuda_hasher::__get_device_info(int device_index) { + cuda_device_info *device_info = new cuda_device_info(); + device_info->error = cudaSuccess; + device_info->cuda_index = device_index; + + device_info->error = cudaSetDevice(device_index); + if(device_info->error != cudaSuccess) { + device_info->error_message = "Error setting current device."; + return device_info; + } + + cudaDeviceProp 
devProp; + device_info->error = cudaGetDeviceProperties(&devProp, device_index); + if(device_info->error != cudaSuccess) { + device_info->error_message = "Error setting current device."; + return device_info; + } + + device_info->device_string = devProp.name; + + size_t freemem, totalmem; + device_info->error = cudaMemGetInfo(&freemem, &totalmem); + if(device_info->error != cudaSuccess) { + device_info->error_message = "Error setting current device."; + return device_info; + } + + device_info->free_mem_size = freemem; + device_info->max_allocable_mem_size = freemem / 4; + + double mem_in_gb = totalmem / 1073741824.0; + stringstream ss; + ss << setprecision(2) << mem_in_gb; + device_info->device_string += (" (" + ss.str() + "GB)"); + + return device_info; +} + +bool cuda_hasher::configure(xmrig::HasherConfig &config) { + int index = config.getGPUCardsCount(); + double intensity = 0; + + int total_threads = 0; + intensity = config.getAverageGPUIntensity(); + + if (intensity == 0) { + m_intensity = 0; + m_description = "Status: DISABLED - by user."; + return false; + } + + bool cards_selected = false; + intensity = 0; + + for(vector::iterator d = __devices.begin(); d != __devices.end(); d++, index++) { + stringstream ss; + ss << "["<< (index + 1) << "] " << (*d)->device_string; + string device_description = ss.str(); + (*d)->device_index = index; + (*d)->profile_info.profile = m_profile; + + if(config.gpuFilter().size() > 0) { + bool found = false; + for(xmrig::GPUFilter fit : config.gpuFilter()) { + if(device_description.find(fit.filter) != string::npos) { + found = true; + break; + } + } + if(!found) { + (*d)->profile_info.threads = 0; + ss << " - DISABLED" << endl; + m_description += ss.str(); + continue; + } + else { + cards_selected = true; + } + } + else { + cards_selected = true; + } + + ss << endl; + + double device_intensity = config.getGPUIntensity((*d)->device_index); + + m_description += ss.str(); + + if(!(__setup_device_info((*d), device_intensity))) { + 
m_description += (*d)->error_message; + m_description += "\n"; + continue; + }; + + DeviceInfo device; + + char bus_id[100]; + if(cudaDeviceGetPCIBusId(bus_id, 100, (*d)->cuda_index) == cudaSuccess) { + device.bus_id = bus_id; + int domain_separator = device.bus_id.find(":"); + if(domain_separator != string::npos) { + device.bus_id.erase(0, domain_separator + 1); + } + } + + device.name = (*d)->device_string; + device.intensity = device_intensity; + storeDeviceInfo((*d)->device_index, device); + + __enabledDevices.push_back(*d); + + total_threads += (*d)->profile_info.threads; + intensity += device_intensity; + } + + config.addGPUCardsCount(index - config.getGPUCardsCount()); + + if(!cards_selected) { + m_intensity = 0; + m_description += "Status: DISABLED - no card enabled because of filtering."; + return false; + } + + if (total_threads == 0) { + m_intensity = 0; + m_description += "Status: DISABLED - not enough resources."; + return false; + } + + if(!buildThreadData()) + return false; + + m_intensity = intensity / __enabledDevices.size(); + m_computingThreads = __enabledDevices.size() * 2; // 2 computing threads for each device + m_description += "Status: ENABLED - with " + to_string(total_threads) + " threads."; + + return true; +} + +void cuda_hasher::cleanup() { + for(vector::iterator d = __devices.begin(); d != __devices.end(); d++) { + cuda_free(*d); + } +} + +bool cuda_hasher::__setup_device_info(cuda_device_info *device, double intensity) { + device->profile_info.threads_per_chunk = (uint32_t)(device->max_allocable_mem_size / device->profile_info.profile->memSize); + size_t chunk_size = device->profile_info.threads_per_chunk * device->profile_info.profile->memSize; + + if(chunk_size == 0) { + device->error = cudaErrorInitializationError; + device->error_message = "Not enough memory on GPU."; + return false; + } + + uint64_t usable_memory = device->free_mem_size; + double chunks = (double)usable_memory / (double)chunk_size; + + uint32_t max_threads = 
(uint32_t)(device->profile_info.threads_per_chunk * chunks); + + if(max_threads == 0) { + device->error = cudaErrorInitializationError; + device->error_message = "Not enough memory on GPU."; + return false; + } + + device->profile_info.threads = (uint32_t)(max_threads * intensity / 100.0); + device->profile_info.threads = (device->profile_info.threads / 2) * 2; // make it divisible by 2 to allow for parallel kernel execution + if(max_threads > 0 && device->profile_info.threads == 0 && intensity > 0) + device->profile_info.threads = 2; + + chunks = (double)device->profile_info.threads / (double)device->profile_info.threads_per_chunk; + + cuda_allocate(device, chunks, chunk_size); + + if(device->error != cudaSuccess) + return false; + + return true; +} + +bool cuda_hasher::buildThreadData() { + __thread_data = new cuda_gpumgmt_thread_data[__enabledDevices.size() * 2]; + + for(int i=0; i < __enabledDevices.size(); i++) { + cuda_device_info *device = __enabledDevices[i]; + for(int threadId = 0; threadId < 2; threadId ++) { + cuda_gpumgmt_thread_data &thread_data = __thread_data[i * 2 + threadId]; + thread_data.device = device; + thread_data.thread_id = threadId; + + cudaStream_t stream; + device->error = cudaStreamCreate(&stream); + if(device->error != cudaSuccess) { + LOG("Error running kernel: (" + to_string(device->error) + ") cannot create cuda stream."); + return false; + } + + thread_data.device_data = stream; + + #ifdef PARALLEL_CUDA + if(threadId == 0) { + thread_data.threads_idx = 0; + thread_data.threads = device->profile_info.threads / 2; + } + else { + thread_data.threads_idx = device->profile_info.threads / 2; + thread_data.threads = device->profile_info.threads - thread_data.threads_idx; + } + #else + thread_data.threads_idx = 0; + thread_data.threads = device->profile_info.threads; + #endif + + thread_data.argon2 = new Argon2(cuda_kernel_prehasher, cuda_kernel_filler, cuda_kernel_posthasher, + nullptr, &thread_data); + 
thread_data.argon2->setThreads(thread_data.threads); + thread_data.hashData.outSize = xmrig::ARGON2_HASHLEN + 4; + } + } + + return true; +} + +int cuda_hasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) { + cuda_gpumgmt_thread_data &threadData = __thread_data[threadIdx]; + + cudaSetDevice(threadData.device->cuda_index); + + threadData.hashData.input = input; + threadData.hashData.inSize = size; + threadData.hashData.output = output; + int hashCount = threadData.argon2->generateHashes(*m_profile, threadData.hashData); + if(threadData.device->error != cudaSuccess) { + LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->error_message); + return 0; + } + + uint32_t *nonce = ((uint32_t *)(((uint8_t*)threadData.hashData.input) + 39)); + (*nonce) += threadData.threads; + + return hashCount; + +} + +size_t cuda_hasher::parallelism(int workerIdx) { + cuda_gpumgmt_thread_data &threadData = __thread_data[workerIdx]; + return threadData.threads; +} + +size_t cuda_hasher::deviceCount() { + return __enabledDevices.size(); +} + +REGISTER_HASHER(cuda_hasher); + +#endif //WITH_CUDA diff --git a/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.h b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.h new file mode 100644 index 00000000..2e668b8e --- /dev/null +++ b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_hasher.h @@ -0,0 +1,126 @@ +// +// Created by Haifa Bogdan Adnan on 18/09/2018. 
+// + +#ifndef ARGON2_CUDA_HASHER_H +#define ARGON2_CUDA_HASHER_H + +#if defined(WITH_CUDA) + +struct cuda_kernel_arguments { + void *memory_chunk_0; + void *memory_chunk_1; + void *memory_chunk_2; + void *memory_chunk_3; + void *memory_chunk_4; + void *memory_chunk_5; + + uint32_t *refs; + uint32_t *idxs; + uint32_t *segments; + + uint32_t *preseed_memory[2]; + uint32_t *seed_memory[2]; + uint32_t *out_memory[2]; + uint32_t *hash_memory[2]; + + uint32_t *host_seed_memory[2]; +}; + +struct argon2profile_info { + argon2profile_info() { + threads = 0; + threads_per_chunk = 0; + } + uint32_t threads; + uint32_t threads_per_chunk; + Argon2Profile *profile; +}; + +struct cuda_device_info { + cuda_device_info() { + device_index = 0; + device_string = ""; + free_mem_size = 0; + max_allocable_mem_size = 0; + + error = cudaSuccess; + error_message = ""; + } + + int device_index; + int cuda_index; + + string device_string; + uint64_t free_mem_size; + uint64_t max_allocable_mem_size; + + argon2profile_info profile_info; + cuda_kernel_arguments arguments; + + mutex device_lock; + + cudaError_t error; + string error_message; +}; + +struct cuda_gpumgmt_thread_data { + void lock() { +#ifndef PARALLEL_CUDA + device->device_lock.lock(); +#endif + } + + void unlock() { +#ifndef PARALLEL_CUDA + device->device_lock.unlock(); +#endif + } + + int thread_id; + cuda_device_info *device; + Argon2 *argon2; + HashData hashData; + + void *device_data; + + int threads; + int threads_idx; +}; + +class cuda_hasher : public Hasher { +public: + cuda_hasher(); + ~cuda_hasher(); + + virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant); + virtual bool configure(xmrig::HasherConfig &config); + virtual void cleanup(); + virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output); + virtual size_t parallelism(int workerIdx); + virtual size_t deviceCount(); + +private: + cuda_device_info *__get_device_info(int device_index); + bool 
__setup_device_info(cuda_device_info *device, double intensity); + vector __query_cuda_devices(cudaError_t &error, string &error_message); + bool buildThreadData(); + + vector __devices; + vector __enabledDevices; + cuda_gpumgmt_thread_data *__thread_data; + + Argon2Profile *m_profile; +}; + +// CUDA kernel exports +extern void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size); +extern void cuda_free(cuda_device_info *device); +extern bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data); +extern void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data); +extern bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data); +// end CUDA kernel exports + +#endif //WITH_CUDA + +#endif //ARGON2_CUDA_HASHER_H \ No newline at end of file diff --git a/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu new file mode 100644 index 00000000..eea358f2 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/gpu/cuda/cuda_kernel.cu @@ -0,0 +1,1132 @@ +#include + +#include + +#include "../../../common/common.h" + +#include "crypto/argon2_hasher/hash/Hasher.h" +#include "crypto/argon2_hasher/hash/argon2/Argon2.h" + +#include "cuda_hasher.h" + +#define THREADS_PER_LANE 32 +#define BLOCK_SIZE_UINT4 64 +#define BLOCK_SIZE_UINT 256 +#define KERNEL_WORKGROUP_SIZE 32 +#define ARGON2_PREHASH_DIGEST_LENGTH_UINT 16 +#define ARGON2_PREHASH_SEED_LENGTH_UINT 18 + + +#include "blake2b.cu" + +#define COMPUTE \ + asm ("{" \ + ".reg .u32 s1, s2, s3, s4;\n\t" \ + "mul.lo.u32 s3, %0, %2;\n\t" \ + "mul.hi.u32 s4, %0, %2;\n\t" \ + "add.cc.u32 s3, s3, s3;\n\t" \ + "addc.u32 s4, s4, s4;\n\t" \ + "add.cc.u32 s1, %0, %2;\n\t" \ + "addc.u32 s2, %1, %3;\n\t" \ + "add.cc.u32 %0, s1, s3;\n\t" \ + "addc.u32 %1, s2, s4;\n\t" \ + "xor.b32 s1, %0, %6;\n\t" \ + "xor.b32 %6, %1, %7;\n\t" \ + "mov.b32 %7, s1;\n\t" \ + "mul.lo.u32 s3, %4, 
%6;\n\t" \ + "mul.hi.u32 s4, %4, %6;\n\t" \ + "add.cc.u32 s3, s3, s3;\n\t" \ + "addc.u32 s4, s4, s4;\n\t" \ + "add.cc.u32 s1, %4, %6;\n\t" \ + "addc.u32 s2, %5, %7;\n\t" \ + "add.cc.u32 %4, s1, s3;\n\t" \ + "addc.u32 %5, s2, s4;\n\t" \ + "xor.b32 s3, %2, %4;\n\t" \ + "xor.b32 s4, %3, %5;\n\t" \ + "shf.r.wrap.b32 %3, s4, s3, 24;\n\t" \ + "shf.r.wrap.b32 %2, s3, s4, 24;\n\t" \ + "mul.lo.u32 s3, %0, %2;\n\t" \ + "mul.hi.u32 s4, %0, %2;\n\t" \ + "add.cc.u32 s3, s3, s3;\n\t" \ + "addc.u32 s4, s4, s4;\n\t" \ + "add.cc.u32 s1, %0, %2;\n\t" \ + "addc.u32 s2, %1, %3;\n\t" \ + "add.cc.u32 %0, s1, s3;\n\t" \ + "addc.u32 %1, s2, s4;\n\t" \ + "xor.b32 s3, %0, %6;\n\t" \ + "xor.b32 s4, %1, %7;\n\t" \ + "shf.r.wrap.b32 %7, s4, s3, 16;\n\t" \ + "shf.r.wrap.b32 %6, s3, s4, 16;\n\t" \ + "mul.lo.u32 s3, %4, %6;\n\t" \ + "mul.hi.u32 s4, %4, %6;\n\t" \ + "add.cc.u32 s3, s3, s3;\n\t" \ + "addc.u32 s4, s4, s4;\n\t" \ + "add.cc.u32 s1, %4, %6;\n\t" \ + "addc.u32 s2, %5, %7;\n\t" \ + "add.cc.u32 %4, s1, s3;\n\t" \ + "addc.u32 %5, s2, s4;\n\t" \ + "xor.b32 s3, %2, %4;\n\t" \ + "xor.b32 s4, %3, %5;\n\t" \ + "shf.r.wrap.b32 %3, s3, s4, 31;\n\t" \ + "shf.r.wrap.b32 %2, s4, s3, 31;\n\t" \ + "}" : "+r"(tmp_a.x), "+r"(tmp_a.y), "+r"(tmp_a.z), "+r"(tmp_a.w), "+r"(tmp_b.x), "+r"(tmp_b.y), "+r"(tmp_b.z), "+r"(tmp_b.w)); + +#define G1(data) \ +{ \ + COMPUTE \ + tmp_a.z = __shfl_sync(0xffffffff, tmp_a.z, i_shfl1_1); \ + tmp_a.w = __shfl_sync(0xffffffff, tmp_a.w, i_shfl1_1); \ + tmp_b.x = __shfl_sync(0xffffffff, tmp_b.x, i_shfl1_2); \ + tmp_b.y = __shfl_sync(0xffffffff, tmp_b.y, i_shfl1_2); \ + tmp_b.z = __shfl_sync(0xffffffff, tmp_b.z, i_shfl1_3); \ + tmp_b.w = __shfl_sync(0xffffffff, tmp_b.w, i_shfl1_3); \ +} + +#define G2(data) \ +{ \ + COMPUTE \ + data[i2_0_0] = tmp_a.x; \ + data[i2_0_1] = tmp_a.y; \ + data[i2_1_0] = tmp_a.z; \ + data[i2_1_1] = tmp_a.w; \ + data[i2_2_0] = tmp_b.x; \ + data[i2_2_1] = tmp_b.y; \ + data[i2_3_0] = tmp_b.z; \ + data[i2_3_1] = tmp_b.w; \ + __syncwarp(); \ +} + +#define 
G3(data) \ +{ \ + tmp_a.x = data[i3_0_0]; \ + tmp_a.y = data[i3_0_1]; \ + tmp_a.z = data[i3_1_0]; \ + tmp_a.w = data[i3_1_1]; \ + tmp_b.x = data[i3_2_0]; \ + tmp_b.y = data[i3_2_1]; \ + tmp_b.z = data[i3_3_0]; \ + tmp_b.w = data[i3_3_1]; \ + COMPUTE \ + tmp_a.z = __shfl_sync(0xffffffff, tmp_a.z, i_shfl2_1); \ + tmp_a.w = __shfl_sync(0xffffffff, tmp_a.w, i_shfl2_1); \ + tmp_b.x = __shfl_sync(0xffffffff, tmp_b.x, i_shfl2_2); \ + tmp_b.y = __shfl_sync(0xffffffff, tmp_b.y, i_shfl2_2); \ + tmp_b.z = __shfl_sync(0xffffffff, tmp_b.z, i_shfl2_3); \ + tmp_b.w = __shfl_sync(0xffffffff, tmp_b.w, i_shfl2_3); \ +} + +#define G4(data) \ +{ \ + COMPUTE \ + data[i4_0_0] = tmp_a.x; \ + data[i4_0_1] = tmp_a.y; \ + data[i4_1_0] = tmp_a.z; \ + data[i4_1_1] = tmp_a.w; \ + data[i4_2_0] = tmp_b.x; \ + data[i4_2_1] = tmp_b.y; \ + data[i4_3_0] = tmp_b.z; \ + data[i4_3_1] = tmp_b.w; \ + __syncwarp(); \ + tmp_a.x = data[i1_0_0]; \ + tmp_a.y = data[i1_0_1]; \ + tmp_a.z = data[i1_1_0]; \ + tmp_a.w = data[i1_1_1]; \ + tmp_b.x = data[i1_2_0]; \ + tmp_b.y = data[i1_2_1]; \ + tmp_b.z = data[i1_3_0]; \ + tmp_b.w = data[i1_3_1]; \ +} + +__constant__ int offsets[768] = { + 0, 4, 8, 12, + 1, 5, 9, 13, + 2, 6, 10, 14, + 3, 7, 11, 15, + 16, 20, 24, 28, + 17, 21, 25, 29, + 18, 22, 26, 30, + 19, 23, 27, 31, + 32, 36, 40, 44, + 33, 37, 41, 45, + 34, 38, 42, 46, + 35, 39, 43, 47, + 48, 52, 56, 60, + 49, 53, 57, 61, + 50, 54, 58, 62, + 51, 55, 59, 63, + 64, 68, 72, 76, + 65, 69, 73, 77, + 66, 70, 74, 78, + 67, 71, 75, 79, + 80, 84, 88, 92, + 81, 85, 89, 93, + 82, 86, 90, 94, + 83, 87, 91, 95, + 96, 100, 104, 108, + 97, 101, 105, 109, + 98, 102, 106, 110, + 99, 103, 107, 111, + 112, 116, 120, 124, + 113, 117, 121, 125, + 114, 118, 122, 126, + 115, 119, 123, 127, + 0, 5, 10, 15, + 1, 6, 11, 12, + 2, 7, 8, 13, + 3, 4, 9, 14, + 16, 21, 26, 31, + 17, 22, 27, 28, + 18, 23, 24, 29, + 19, 20, 25, 30, + 32, 37, 42, 47, + 33, 38, 43, 44, + 34, 39, 40, 45, + 35, 36, 41, 46, + 48, 53, 58, 63, + 49, 54, 59, 60, + 50, 55, 
56, 61, + 51, 52, 57, 62, + 64, 69, 74, 79, + 65, 70, 75, 76, + 66, 71, 72, 77, + 67, 68, 73, 78, + 80, 85, 90, 95, + 81, 86, 91, 92, + 82, 87, 88, 93, + 83, 84, 89, 94, + 96, 101, 106, 111, + 97, 102, 107, 108, + 98, 103, 104, 109, + 99, 100, 105, 110, + 112, 117, 122, 127, + 113, 118, 123, 124, + 114, 119, 120, 125, + 115, 116, 121, 126, + 0, 32, 64, 96, + 1, 33, 65, 97, + 2, 34, 66, 98, + 3, 35, 67, 99, + 4, 36, 68, 100, + 5, 37, 69, 101, + 6, 38, 70, 102, + 7, 39, 71, 103, + 8, 40, 72, 104, + 9, 41, 73, 105, + 10, 42, 74, 106, + 11, 43, 75, 107, + 12, 44, 76, 108, + 13, 45, 77, 109, + 14, 46, 78, 110, + 15, 47, 79, 111, + 16, 48, 80, 112, + 17, 49, 81, 113, + 18, 50, 82, 114, + 19, 51, 83, 115, + 20, 52, 84, 116, + 21, 53, 85, 117, + 22, 54, 86, 118, + 23, 55, 87, 119, + 24, 56, 88, 120, + 25, 57, 89, 121, + 26, 58, 90, 122, + 27, 59, 91, 123, + 28, 60, 92, 124, + 29, 61, 93, 125, + 30, 62, 94, 126, + 31, 63, 95, 127, + 0, 33, 80, 113, + 1, 48, 81, 96, + 2, 35, 82, 115, + 3, 50, 83, 98, + 4, 37, 84, 117, + 5, 52, 85, 100, + 6, 39, 86, 119, + 7, 54, 87, 102, + 8, 41, 88, 121, + 9, 56, 89, 104, + 10, 43, 90, 123, + 11, 58, 91, 106, + 12, 45, 92, 125, + 13, 60, 93, 108, + 14, 47, 94, 127, + 15, 62, 95, 110, + 16, 49, 64, 97, + 17, 32, 65, 112, + 18, 51, 66, 99, + 19, 34, 67, 114, + 20, 53, 68, 101, + 21, 36, 69, 116, + 22, 55, 70, 103, + 23, 38, 71, 118, + 24, 57, 72, 105, + 25, 40, 73, 120, + 26, 59, 74, 107, + 27, 42, 75, 122, + 28, 61, 76, 109, + 29, 44, 77, 124, + 30, 63, 78, 111, + 31, 46, 79, 126, + 0, 1, 2, 3, + 1, 2, 3, 0, + 2, 3, 0, 1, + 3, 0, 1, 2, + 4, 5, 6, 7, + 5, 6, 7, 4, + 6, 7, 4, 5, + 7, 4, 5, 6, + 8, 9, 10, 11, + 9, 10, 11, 8, + 10, 11, 8, 9, + 11, 8, 9, 10, + 12, 13, 14, 15, + 13, 14, 15, 12, + 14, 15, 12, 13, + 15, 12, 13, 14, + 16, 17, 18, 19, + 17, 18, 19, 16, + 18, 19, 16, 17, + 19, 16, 17, 18, + 20, 21, 22, 23, + 21, 22, 23, 20, + 22, 23, 20, 21, + 23, 20, 21, 22, + 24, 25, 26, 27, + 25, 26, 27, 24, + 26, 27, 24, 25, + 27, 24, 25, 26, + 28, 
29, 30, 31, + 29, 30, 31, 28, + 30, 31, 28, 29, + 31, 28, 29, 30, + 0, 1, 16, 17, + 1, 16, 17, 0, + 2, 3, 18, 19, + 3, 18, 19, 2, + 4, 5, 20, 21, + 5, 20, 21, 4, + 6, 7, 22, 23, + 7, 22, 23, 6, + 8, 9, 24, 25, + 9, 24, 25, 8, + 10, 11, 26, 27, + 11, 26, 27, 10, + 12, 13, 28, 29, + 13, 28, 29, 12, + 14, 15, 30, 31, + 15, 30, 31, 14, + 16, 17, 0, 1, + 17, 0, 1, 16, + 18, 19, 2, 3, + 19, 2, 3, 18, + 20, 21, 4, 5, + 21, 4, 5, 20, + 22, 23, 6, 7, + 23, 6, 7, 22, + 24, 25, 8, 9, + 25, 8, 9, 24, + 26, 27, 10, 11, + 27, 10, 11, 26, + 28, 29, 12, 13, + 29, 12, 13, 28, + 30, 31, 14, 15, + 31, 14, 15, 30 +}; + +inline __host__ __device__ void operator^=( uint4& a, uint4 s) { + a.x ^= s.x; a.y ^= s.y; a.z ^= s.z; a.w ^= s.w; +} + +__global__ void fill_blocks(uint32_t *scratchpad0, + uint32_t *scratchpad1, + uint32_t *scratchpad2, + uint32_t *scratchpad3, + uint32_t *scratchpad4, + uint32_t *scratchpad5, + uint32_t *seed, + uint32_t *out, + uint32_t *refs, // 32 bit + uint32_t *idxs, // first bit is keep flag, next 31 bit is current idx + uint32_t *segments, + int memsize, + int lanes, + int seg_length, + int seg_count, + int threads_per_chunk, + int thread_idx) { + extern __shared__ uint32_t shared[]; // lanes * BLOCK_SIZE_UINT [local state] + lanes * 32 [refs buffer] ( + lanes * 32 [idx buffer]) + + uint32_t *local_state = shared; + uint32_t *local_refs = shared + (lanes * BLOCK_SIZE_UINT); + uint32_t *local_idxs = shared + (lanes * BLOCK_SIZE_UINT + lanes * 32); + + uint4 tmp_a, tmp_b, tmp_c, tmp_d, tmp_p, tmp_q, tmp_l, tmp_m; + + int hash = blockIdx.x; + int mem_hash = hash + thread_idx; + int local_id = threadIdx.x; + int lane_length = seg_length * 4; + + int id = local_id % THREADS_PER_LANE; + int lane = local_id / THREADS_PER_LANE; + + int offset = id << 2; + + int i1_0_0 = 2 * offsets[offset]; + int i1_0_1 = i1_0_0 + 1; + int i1_1_0 = 2 * offsets[offset + 1]; + int i1_1_1 = i1_1_0 + 1; + int i1_2_0 = 2 * offsets[offset + 2]; + int i1_2_1 = i1_2_0 + 1; + int i1_3_0 = 2 * 
offsets[offset + 3]; + int i1_3_1 = i1_3_0 + 1; + + int i2_0_0 = 2 * offsets[offset + 128]; + int i2_0_1 = i2_0_0 + 1; + int i2_1_0 = 2 * offsets[offset + 129]; + int i2_1_1 = i2_1_0 + 1; + int i2_2_0 = 2 * offsets[offset + 130]; + int i2_2_1 = i2_2_0 + 1; + int i2_3_0 = 2 * offsets[offset + 131]; + int i2_3_1 = i2_3_0 + 1; + + int i3_0_0 = 2 * offsets[offset + 256]; + int i3_0_1 = i3_0_0 + 1; + int i3_1_0 = 2 * offsets[offset + 257]; + int i3_1_1 = i3_1_0 + 1; + int i3_2_0 = 2 * offsets[offset + 258]; + int i3_2_1 = i3_2_0 + 1; + int i3_3_0 = 2 * offsets[offset + 259]; + int i3_3_1 = i3_3_0 + 1; + + int i4_0_0 = 2 * offsets[offset + 384]; + int i4_0_1 = i4_0_0 + 1; + int i4_1_0 = 2 * offsets[offset + 385]; + int i4_1_1 = i4_1_0 + 1; + int i4_2_0 = 2 * offsets[offset + 386]; + int i4_2_1 = i4_2_0 + 1; + int i4_3_0 = 2 * offsets[offset + 387]; + int i4_3_1 = i4_3_0 + 1; + + int i_shfl1_1 = offsets[offset + 513]; + int i_shfl1_2 = offsets[offset + 514]; + int i_shfl1_3 = offsets[offset + 515]; + int i_shfl2_1 = offsets[offset + 641]; + int i_shfl2_2 = offsets[offset + 642]; + int i_shfl2_3 = offsets[offset + 643]; + + int scratchpad_location = mem_hash / threads_per_chunk; + uint4 *memory = reinterpret_cast(scratchpad0); + if(scratchpad_location == 1) memory = reinterpret_cast(scratchpad1); + if(scratchpad_location == 2) memory = reinterpret_cast(scratchpad2); + if(scratchpad_location == 3) memory = reinterpret_cast(scratchpad3); + if(scratchpad_location == 4) memory = reinterpret_cast(scratchpad4); + if(scratchpad_location == 5) memory = reinterpret_cast(scratchpad5); + int hash_offset = mem_hash - scratchpad_location * threads_per_chunk; + memory = memory + hash_offset * (memsize >> 4); // memsize / 16 -> 16 bytes in uint4 + + uint32_t *mem_seed = seed + hash * lanes * 2 * BLOCK_SIZE_UINT; + + uint32_t *seed_src = mem_seed + lane * 2 * BLOCK_SIZE_UINT; + uint4 *seed_dst = memory + lane * lane_length * BLOCK_SIZE_UINT4; + + seed_dst[id] = 
make_uint4(seed_src[i1_0_0], seed_src[i1_0_1], seed_src[i1_1_0], seed_src[i1_1_1]); + seed_dst[id + 32] = make_uint4(seed_src[i1_2_0], seed_src[i1_2_1], seed_src[i1_3_0], seed_src[i1_3_1]); + seed_src += BLOCK_SIZE_UINT; + seed_dst += BLOCK_SIZE_UINT4; + seed_dst[id] = make_uint4(seed_src[i1_0_0], seed_src[i1_0_1], seed_src[i1_1_0], seed_src[i1_1_1]); + seed_dst[id + 32] = make_uint4(seed_src[i1_2_0], seed_src[i1_2_1], seed_src[i1_3_0], seed_src[i1_3_1]); + + uint4 *next_block; + uint4 *prev_block; + uint4 *ref_block; + uint32_t *seg_refs, *seg_idxs; + + local_state = local_state + lane * BLOCK_SIZE_UINT; + local_refs = local_refs + lane * 32; + local_idxs = local_idxs + lane * 32; + + segments += (lane * 3); + + for(int s = 0; s < (seg_count / lanes); s++) { + int idx = ((s == 0) ? 2 : 0); // index for first slice in each lane is 2 + int with_xor = ((s >= 4) ? 1 : 0); + int keep = 1; + int slice = s % 4; + int pass = s / 4; + + uint32_t *cur_seg = &segments[s * lanes * 3]; + + uint32_t cur_idx = cur_seg[0]; + uint32_t prev_idx = cur_seg[1]; + uint32_t seg_type = cur_seg[2]; + uint32_t ref_idx = 0; + + prev_block = memory + prev_idx * BLOCK_SIZE_UINT4; + + tmp_a = prev_block[id]; + tmp_b = prev_block[id + 32]; + + __syncthreads(); + + if(seg_type == 0) { + seg_refs = refs + ((s * lanes + lane) * seg_length - ((s > 0) ? lanes : lane) * 2); + if(idxs != NULL) seg_idxs = idxs + ((s * lanes + lane) * seg_length - ((s > 0) ? 
lanes : lane) * 2); + + for (cur_idx--;idx < seg_length; seg_refs += 32, seg_idxs += 32) { + uint64_t i_limit = seg_length - idx; + if (i_limit > 32) i_limit = 32; + + local_refs[id] = seg_refs[id]; + ref_idx = local_refs[0]; + + if(idxs != NULL) { + local_idxs[id] = seg_idxs[id]; + cur_idx = local_idxs[0]; + keep = cur_idx & 0x80000000; + cur_idx = cur_idx & 0x7FFFFFFF; + } else + cur_idx++; + + ref_block = memory + ref_idx * BLOCK_SIZE_UINT4; + tmp_p = ref_block[id]; + tmp_q = ref_block[id + 32]; + + for (int i = 0; i < i_limit; i++, idx++) { + next_block = memory + cur_idx * BLOCK_SIZE_UINT4; + if(with_xor == 1) { + tmp_l = next_block[id]; + tmp_m = next_block[id + 32]; + } + + tmp_a ^= tmp_p; + tmp_b ^= tmp_q; + + if (i < (i_limit - 1)) { + ref_idx = local_refs[i + 1]; + + if(idxs != NULL) { + cur_idx = local_idxs[i + 1]; + keep = cur_idx & 0x80000000; + cur_idx = cur_idx & 0x7FFFFFFF; + } + else + cur_idx++; + + ref_block = memory + ref_idx * BLOCK_SIZE_UINT4; + tmp_p = ref_block[id]; + tmp_q = ref_block[id + 32]; + } + + tmp_c = tmp_a; + tmp_d = tmp_b; + + G1(local_state); + G2(local_state); + G3(local_state); + G4(local_state); + + if(with_xor == 1) { + tmp_c ^= tmp_l; + tmp_d ^= tmp_m; + } + + tmp_a ^= tmp_c; + tmp_b ^= tmp_d; + + if(keep > 0) { + next_block[id] = tmp_a; + next_block[id + 32] = tmp_b; + } + } + } + } + else { + + for (; idx < seg_length; idx++, cur_idx++) { + next_block = memory + cur_idx * BLOCK_SIZE_UINT4; + + if(with_xor == 1) { + tmp_l = next_block[id]; + tmp_m = next_block[id + 32]; + } + + uint32_t pseudo_rand_lo = __shfl_sync(0xffffffff, tmp_a.x, 0); + uint32_t pseudo_rand_hi = __shfl_sync(0xffffffff, tmp_a.y, 0); + + uint64_t ref_lane = pseudo_rand_hi % lanes; // thr_cost + uint32_t reference_area_size = 0; + if(pass > 0) { + if (lane == ref_lane) { + reference_area_size = lane_length - seg_length + idx - 1; + } else { + reference_area_size = lane_length - seg_length + ((idx == 0) ? 
(-1) : 0); + } + } + else { + if (lane == ref_lane) { + reference_area_size = slice * seg_length + idx - 1; // seg_length + } else { + reference_area_size = slice * seg_length + ((idx == 0) ? (-1) : 0); + } + } + asm("{mul.hi.u32 %0, %1, %1; mul.hi.u32 %0, %0, %2; }": "=r"(pseudo_rand_lo) : "r"(pseudo_rand_lo), "r"(reference_area_size)); + + uint32_t relative_position = reference_area_size - 1 - pseudo_rand_lo; + + ref_idx = ref_lane * lane_length + (((pass > 0 && slice < 3) ? ((slice + 1) * seg_length) : 0) + relative_position) % lane_length; + + ref_block = memory + ref_idx * BLOCK_SIZE_UINT4; + + tmp_a ^= ref_block[id]; + tmp_b ^= ref_block[id + 32]; + + tmp_c = tmp_a; + tmp_d = tmp_b; + + G1(local_state); + G2(local_state); + G3(local_state); + G4(local_state); + + if(with_xor == 1) { + tmp_c ^= tmp_l; + tmp_d ^= tmp_m; + } + + tmp_a ^= tmp_c; + tmp_b ^= tmp_d; + + next_block[id] = tmp_a; + next_block[id + 32] = tmp_b; + } + } + } + + local_state[i1_0_0] = tmp_a.x; + local_state[i1_0_1] = tmp_a.y; + local_state[i1_1_0] = tmp_a.z; + local_state[i1_1_1] = tmp_a.w; + local_state[i1_2_0] = tmp_b.x; + local_state[i1_2_1] = tmp_b.y; + local_state[i1_3_0] = tmp_b.z; + local_state[i1_3_1] = tmp_b.w; + + __syncthreads(); + + // at this point local_state will contain the final blocks + + if(lane == 0) { // first lane needs to acumulate results + tmp_a = make_uint4(0, 0, 0, 0); + tmp_b = make_uint4(0, 0, 0, 0); + + for(int l=0; l> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (pwdlen % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + + uint32_t nonce = (preseed[9] >> 24) | (preseed[10] << 8); + nonce += hash; + local_preseed[9] = (preseed[9] & 0x00FFFFFF) | (nonce << 24); + local_preseed[10] = (preseed[10] & 0xFF000000) | (nonce >> 8); + } + + int buf_len = blake2b_init(h, ARGON2_PREHASH_DIGEST_LENGTH_UINT, thr_id); + *value = lanes; //lanes + buf_len = blake2b_update(value, 1, h, buf, 
buf_len, thr_id); + *value = 32; //outlen + buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id); + *value = memsz; //m_cost + buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id); + *value = passes; //t_cost + buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id); + *value = ARGON2_VERSION; //version + buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id); + *value = ARGON2_TYPE_VALUE; //type + buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id); + *value = pwdlen * 4; //pw_len + buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id); + buf_len = blake2b_update(local_preseed, pwdlen, h, buf, buf_len, thr_id); + *value = saltlen * 4; //salt_len + buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id); + buf_len = blake2b_update(local_preseed, saltlen, h, buf, buf_len, thr_id); + *value = 0; //secret_len + buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id); + buf_len = blake2b_update(NULL, 0, h, buf, buf_len, thr_id); + *value = 0; //ad_len + buf_len = blake2b_update(value, 1, h, buf, buf_len, thr_id); + buf_len = blake2b_update(NULL, 0, h, buf, buf_len, thr_id); + + blake2b_final(local_mem, ARGON2_PREHASH_DIGEST_LENGTH_UINT, h, buf, buf_len, thr_id); + + if (thr_id == 0) { + local_mem[ARGON2_PREHASH_DIGEST_LENGTH_UINT] = idx; + local_mem[ARGON2_PREHASH_DIGEST_LENGTH_UINT + 1] = lane; + } + + blake2b_digestLong(local_seed, ARGON2_DWORDS_IN_BLOCK, local_mem, ARGON2_PREHASH_SEED_LENGTH_UINT, thr_id, + &local_mem[20]); + } +} + +__global__ void posthash ( + uint32_t *hash, + uint32_t *out, + uint32_t *preseed) { + extern __shared__ uint32_t shared[]; // size = 120 + + int hash_id = blockIdx.x; + int thread = threadIdx.x; + + uint32_t *local_hash = hash + hash_id * ((ARGON2_RAW_LENGTH / 4) + 1); + uint32_t *local_out = out + hash_id * BLOCK_SIZE_UINT; + + blake2b_digestLong(local_hash, ARGON2_RAW_LENGTH / 4, local_out, ARGON2_DWORDS_IN_BLOCK, thread, shared); + + if(thread == 0) { + uint32_t nonce = (preseed[9] >> 24) 
| (preseed[10] << 8); + nonce += hash_id; + local_hash[ARGON2_RAW_LENGTH / 4] = nonce; + } +} + +void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size) { + Argon2Profile *profile = device->profile_info.profile; + + device->error = cudaSetDevice(device->cuda_index); + if(device->error != cudaSuccess) { + device->error_message = "Error setting current device for memory allocation."; + return; + } + + size_t allocated_mem_for_current_chunk = 0; + + if (chunks > 0) { + allocated_mem_for_current_chunk = chunks > 1 ? chunk_size : (size_t)ceil(chunk_size * chunks); + chunks -= 1; + } + else { + allocated_mem_for_current_chunk = 1; + } + device->error = cudaMalloc(&device->arguments.memory_chunk_0, allocated_mem_for_current_chunk); + if (device->error != cudaSuccess) { + device->error_message = "Error allocating memory."; + return; + } + if (chunks > 0) { + allocated_mem_for_current_chunk = chunks > 1 ? chunk_size : (size_t)ceil(chunk_size * chunks); + chunks -= 1; + } + else { + allocated_mem_for_current_chunk = 1; + } + device->error = cudaMalloc(&device->arguments.memory_chunk_1, allocated_mem_for_current_chunk); + if (device->error != cudaSuccess) { + device->error_message = "Error allocating memory."; + return; + } + if (chunks > 0) { + allocated_mem_for_current_chunk = chunks > 1 ? chunk_size : (size_t)ceil(chunk_size * chunks); + chunks -= 1; + } + else { + allocated_mem_for_current_chunk = 1; + } + device->error = cudaMalloc(&device->arguments.memory_chunk_2, allocated_mem_for_current_chunk); + if (device->error != cudaSuccess) { + device->error_message = "Error allocating memory."; + return; + } + if (chunks > 0) { + allocated_mem_for_current_chunk = chunks > 1 ? 
chunk_size : (size_t)ceil(chunk_size * chunks); + chunks -= 1; + } + else { + allocated_mem_for_current_chunk = 1; + } + device->error = cudaMalloc(&device->arguments.memory_chunk_3, allocated_mem_for_current_chunk); + if (device->error != cudaSuccess) { + device->error_message = "Error allocating memory."; + return; + } + if (chunks > 0) { + allocated_mem_for_current_chunk = chunks > 1 ? chunk_size : (size_t)ceil(chunk_size * chunks); + chunks -= 1; + } + else { + allocated_mem_for_current_chunk = 1; + } + device->error = cudaMalloc(&device->arguments.memory_chunk_4, allocated_mem_for_current_chunk); + if (device->error != cudaSuccess) { + device->error_message = "Error allocating memory."; + return; + } + if (chunks > 0) { + allocated_mem_for_current_chunk = chunks > 1 ? chunk_size : (size_t)ceil(chunk_size * chunks); + chunks -= 1; + } + else { + allocated_mem_for_current_chunk = 1; + } + device->error = cudaMalloc(&device->arguments.memory_chunk_5, allocated_mem_for_current_chunk); + if (device->error != cudaSuccess) { + device->error_message = "Error allocating memory."; + return; + } + + uint32_t *refs = (uint32_t *)malloc(profile->blockRefsSize * sizeof(uint32_t)); + for(int i=0;iblockRefsSize;i++) { + refs[i] = profile->blockRefs[i*3 + 1]; + } + + device->error = cudaMalloc(&device->arguments.refs, profile->blockRefsSize * sizeof(uint32_t)); + if(device->error != cudaSuccess) { + device->error_message = "Error allocating memory."; + return; + } + + device->error = cudaMemcpy(device->arguments.refs, refs, profile->blockRefsSize * sizeof(uint32_t), cudaMemcpyHostToDevice); + if(device->error != cudaSuccess) { + device->error_message = "Error copying memory."; + return; + } + free(refs); + + if(profile->succesiveIdxs == 1) { + device->arguments.idxs = NULL; + } + else { + uint32_t *idxs = (uint32_t *) malloc(profile->blockRefsSize * sizeof(uint32_t)); + for (int i = 0; i < profile->blockRefsSize; i++) { + idxs[i] = profile->blockRefs[i * 3]; + if 
(profile->blockRefs[i * 3 + 2] == 1) { + idxs[i] |= 0x80000000; + } + } + + device->error = cudaMalloc(&device->arguments.idxs, profile->blockRefsSize * sizeof(uint32_t)); + if (device->error != cudaSuccess) { + device->error_message = "Error allocating memory."; + return; + } + + device->error = cudaMemcpy(device->arguments.idxs, idxs, profile->blockRefsSize * sizeof(uint32_t), + cudaMemcpyHostToDevice); + if (device->error != cudaSuccess) { + device->error_message = "Error copying memory."; + return; + } + free(idxs); + } + + //reorganize segments data + device->error = cudaMalloc(&device->arguments.segments, profile->segCount * 3 * sizeof(uint32_t)); + if(device->error != cudaSuccess) { + device->error_message = "Error allocating memory."; + return; + } + device->error = cudaMemcpy(device->arguments.segments, profile->segments, profile->segCount * 3 * sizeof(uint32_t), cudaMemcpyHostToDevice); + if(device->error != cudaSuccess) { + device->error_message = "Error copying memory."; + return; + } + +#ifdef PARALLEL_CUDA + int threads = device->profile_info.threads / 2; +#else + int threads = device->profile_info.threads; +#endif + + size_t preseed_memory_size = profile->pwdLen * 4; + size_t seed_memory_size = threads * (profile->thrCost * 2) * ARGON2_BLOCK_SIZE; + size_t out_memory_size = threads * ARGON2_BLOCK_SIZE; + size_t hash_memory_size = threads * (xmrig::ARGON2_HASHLEN + 4); + + device->error = cudaMalloc(&device->arguments.preseed_memory[0], preseed_memory_size); + if (device->error != cudaSuccess) { + device->error_message = "Error allocating memory."; + return; + } + device->error = cudaMalloc(&device->arguments.seed_memory[0], seed_memory_size); + if (device->error != cudaSuccess) { + device->error_message = "Error allocating memory."; + return; + } + device->error = cudaMalloc(&device->arguments.out_memory[0], out_memory_size); + if (device->error != cudaSuccess) { + device->error_message = "Error allocating memory."; + return; + } + device->error = 
cudaMalloc(&device->arguments.hash_memory[0], hash_memory_size); + if (device->error != cudaSuccess) { + device->error_message = "Error allocating memory."; + return; + } + device->error = cudaMallocHost(&device->arguments.host_seed_memory[0], 132 * threads); + if (device->error != cudaSuccess) { + device->error_message = "Error allocating pinned memory."; + return; + } + device->error = cudaMalloc(&device->arguments.preseed_memory[1], preseed_memory_size); + if (device->error != cudaSuccess) { + device->error_message = "Error allocating memory."; + return; + } + device->error = cudaMalloc(&device->arguments.seed_memory[1], seed_memory_size); + if (device->error != cudaSuccess) { + device->error_message = "Error allocating memory."; + return; + } + device->error = cudaMalloc(&device->arguments.out_memory[1], out_memory_size); + if (device->error != cudaSuccess) { + device->error_message = "Error allocating memory."; + return; + } + device->error = cudaMalloc(&device->arguments.hash_memory[1], hash_memory_size); + if (device->error != cudaSuccess) { + device->error_message = "Error allocating memory."; + return; + } + device->error = cudaMallocHost(&device->arguments.host_seed_memory[1], 132 * threads); + if (device->error != cudaSuccess) { + device->error_message = "Error allocating pinned memory."; + return; + } +} + +void cuda_free(cuda_device_info *device) { + cudaSetDevice(device->cuda_index); + + if(device->arguments.idxs != NULL) { + cudaFree(device->arguments.idxs); + device->arguments.idxs = NULL; + } + + if(device->arguments.refs != NULL) { + cudaFree(device->arguments.refs); + device->arguments.refs = NULL; + } + + if(device->arguments.segments != NULL) { + cudaFree(device->arguments.segments); + device->arguments.segments = NULL; + } + + if(device->arguments.memory_chunk_0 != NULL) { + cudaFree(device->arguments.memory_chunk_0); + device->arguments.memory_chunk_0 = NULL; + } + + if(device->arguments.memory_chunk_1 != NULL) { + 
cudaFree(device->arguments.memory_chunk_1); + device->arguments.memory_chunk_1 = NULL; + } + + if(device->arguments.memory_chunk_2 != NULL) { + cudaFree(device->arguments.memory_chunk_2); + device->arguments.memory_chunk_2 = NULL; + } + + if(device->arguments.memory_chunk_3 != NULL) { + cudaFree(device->arguments.memory_chunk_3); + device->arguments.memory_chunk_3 = NULL; + } + + if(device->arguments.memory_chunk_4 != NULL) { + cudaFree(device->arguments.memory_chunk_4); + device->arguments.memory_chunk_4 = NULL; + } + + if(device->arguments.memory_chunk_5 != NULL) { + cudaFree(device->arguments.memory_chunk_5); + device->arguments.memory_chunk_5 = NULL; + } + + if(device->arguments.preseed_memory != NULL) { + for(int i=0;i<2;i++) { + if(device->arguments.preseed_memory[i] != NULL) + cudaFree(device->arguments.preseed_memory[i]); + device->arguments.preseed_memory[i] = NULL; + } + } + + if(device->arguments.seed_memory != NULL) { + for(int i=0;i<2;i++) { + if(device->arguments.seed_memory[i] != NULL) + cudaFree(device->arguments.seed_memory[i]); + device->arguments.seed_memory[i] = NULL; + } + } + + if(device->arguments.out_memory != NULL) { + for(int i=0;i<2;i++) { + if(device->arguments.out_memory[i] != NULL) + cudaFree(device->arguments.out_memory[i]); + device->arguments.out_memory[i] = NULL; + } + } + + if(device->arguments.hash_memory != NULL) { + for(int i=0;i<2;i++) { + if(device->arguments.hash_memory[i] != NULL) + cudaFree(device->arguments.hash_memory[i]); + device->arguments.hash_memory[i] = NULL; + } + } + + if(device->arguments.host_seed_memory != NULL) { + for(int i=0;i<2;i++) { + if(device->arguments.host_seed_memory[i] != NULL) + cudaFreeHost(device->arguments.host_seed_memory[i]); + device->arguments.host_seed_memory[i] = NULL; + } + } + + cudaDeviceReset(); +} + +bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data) { + cuda_gpumgmt_thread_data *gpumgmt_thread = (cuda_gpumgmt_thread_data *)user_data; + 
cuda_device_info *device = gpumgmt_thread->device; + cudaStream_t stream = (cudaStream_t)gpumgmt_thread->device_data; + + int sessions = max(profile->thrCost * 2, (uint32_t)8); + double hashes_per_block = sessions / (profile->thrCost * 2.0); + size_t work_items = sessions * 4; + + gpumgmt_thread->lock(); + + memcpy(device->arguments.host_seed_memory[gpumgmt_thread->thread_id], memory, gpumgmt_thread->hashData.inSize); + + device->error = cudaMemcpyAsync(device->arguments.preseed_memory[gpumgmt_thread->thread_id], device->arguments.host_seed_memory[gpumgmt_thread->thread_id], gpumgmt_thread->hashData.inSize, cudaMemcpyHostToDevice, stream); + if (device->error != cudaSuccess) { + device->error_message = "Error writing to gpu memory."; + gpumgmt_thread->unlock(); + return false; + } + + prehash <<< ceil(threads / hashes_per_block), work_items, sessions * BLAKE_SHARED_MEM, stream>>> ( + device->arguments.preseed_memory[gpumgmt_thread->thread_id], + device->arguments.seed_memory[gpumgmt_thread->thread_id], + profile->memCost, + profile->thrCost, + profile->segCount / (4 * profile->thrCost), + gpumgmt_thread->hashData.inSize / 4, + profile->saltLen, + threads); + + return true; +} + +void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data) { + cuda_gpumgmt_thread_data *gpumgmt_thread = (cuda_gpumgmt_thread_data *)user_data; + cuda_device_info *device = gpumgmt_thread->device; + cudaStream_t stream = (cudaStream_t)gpumgmt_thread->device_data; + + size_t work_items = KERNEL_WORKGROUP_SIZE * profile->thrCost; + size_t shared_mem = profile->thrCost * (ARGON2_BLOCK_SIZE + 128 + (profile->succesiveIdxs == 1 ? 
128 : 0)); + + fill_blocks <<>> ((uint32_t*)device->arguments.memory_chunk_0, + (uint32_t*)device->arguments.memory_chunk_1, + (uint32_t*)device->arguments.memory_chunk_2, + (uint32_t*)device->arguments.memory_chunk_3, + (uint32_t*)device->arguments.memory_chunk_4, + (uint32_t*)device->arguments.memory_chunk_5, + device->arguments.seed_memory[gpumgmt_thread->thread_id], + device->arguments.out_memory[gpumgmt_thread->thread_id], + device->arguments.refs, + device->arguments.idxs, + device->arguments.segments, + profile->memSize, + profile->thrCost, + profile->segSize, + profile->segCount, + device->profile_info.threads_per_chunk, + gpumgmt_thread->threads_idx); + + return (void *)1; +} + +bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data) { + cuda_gpumgmt_thread_data *gpumgmt_thread = (cuda_gpumgmt_thread_data *)user_data; + cuda_device_info *device = gpumgmt_thread->device; + cudaStream_t stream = (cudaStream_t)gpumgmt_thread->device_data; + + size_t work_items = 4; + + posthash <<>> ( + device->arguments.hash_memory[gpumgmt_thread->thread_id], + device->arguments.out_memory[gpumgmt_thread->thread_id], + device->arguments.preseed_memory[gpumgmt_thread->thread_id]); + + device->error = cudaMemcpyAsync(device->arguments.host_seed_memory[gpumgmt_thread->thread_id], device->arguments.hash_memory[gpumgmt_thread->thread_id], threads * (xmrig::ARGON2_HASHLEN + 4), cudaMemcpyDeviceToHost, stream); + if (device->error != cudaSuccess) { + device->error_message = "Error reading gpu memory."; + gpumgmt_thread->unlock(); + return false; + } + + while(cudaStreamQuery(stream) != cudaSuccess) { + this_thread::sleep_for(chrono::milliseconds(10)); + continue; + } + + memcpy(memory, device->arguments.host_seed_memory[gpumgmt_thread->thread_id], threads * (xmrig::ARGON2_HASHLEN + 4)); + gpumgmt_thread->unlock(); + + return memory; +} \ No newline at end of file diff --git a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp 
b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp new file mode 100755 index 00000000..b217dc79 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.cpp @@ -0,0 +1,888 @@ +// +// Created by Haifa Bogdan Adnan on 03/08/2018. +// + +#include +#include "../../../common/common.h" + +#include "crypto/argon2_hasher/hash/Hasher.h" +#include "crypto/argon2_hasher/hash/argon2/Argon2.h" + +#include "OpenCLHasher.h" +#include "OpenCLKernel.h" + +#include "crypto/argon2_hasher/common/DLLExport.h" + +#if defined(WITH_OPENCL) + +#ifndef CL_DEVICE_BOARD_NAME_AMD +#define CL_DEVICE_BOARD_NAME_AMD 0x4038 +#endif +#ifndef CL_DEVICE_TOPOLOGY_AMD +#define CL_DEVICE_TOPOLOGY_AMD 0x4037 +#endif +#ifndef CL_DEVICE_PCI_BUS_ID_NV +#define CL_DEVICE_PCI_BUS_ID_NV 0x4008 +#endif +#ifndef CL_DEVICE_PCI_SLOT_ID_NV +#define CL_DEVICE_PCI_SLOT_ID_NV 0x4009 +#endif + +typedef union +{ + struct { cl_uint type; cl_uint data[5]; } raw; + struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie; +} device_topology_amd; + +#define KERNEL_WORKGROUP_SIZE 32 + +opencl_hasher::opencl_hasher() { + m_type = "GPU"; + m_subType = "OPENCL"; + m_shortSubType = "OCL"; + m_intensity = 0; + m_description = ""; + m_computingThreads = 0; +} + +opencl_hasher::~opencl_hasher() { +// this->cleanup(); +} + +bool opencl_hasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) { + cl_int error = CL_SUCCESS; + string error_message; + + m_profile = getArgon2Profile(algorithm, variant); + + __devices = __query_opencl_devices(error, error_message); + if(error != CL_SUCCESS) { + m_description = "No compatible GPU detected: " + error_message; + return false; + } + + if (__devices.empty()) { + m_description = "No compatible GPU detected."; + return false; + } + + return true; +} + +vector opencl_hasher::__query_opencl_devices(cl_int &error, string &error_message) { + cl_int err; + + cl_uint platform_count = 0; + cl_uint device_count = 0; + + vector 
result; + + clGetPlatformIDs(0, NULL, &platform_count); + if(platform_count == 0) { + return result; + } + + cl_platform_id *platforms = (cl_platform_id*)malloc(platform_count * sizeof(cl_platform_id)); + + err=clGetPlatformIDs(platform_count, platforms, &platform_count); + if(err != CL_SUCCESS) { + free(platforms); + error = err; + error_message = "Error querying for opencl platforms."; + return result; + } + + int counter = 0; + + for(uint32_t i=0; i < platform_count; i++) { + device_count = 0; + clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &device_count); + if(device_count == 0) { + continue; + } + + cl_device_id * devices = (cl_device_id*)malloc(device_count * sizeof(cl_device_id)); + err=clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, device_count, devices, &device_count); + + if(err != CL_SUCCESS) { + free(devices); + error = err; + error_message = "Error querying for opencl devices."; + continue; + } + + for(uint32_t j=0; j < device_count; j++) { + opencl_device_info *info = __get_device_info(platforms[i], devices[j]); + if(info->error != CL_SUCCESS) { + error = info->error; + error_message = info->error_message; + } + else { + info->device_index = counter; + result.push_back(info); + counter++; + } + } + + free(devices); + } + + free(platforms); + + return result; +} + +opencl_device_info *opencl_hasher::__get_device_info(cl_platform_id platform, cl_device_id device) { + opencl_device_info *device_info = new opencl_device_info(CL_SUCCESS, ""); + + device_info->platform = platform; + device_info->device = device; + + char *buffer; + size_t sz; + + // device name + string device_vendor; + sz = 0; + clGetDeviceInfo(device, CL_DEVICE_VENDOR, 0, NULL, &sz); + buffer = (char *)malloc(sz + 1); + device_info->error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sz, buffer, &sz); + if(device_info->error != CL_SUCCESS) { + free(buffer); + device_info->error_message = "Error querying device vendor."; + return device_info; + } + else { + buffer[sz] = 0; + 
device_vendor = buffer; + free(buffer); + } + + string device_name; + cl_device_info query_type = CL_DEVICE_NAME; + + if(device_vendor.find("Advanced Micro Devices") != string::npos) + query_type = CL_DEVICE_BOARD_NAME_AMD; + + sz = 0; + clGetDeviceInfo(device, query_type, 0, NULL, &sz); + buffer = (char *) malloc(sz + 1); + device_info->error = clGetDeviceInfo(device, query_type, sz, buffer, &sz); + if (device_info->error != CL_SUCCESS) { + free(buffer); + device_info->error_message = "Error querying device name."; + return device_info; + } else { + buffer[sz] = 0; + device_name = buffer; + free(buffer); + } + + string device_version; + sz = 0; + clGetDeviceInfo(device, CL_DEVICE_VERSION, 0, NULL, &sz); + buffer = (char *)malloc(sz + 1); + device_info->error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sz, buffer, &sz); + if(device_info->error != CL_SUCCESS) { + free(buffer); + device_info->error_message = "Error querying device version."; + return device_info; + } + else { + buffer[sz] = 0; + device_version = buffer; + free(buffer); + } + + device_info->device_string = device_vendor + " - " + device_name/* + " : " + device_version*/; + + device_info->error = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(device_info->max_mem_size), &(device_info->max_mem_size), NULL); + if(device_info->error != CL_SUCCESS) { + device_info->error_message = "Error querying device global memory size."; + return device_info; + } + + device_info->error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(device_info->max_allocable_mem_size), &(device_info->max_allocable_mem_size), NULL); + if(device_info->error != CL_SUCCESS) { + device_info->error_message = "Error querying device max memory allocation."; + return device_info; + } + + double mem_in_gb = device_info->max_mem_size / 1073741824.0; + stringstream ss; + ss << setprecision(2) << mem_in_gb; + device_info->device_string += (" (" + ss.str() + "GB)"); + + return device_info; +} + +bool 
opencl_hasher::configure(xmrig::HasherConfig &config) { + int index = config.getGPUCardsCount(); + double intensity = 0; + + int total_threads = 0; + intensity = config.getAverageGPUIntensity(); + + if (intensity == 0) { + m_intensity = 0; + m_description = "Status: DISABLED - by user."; + return false; + } + + bool cards_selected = false; + + intensity = 0; + + for(vector::iterator d = __devices.begin(); d != __devices.end(); d++, index++) { + stringstream ss; + ss << "["<< (index + 1) << "] " << (*d)->device_string; + string device_description = ss.str(); + (*d)->device_index = index; + (*d)->profile_info.profile = m_profile; + + if(config.gpuFilter().size() > 0) { + bool found = false; + for(xmrig::GPUFilter fit : config.gpuFilter()) { + if(device_description.find(fit.filter) != string::npos) { + found = true; + break; + } + } + if(!found) { + (*d)->profile_info.threads = 0; + ss << " - DISABLED" << endl; + m_description += ss.str(); + continue; + } + else { + cards_selected = true; + } + } + else { + cards_selected = true; + } + + ss << endl; + + double device_intensity = config.getGPUIntensity((*d)->device_index); + + m_description += ss.str(); + + if(!(__setup_device_info((*d), device_intensity))) { + m_description += (*d)->error_message; + m_description += "\n"; + continue; + }; + + DeviceInfo device; + + if((*d)->device_string.find("Advanced Micro Devices") != string::npos) { + device_topology_amd amdtopo; + if(clGetDeviceInfo((*d)->device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL) == CL_SUCCESS) { + char bus_id[50]; + sprintf(bus_id, "%02x:%02x.%x", amdtopo.pcie.bus, amdtopo.pcie.device, amdtopo.pcie.function); + device.bus_id = bus_id; + } + } + else if((*d)->device_string.find("NVIDIA") != string::npos) { + cl_uint bus; + cl_uint slot; + + if(clGetDeviceInfo ((*d)->device, CL_DEVICE_PCI_BUS_ID_NV, sizeof(bus), &bus, NULL) == CL_SUCCESS) { + if(clGetDeviceInfo ((*d)->device, CL_DEVICE_PCI_SLOT_ID_NV, sizeof(slot), &slot, NULL) == 
CL_SUCCESS) { + char bus_id[50]; + sprintf(bus_id, "%02x:%02x.0", bus, slot); + device.bus_id = bus_id; + } + } + } + + device.name = (*d)->device_string; + device.intensity = device_intensity; + storeDeviceInfo((*d)->device_index, device); + + __enabledDevices.push_back(*d); + + total_threads += (*d)->profile_info.threads; + intensity += device_intensity; + } + + config.addGPUCardsCount(index - config.getGPUCardsCount()); + + if(!cards_selected) { + m_intensity = 0; + m_description += "Status: DISABLED - no card enabled because of filtering."; + return false; + } + + if (total_threads == 0) { + m_intensity = 0; + m_description += "Status: DISABLED - not enough resources."; + return false; + } + + buildThreadData(); + + m_intensity = intensity / __enabledDevices.size(); + m_computingThreads = __enabledDevices.size() * 2; // 2 computing threads for each device + m_description += "Status: ENABLED - with " + to_string(total_threads) + " threads."; + + return true; +} + +bool opencl_hasher::__setup_device_info(opencl_device_info *device, double intensity) { + cl_int error; + + cl_context_properties properties[] = { + CL_CONTEXT_PLATFORM, (cl_context_properties) device->platform, + 0}; + + device->context = clCreateContext(properties, 1, &(device->device), NULL, NULL, &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error getting device context."; + return false; + } + + device->queue = clCreateCommandQueue(device->context, device->device, CL_QUEUE_PROFILING_ENABLE, &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error getting device command queue."; + return false; + } + + const char *srcptr[] = {OpenCLKernel.c_str()}; + size_t srcsize = OpenCLKernel.size(); + + device->program = clCreateProgramWithSource(device->context, 1, srcptr, &srcsize, &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating opencl program for device."; + return 
false; + } + + error = clBuildProgram(device->program, 1, &device->device, "", NULL, NULL); + if (error != CL_SUCCESS) { + size_t log_size; + clGetProgramBuildInfo(device->program, device->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); + char *log = (char *) malloc(log_size + 1); + clGetProgramBuildInfo(device->program, device->device, CL_PROGRAM_BUILD_LOG, log_size, log, NULL); + log[log_size] = 0; + string build_log = log; + free(log); + + device->error = error; + device->error_message = "Error building opencl program for device: " + build_log; + return false; + } + + device->kernel_prehash = clCreateKernel(device->program, "prehash", &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating opencl prehash kernel for device."; + return false; + } + device->kernel_fill_blocks = clCreateKernel(device->program, "fill_blocks", &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating opencl main kernel for device."; + return false; + } + device->kernel_posthash = clCreateKernel(device->program, "posthash", &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating opencl posthash kernel for device."; + return false; + } + + device->profile_info.threads_per_chunk = (uint32_t) (device->max_allocable_mem_size / device->profile_info.profile->memSize); + size_t chunk_size = device->profile_info.threads_per_chunk * device->profile_info.profile->memSize; + + if (chunk_size == 0) { + device->error = -1; + device->error_message = "Not enough memory on GPU."; + return false; + } + + uint64_t usable_memory = device->max_mem_size; + double chunks = (double) usable_memory / (double) chunk_size; + + uint32_t max_threads = (uint32_t) (device->profile_info.threads_per_chunk * chunks); + + if (max_threads == 0) { + device->error = -1; + device->error_message = "Not enough memory on GPU."; + return false; + } + + device->profile_info.threads = 
(uint32_t) (max_threads * intensity / 100.0); + device->profile_info.threads = (device->profile_info.threads / 4) * 4; // make it divisible by 4 + if (max_threads > 0 && device->profile_info.threads == 0 && intensity > 0) + device->profile_info.threads = 4; + + double counter = (double) device->profile_info.threads / (double) device->profile_info.threads_per_chunk; + size_t allocated_mem_for_current_chunk = 0; + + if (counter > 0) { + if (counter > 1) { + allocated_mem_for_current_chunk = chunk_size; + } else { + allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter); + } + counter -= 1; + } else { + allocated_mem_for_current_chunk = 1; + } + device->arguments.memory_chunk_0 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, + allocated_mem_for_current_chunk, NULL, &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + if (counter > 0) { + if (counter > 1) { + allocated_mem_for_current_chunk = chunk_size; + } else { + allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter); + } + counter -= 1; + } else { + allocated_mem_for_current_chunk = 1; + } + device->arguments.memory_chunk_1 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, + allocated_mem_for_current_chunk, NULL, &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + if (counter > 0) { + if (counter > 1) { + allocated_mem_for_current_chunk = chunk_size; + } else { + allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter); + } + counter -= 1; + } else { + allocated_mem_for_current_chunk = 1; + } + device->arguments.memory_chunk_2 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, + allocated_mem_for_current_chunk, NULL, &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + if (counter > 0) { 
+ if (counter > 1) { + allocated_mem_for_current_chunk = chunk_size; + } else { + allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter); + } + counter -= 1; + } else { + allocated_mem_for_current_chunk = 1; + } + device->arguments.memory_chunk_3 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, + allocated_mem_for_current_chunk, NULL, &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + if (counter > 0) { + if (counter > 1) { + allocated_mem_for_current_chunk = chunk_size; + } else { + allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter); + } + counter -= 1; + } else { + allocated_mem_for_current_chunk = 1; + } + device->arguments.memory_chunk_4 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, + allocated_mem_for_current_chunk, NULL, &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + if (counter > 0) { + if (counter > 1) { + allocated_mem_for_current_chunk = chunk_size; + } else { + allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter); + } + counter -= 1; + } else { + allocated_mem_for_current_chunk = 1; + } + device->arguments.memory_chunk_5 = clCreateBuffer(device->context, CL_MEM_READ_WRITE, + allocated_mem_for_current_chunk, NULL, &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + device->arguments.refs = clCreateBuffer(device->context, CL_MEM_READ_ONLY, + device->profile_info.profile->blockRefsSize * sizeof(uint32_t), NULL, + &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + if (device->profile_info.profile->succesiveIdxs == 1) { + device->arguments.idxs = NULL; + } + else { + device->arguments.idxs = clCreateBuffer(device->context, 
CL_MEM_READ_ONLY, + device->profile_info.profile->blockRefsSize * sizeof(uint32_t), NULL, + &error); + if (error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + } + + device->arguments.segments = clCreateBuffer(device->context, CL_MEM_READ_ONLY, device->profile_info.profile->segCount * 3 * sizeof(uint32_t), NULL, &error); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + size_t preseed_memory_size = device->profile_info.profile->pwdLen * 4; + size_t seed_memory_size = device->profile_info.threads * (device->profile_info.profile->thrCost * 2) * ARGON2_BLOCK_SIZE; + size_t out_memory_size = device->profile_info.threads * ARGON2_BLOCK_SIZE; + size_t hash_memory_size = device->profile_info.threads * (xmrig::ARGON2_HASHLEN + 4); + + device->arguments.preseed_memory[0] = clCreateBuffer(device->context, CL_MEM_READ_ONLY, preseed_memory_size, NULL, &error); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + device->arguments.preseed_memory[1] = clCreateBuffer(device->context, CL_MEM_READ_ONLY, preseed_memory_size, NULL, &error); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + device->arguments.seed_memory[0] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, seed_memory_size, NULL, &error); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + device->arguments.seed_memory[1] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, seed_memory_size, NULL, &error); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + device->arguments.out_memory[0] = clCreateBuffer(device->context, 
CL_MEM_READ_WRITE, out_memory_size, NULL, &error); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + device->arguments.out_memory[1] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, out_memory_size, NULL, &error); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + device->arguments.hash_memory[0] = clCreateBuffer(device->context, CL_MEM_WRITE_ONLY, hash_memory_size, NULL, &error); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + device->arguments.hash_memory[1] = clCreateBuffer(device->context, CL_MEM_WRITE_ONLY, hash_memory_size, NULL, &error); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error creating memory buffer."; + return false; + } + + //optimise address sizes + uint32_t *refs = (uint32_t *)malloc(device->profile_info.profile->blockRefsSize * sizeof(uint32_t)); + for(int i=0;iprofile_info.profile->blockRefsSize;i++) { + refs[i] = device->profile_info.profile->blockRefs[i*3 + 1]; + } + + error=clEnqueueWriteBuffer(device->queue, device->arguments.refs, CL_TRUE, 0, device->profile_info.profile->blockRefsSize * sizeof(uint32_t), refs, 0, NULL, NULL); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error writing to gpu memory."; + return false; + } + + free(refs); + + if(device->profile_info.profile->succesiveIdxs == 0) { + uint32_t *idxs = (uint32_t *) malloc(device->profile_info.profile->blockRefsSize * sizeof(uint32_t)); + for (int i = 0; i < device->profile_info.profile->blockRefsSize; i++) { + idxs[i] = device->profile_info.profile->blockRefs[i * 3]; + if (device->profile_info.profile->blockRefs[i * 3 + 2] == 1) { + idxs[i] |= 0x80000000; + } + } + + error=clEnqueueWriteBuffer(device->queue, device->arguments.idxs, CL_TRUE, 0, 
device->profile_info.profile->blockRefsSize * sizeof(uint32_t), idxs, 0, NULL, NULL); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error writing to gpu memory."; + return false; + } + + free(idxs); + } + + error=clEnqueueWriteBuffer(device->queue, device->arguments.segments, CL_TRUE, 0, device->profile_info.profile->segCount * 3 * sizeof(uint32_t), device->profile_info.profile->segments, 0, NULL, NULL); + if(error != CL_SUCCESS) { + device->error = error; + device->error_message = "Error writing to gpu memory."; + return false; + } + + clSetKernelArg(device->kernel_fill_blocks, 0, sizeof(device->arguments.memory_chunk_0), &device->arguments.memory_chunk_0); + clSetKernelArg(device->kernel_fill_blocks, 1, sizeof(device->arguments.memory_chunk_1), &device->arguments.memory_chunk_1); + clSetKernelArg(device->kernel_fill_blocks, 2, sizeof(device->arguments.memory_chunk_2), &device->arguments.memory_chunk_2); + clSetKernelArg(device->kernel_fill_blocks, 3, sizeof(device->arguments.memory_chunk_3), &device->arguments.memory_chunk_3); + clSetKernelArg(device->kernel_fill_blocks, 4, sizeof(device->arguments.memory_chunk_4), &device->arguments.memory_chunk_4); + clSetKernelArg(device->kernel_fill_blocks, 5, sizeof(device->arguments.memory_chunk_5), &device->arguments.memory_chunk_5); + clSetKernelArg(device->kernel_fill_blocks, 8, sizeof(device->arguments.refs), &device->arguments.refs); + if(device->profile_info.profile->succesiveIdxs == 0) + clSetKernelArg(device->kernel_fill_blocks, 9, sizeof(device->arguments.idxs), &device->arguments.idxs); + else + clSetKernelArg(device->kernel_fill_blocks, 9, sizeof(cl_mem), NULL); + clSetKernelArg(device->kernel_fill_blocks, 10, sizeof(device->arguments.segments), &device->arguments.segments); + clSetKernelArg(device->kernel_fill_blocks, 11, sizeof(int32_t), &device->profile_info.profile->memSize); + clSetKernelArg(device->kernel_fill_blocks, 12, sizeof(int32_t), 
&device->profile_info.profile->thrCost);
+    clSetKernelArg(device->kernel_fill_blocks, 13, sizeof(int32_t), &device->profile_info.profile->segSize);
+    clSetKernelArg(device->kernel_fill_blocks, 14, sizeof(int32_t), &device->profile_info.profile->segCount);
+    clSetKernelArg(device->kernel_fill_blocks, 15, sizeof(int32_t), &device->profile_info.threads_per_chunk);
+
+    clSetKernelArg(device->kernel_prehash, 2, sizeof(int32_t), &device->profile_info.profile->memCost);
+    clSetKernelArg(device->kernel_prehash, 3, sizeof(int32_t), &device->profile_info.profile->thrCost);
+    int passes = device->profile_info.profile->segCount / (4 * device->profile_info.profile->thrCost);
+    clSetKernelArg(device->kernel_prehash, 4, sizeof(int32_t), &passes);
+    clSetKernelArg(device->kernel_prehash, 6, sizeof(int32_t), &device->profile_info.profile->saltLen);
+
+    return true;
+}
+
+// Prehash callback handed to Argon2 (see buildThreadData).
+//
+// memory    - host buffer with the input bytes; hashData.inSize bytes are uploaded.
+// threads   - number of hashes in the batch.
+// profile   - Argon2 profile; only thrCost is read here.
+// user_data - the opencl_gpumgmt_thread_data of the calling worker.
+//
+// Takes device_lock and, on success, returns true WITHOUT releasing it: the
+// lock is released by the posthash callback, or by whichever stage fails first.
+// On failure the OpenCL status and a message are stored on the device and
+// false is returned.
+bool opencl_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
+    opencl_gpumgmt_thread_data *worker = (opencl_gpumgmt_thread_data *)user_data;
+    opencl_device_info *dev = worker->device;
+    const int slot = worker->thread_id;   // selects this worker's buffer out of each [2] pair
+
+    // A work-group holds at least 16 sessions, 4 work items per session.
+    int sessions = max(profile->thrCost * 2, (uint32_t)16);
+    double hashes_per_block = sessions / (profile->thrCost * 2.0);
+    size_t local_size = sessions * 4;
+    size_t global_size = sessions * 4 * ceil(threads / hashes_per_block);
+
+    dev->device_lock.lock();
+
+    // Non-blocking upload; the queue is only drained by clFinish in the posthash callback.
+    cl_int status = clEnqueueWriteBuffer(dev->queue, dev->arguments.preseed_memory[slot],
+                                         CL_FALSE, 0, worker->hashData.inSize, memory, 0, NULL, NULL);
+    if (status != CL_SUCCESS) {
+        dev->error = status;
+        dev->error_message = "Error writing to gpu memory.";
+        dev->device_lock.unlock();
+        return false;
+    }
+
+    int inSizeInInt = worker->hashData.inSize / 4;
+    clSetKernelArg(dev->kernel_prehash, 0, sizeof(cl_mem), &dev->arguments.preseed_memory[slot]);
+    clSetKernelArg(dev->kernel_prehash, 1, sizeof(cl_mem), &dev->arguments.seed_memory[slot]);
+    clSetKernelArg(dev->kernel_prehash, 5, sizeof(int), &inSizeInInt);
+    clSetKernelArg(dev->kernel_prehash, 7, sizeof(int), &threads);
+    // Local scratch: 76 ulongs per session (the kernel source defines BLAKE_SHARED_MEM_ULONG as 76).
+    clSetKernelArg(dev->kernel_prehash, 8, sessions * sizeof(cl_ulong) * 76, NULL); // (preseed size is 16 ulongs = 128 bytes)
+
+    status = clEnqueueNDRangeKernel(dev->queue, dev->kernel_prehash, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
+    if (status != CL_SUCCESS) {
+        dev->error = status;
+        dev->error_message = "Error running the kernel.";
+        dev->device_lock.unlock();
+        return false;
+    }
+
+    return true;
+}
+
+// Fill-blocks callback handed to Argon2 (see buildThreadData).
+//
+// Binds this worker's seed/out buffers and the local scratchpad
+// (thrCost * ARGON2_QWORDS_IN_BLOCK ulongs), then enqueues kernel_fill_blocks
+// with one work-group of KERNEL_WORKGROUP_SIZE * thrCost items per hash.
+//
+// Returns a non-NULL marker on success. On failure it records the error on the
+// device, releases device_lock (acquired in the prehash callback) and returns NULL.
+void *opencl_kernel_filler(int threads, Argon2Profile *profile, void *user_data) {
+    opencl_gpumgmt_thread_data *worker = (opencl_gpumgmt_thread_data *)user_data;
+    opencl_device_info *dev = worker->device;
+    const int slot = worker->thread_id;
+
+    size_t global_size = threads * KERNEL_WORKGROUP_SIZE * profile->thrCost;
+    size_t local_size = KERNEL_WORKGROUP_SIZE * profile->thrCost;
+    size_t scratch_qwords = profile->thrCost * ARGON2_QWORDS_IN_BLOCK;
+
+    clSetKernelArg(dev->kernel_fill_blocks, 6, sizeof(cl_mem), &dev->arguments.seed_memory[slot]);
+    clSetKernelArg(dev->kernel_fill_blocks, 7, sizeof(cl_mem), &dev->arguments.out_memory[slot]);
+    clSetKernelArg(dev->kernel_fill_blocks, 16, sizeof(cl_ulong) * scratch_qwords, NULL);
+
+    cl_int status = clEnqueueNDRangeKernel(dev->queue, dev->kernel_fill_blocks, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
+    if (status == CL_SUCCESS) {
+        return (void *)1;
+    }
+
+    dev->error = status;
+    dev->error_message = "Error running the kernel.";
+    dev->device_lock.unlock();
+    return NULL;
+}
+
+bool
opencl_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
+    // Posthash callback handed to Argon2 (see buildThreadData).
+    //
+    // Runs kernel_posthash over this worker's out/preseed buffers, reads
+    // threads * (ARGON2_HASHLEN + 4) bytes of results back into `memory`, and
+    // waits for the queue to drain. device_lock is released on every path,
+    // success or failure. Returns false after recording the OpenCL status and
+    // a message on the device if any step fails.
+    opencl_gpumgmt_thread_data *worker = (opencl_gpumgmt_thread_data *)user_data;
+    opencl_device_info *dev = worker->device;
+    const int slot = worker->thread_id;
+
+    size_t global_size = threads * 4;
+    size_t local_size = 4;
+
+    clSetKernelArg(dev->kernel_posthash, 0, sizeof(cl_mem), &dev->arguments.hash_memory[slot]);
+    clSetKernelArg(dev->kernel_posthash, 1, sizeof(cl_mem), &dev->arguments.out_memory[slot]);
+    clSetKernelArg(dev->kernel_posthash, 2, sizeof(cl_mem), &dev->arguments.preseed_memory[slot]);
+    clSetKernelArg(dev->kernel_posthash, 3, sizeof(cl_ulong) * 60, NULL);
+
+    // Run the three steps in order and stop at the first one that fails.
+    const char *failure = NULL;
+    cl_int status = clEnqueueNDRangeKernel(dev->queue, dev->kernel_posthash, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
+    if (status != CL_SUCCESS) {
+        failure = "Error running the kernel.";
+    } else {
+        // Non-blocking read; the clFinish below is what makes `memory` valid.
+        status = clEnqueueReadBuffer(dev->queue, dev->arguments.hash_memory[slot], CL_FALSE, 0, threads * (xmrig::ARGON2_HASHLEN + 4), memory, 0, NULL, NULL);
+        if (status != CL_SUCCESS) {
+            failure = "Error reading gpu memory.";
+        } else {
+            status = clFinish(dev->queue);
+            if (status != CL_SUCCESS) {
+                failure = "Error flushing GPU queue.";
+            }
+        }
+    }
+
+    if (failure != NULL) {
+        dev->error = status;
+        dev->error_message = failure;
+    }
+
+    dev->device_lock.unlock();
+
+    return failure == NULL;
+}
+
+void opencl_hasher::buildThreadData() {
+    __thread_data = new opencl_gpumgmt_thread_data[__enabledDevices.size() * 2];
+
+    for(int i=0; i < __enabledDevices.size(); i++) {
+
opencl_device_info *device = __enabledDevices[i];
+        for(int threadId = 0; threadId < 2; threadId ++) {
+            opencl_gpumgmt_thread_data &thread_data = __thread_data[i * 2 + threadId];
+            thread_data.device = device;
+            thread_data.thread_id = threadId;
+            thread_data.argon2 = new Argon2(opencl_kernel_prehasher, opencl_kernel_filler, opencl_kernel_posthasher,
+                                            nullptr, &thread_data);
+            thread_data.argon2->setThreads(device->profile_info.threads);
+            thread_data.hashData.outSize = xmrig::ARGON2_HASHLEN + 4;
+        }
+    }
+}
+
+// Runs one batch of hashes for worker `threadIdx`.
+//
+// threadIdx - index into __thread_data (two consecutive entries per enabled device).
+// input     - input buffer; it is modified in place (see the nonce update below).
+// size      - number of input bytes.
+// output    - buffer handed to Argon2 through hashData.output.
+//
+// Returns the value of Argon2::generateHashes, or 0 (after logging) when the
+// device carries an OpenCL error.
+int opencl_hasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) {
+    opencl_gpumgmt_thread_data &threadData = __thread_data[threadIdx];
+    threadData.hashData.input = input;
+    threadData.hashData.inSize = size;
+    threadData.hashData.output = output;
+    int hashCount = threadData.argon2->generateHashes(*m_profile, threadData.hashData);
+    if(threadData.device->error != CL_SUCCESS) {
+        LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->error_message);
+        return 0;
+    }
+
+    // Advance the 32-bit value stored at byte offset 39 of the input by the
+    // batch size, so the next call works on fresh values.
+    // NOTE(review): offset 39 is not 4-byte aligned; this relies on the target
+    // tolerating unaligned 32-bit access.
+    uint32_t *nonce = ((uint32_t *)(((uint8_t*)threadData.hashData.input) + 39));
+    (*nonce) += threadData.device->profile_info.threads;
+
+    return hashCount;
+}
+
+// Releases the OpenCL objects of every known device and deletes the device
+// records. Buffers, kernels, program, queue and context are only released for
+// devices that were configured (profile_info.threads != 0).
+void opencl_hasher::cleanup() {
+    // `auto` replaces an iterator declaration whose template argument is
+    // missing in this copy; the unused local `platforms` vector was dropped.
+    for(auto it=__devices.begin(); it != __devices.end(); it++) {
+        if ((*it)->profile_info.threads != 0) {
+            clReleaseMemObject((*it)->arguments.memory_chunk_0);
+            clReleaseMemObject((*it)->arguments.memory_chunk_1);
+            clReleaseMemObject((*it)->arguments.memory_chunk_2);
+            clReleaseMemObject((*it)->arguments.memory_chunk_3);
+            clReleaseMemObject((*it)->arguments.memory_chunk_4);
+            clReleaseMemObject((*it)->arguments.memory_chunk_5);
+            clReleaseMemObject((*it)->arguments.refs);
+            clReleaseMemObject((*it)->arguments.segments);
+            clReleaseMemObject((*it)->arguments.preseed_memory[0]);
+            clReleaseMemObject((*it)->arguments.preseed_memory[1]);
+            clReleaseMemObject((*it)->arguments.seed_memory[0]);
+
clReleaseMemObject((*it)->arguments.seed_memory[1]);
+            clReleaseMemObject((*it)->arguments.out_memory[0]);
+            clReleaseMemObject((*it)->arguments.out_memory[1]);
+            clReleaseMemObject((*it)->arguments.hash_memory[0]);
+            clReleaseMemObject((*it)->arguments.hash_memory[1]);
+            // The idxs buffer is only created when the profile has no successive
+            // indexes (it is set to NULL otherwise); it was previously never released.
+            if ((*it)->arguments.idxs != NULL) {
+                clReleaseMemObject((*it)->arguments.idxs);
+            }
+
+            clReleaseKernel((*it)->kernel_prehash);
+            clReleaseKernel((*it)->kernel_fill_blocks);
+            clReleaseKernel((*it)->kernel_posthash);
+            clReleaseProgram((*it)->program);
+            clReleaseCommandQueue((*it)->queue);
+            clReleaseContext((*it)->context);
+        }
+        clReleaseDevice((*it)->device);
+        delete (*it);
+    }
+    __devices.clear();
+}
+
+// Returns the number of hashes one worker computes per batch
+// (profile_info.threads of the worker's device), or 0 when workerIdx does not
+// map to an enabled device.
+size_t opencl_hasher::parallelism(int workerIdx) {
+    // there are 2 computing threads per device, so divide by 2 to get device index
+    workerIdx /= 2;
+
+    // Valid device indexes are 0 .. size()-1. The previous `>` comparison let
+    // workerIdx == size() through and indexed one past the end of the vector.
+    if(workerIdx < 0 || (size_t)workerIdx >= __enabledDevices.size())
+        return 0;
+
+    return __enabledDevices[workerIdx]->profile_info.threads;
+}
+
+// Returns the number of enabled OpenCL devices.
+size_t opencl_hasher::deviceCount() {
+    return __enabledDevices.size();
+}
+
+REGISTER_HASHER(opencl_hasher);
+
+#endif // WITH_OPENCL
diff --git a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h
new file mode 100755
index 00000000..ece7c971
--- /dev/null
+++ b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLHasher.h
@@ -0,0 +1,110 @@
+//
+// Created by Haifa Bogdan Adnan on 03/08/2018.
+//
+
+#ifndef ARGON2_OPENCL_HASHER_H
+#define ARGON2_OPENCL_HASHER_H
+
+#if defined(WITH_OPENCL)
+
+#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
+
+// NOTE(review): the header names were missing from both #include lines in this
+// copy of the patch; the platform umbrella headers are assumed - confirm
+// against the repository.
+#if defined(__APPLE__) || defined(__MACOSX)
+#include <OpenCL/opencl.h>
+#else
+#include <CL/opencl.h>
+#endif // !__APPLE__
+
+// Device buffers bound to the kernels. The [2] arrays hold one buffer per
+// worker thread of a device (indexed by opencl_gpumgmt_thread_data::thread_id).
+struct opencl_kernel_arguments {
+    cl_mem memory_chunk_0;
+    cl_mem memory_chunk_1;
+    cl_mem memory_chunk_2;
+    cl_mem memory_chunk_3;
+    cl_mem memory_chunk_4;
+    cl_mem memory_chunk_5;
+    cl_mem refs;
+    cl_mem idxs;      // NULL when the profile uses successive indexes
+    cl_mem segments;
+    cl_mem preseed_memory[2];
+    cl_mem seed_memory[2];
+    cl_mem out_memory[2];
+    cl_mem hash_memory[2];
+};
+
+// Per-device sizing for the active Argon2 profile. threads == 0 marks a device
+// that has not been configured; cleanup() skips its buffers in that case.
+struct argon2profile_info {
+    argon2profile_info() {
+        threads = 0;
+        threads_per_chunk = 0;
+        profile = nullptr;   // previously left uninitialised
+    }
+
+    uint32_t threads;
+    uint32_t threads_per_chunk;
+    Argon2Profile *profile;
+};
+
+// Everything known about one OpenCL device: handles, kernels, buffers, limits
+// and the last error reported by the kernel callbacks.
+struct opencl_device_info {
+    // Stores the initial error state. All handles and buffers start out null
+    // and all sizes zero, so that code releasing them never sees indeterminate
+    // values (they were previously left uninitialised).
+    opencl_device_info(cl_int err, const string &err_msg) {
+        platform = nullptr;
+        device = nullptr;
+        context = nullptr;
+        queue = nullptr;
+        program = nullptr;
+        kernel_prehash = nullptr;
+        kernel_fill_blocks = nullptr;
+        kernel_posthash = nullptr;
+        device_index = 0;
+        arguments = opencl_kernel_arguments();   // value-initialisation zeroes every cl_mem
+        max_mem_size = 0;
+        max_allocable_mem_size = 0;
+        error = err;
+        error_message = err_msg;
+    }
+
+    cl_platform_id platform;
+    cl_device_id device;
+    cl_context context;
+    cl_command_queue queue;
+
+    cl_program program;
+    cl_kernel kernel_prehash;
+    cl_kernel kernel_fill_blocks;
+    cl_kernel kernel_posthash;
+
+    int device_index;
+
+    opencl_kernel_arguments arguments;
+    argon2profile_info profile_info;
+
+    string device_string;
+    uint64_t max_mem_size;
+    uint64_t max_allocable_mem_size;
+
+    cl_int error;
+    string error_message;
+
+    // Taken by the prehash callback and released by the posthash callback (or
+    // by the first stage that fails), serialising the two workers of a device.
+    mutex device_lock;
+};
+
+// State of one worker thread; passed to the kernel callbacks as user_data.
+struct opencl_gpumgmt_thread_data {
+    int thread_id;
+    opencl_device_info *device;
+    Argon2 *argon2;
+    HashData hashData;
+};
+
+class opencl_hasher : public Hasher {
+public:
+    opencl_hasher();
+    ~opencl_hasher();
+
+    virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant);
+    virtual bool configure(xmrig::HasherConfig &config);
+    virtual void cleanup();
+    virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output);
+    virtual size_t parallelism(int workerIdx);
+    virtual size_t deviceCount();
+
+private:
+    opencl_device_info *__get_device_info(cl_platform_id platform, cl_device_id device);
+
bool __setup_device_info(opencl_device_info *device, double intensity); + vector __query_opencl_devices(cl_int &error, string &error_message); + void buildThreadData(); + + vector __devices; + vector __enabledDevices; + opencl_gpumgmt_thread_data *__thread_data; + + Argon2Profile *m_profile; +}; + +#endif //WITH_OPENCL + +#endif //ARGON2_OPENCL_HASHER_H diff --git a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp new file mode 100644 index 00000000..b65539bc --- /dev/null +++ b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.cpp @@ -0,0 +1,1085 @@ +// +// Created by Haifa Bogdan Adnan on 06/08/2018. +// + +#include "../../../common/common.h" + +#include "OpenCLKernel.h" + +string OpenCLKernel = R"OCL( +#define THREADS_PER_LANE 32 +#define BLOCK_SIZE_ULONG 128 +#define BLOCK_SIZE_UINT 256 +#define ARGON2_PREHASH_DIGEST_LENGTH_UINT 16 +#define ARGON2_PREHASH_SEED_LENGTH_UINT 18 + +#define ARGON2_BLOCK_SIZE 1024 +#define ARGON2_DWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 4) + +#define BLAKE_SHARED_MEM_ULONG 76 + +#define ARGON2_RAW_LENGTH 8 + +#define ARGON2_TYPE_VALUE 2 +#define ARGON2_VERSION 0x13 + +#define BLOCK_BYTES 32 +#define OUT_BYTES 16 + +#define G(m, r, i, a, b, c, d) \ +do { \ + a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ +} while ((void)0, 0) + +#define G_S(m, a, b, c, d) \ +do { \ + a = a + b + m; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ +} while ((void)0, 0) + +#define ROUND(m, t, r, shfl) \ +do { \ + G(m, r, t, v0, v1, v2, v3); \ + shfl[t + 4] = v1; \ + shfl[t + 8] = v2; \ + shfl[t + 12] = v3; \ + barrier(CLK_LOCAL_MEM_FENCE); \ + v1 = shfl[((t + 1) % 4)+ 4]; \ + v2 = shfl[((t + 2) % 
4)+ 8]; \ + v3 = shfl[((t + 3) % 4)+ 12]; \ + G(m, r, (t + 4), v0, v1, v2, v3); \ + shfl[((t + 1) % 4)+ 4] = v1; \ + shfl[((t + 2) % 4)+ 8] = v2; \ + shfl[((t + 3) % 4)+ 12] = v3; \ + barrier(CLK_LOCAL_MEM_FENCE); \ + v1 = shfl[t + 4]; \ + v2 = shfl[t + 8]; \ + v3 = shfl[t + 12]; \ +} while ((void)0, 0) + +#define ROUND_S(m, t, shfl) \ +do { \ + G_S(m, v0, v1, v2, v3); \ + shfl[t + 4] = v1; \ + shfl[t + 8] = v2; \ + shfl[t + 12] = v3; \ + barrier(CLK_LOCAL_MEM_FENCE); \ + v1 = shfl[((t + 1) % 4)+ 4]; \ + v2 = shfl[((t + 2) % 4)+ 8]; \ + v3 = shfl[((t + 3) % 4)+ 12]; \ + G_S(m, v0, v1, v2, v3); \ + shfl[((t + 1) % 4)+ 4] = v1; \ + shfl[((t + 2) % 4)+ 8] = v2; \ + shfl[((t + 3) % 4)+ 12] = v3; \ + barrier(CLK_LOCAL_MEM_FENCE); \ + v1 = shfl[t + 4]; \ + v2 = shfl[t + 8]; \ + v3 = shfl[t + 12]; \ +} while ((void)0, 0) + +ulong rotr64(ulong x, ulong n) +{ + return rotate(x, 64 - n); +} + +__constant ulong blake2b_IV[8] = { + 0x6A09E667F3BCC908, 0xBB67AE8584CAA73B, + 0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1, + 0x510E527FADE682D1, 0x9B05688C2B3E6C1F, + 0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179 +}; + +__constant uint blake2b_sigma[12][16] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, + {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, + {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, + {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, + {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, +}; + +void blake2b_compress(__local ulong *h, __local ulong *m, ulong f0, __local ulong *shfl, int thr_id) +{ + ulong v0, v1, v2, v3; + + 
barrier(CLK_LOCAL_MEM_FENCE); + + v0 = h[thr_id]; + v1 = h[thr_id + 4]; + v2 = blake2b_IV[thr_id]; + v3 = blake2b_IV[thr_id + 4]; + + if(thr_id == 0) v3 ^= h[8]; + if(thr_id == 1) v3 ^= h[9]; + if(thr_id == 2) v3 ^= f0; + + ROUND(m, thr_id, 0, shfl); + ROUND(m, thr_id, 1, shfl); + ROUND(m, thr_id, 2, shfl); + ROUND(m, thr_id, 3, shfl); + ROUND(m, thr_id, 4, shfl); + ROUND(m, thr_id, 5, shfl); + ROUND(m, thr_id, 6, shfl); + ROUND(m, thr_id, 7, shfl); + ROUND(m, thr_id, 8, shfl); + ROUND(m, thr_id, 9, shfl); + ROUND(m, thr_id, 10, shfl); + ROUND(m, thr_id, 11, shfl); + + h[thr_id] ^= v0 ^ v2; + h[thr_id + 4] ^= v1 ^ v3; +} + +void blake2b_compress_static(__local ulong *h, ulong m, ulong f0, __local ulong *shfl, int thr_id) +{ + ulong v0, v1, v2, v3; + + barrier(CLK_LOCAL_MEM_FENCE); + + v0 = h[thr_id]; + v1 = h[thr_id + 4]; + v2 = blake2b_IV[thr_id]; + v3 = blake2b_IV[thr_id + 4]; + + if(thr_id == 0) v3 ^= h[8]; + if(thr_id == 1) v3 ^= h[9]; + if(thr_id == 2) v3 ^= f0; + + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + ROUND_S(m, thr_id, shfl); + + h[thr_id] ^= v0 ^ v2; + h[thr_id + 4] ^= v1 ^ v3; +} + +void blake2b_incrementCounter(__local ulong *h, int inc) +{ + h[8] += (inc * 4); + h[9] += (h[8] < (inc * 4)); +} + +void blake2b_final_global(__global uint *out, int out_len, __local ulong *h, __local uint *buf, int buf_len, __local ulong *shfl, int thr_id) +{ + int left = BLOCK_BYTES - buf_len; + __local uint *cursor_out_local = buf + buf_len; + + for(int i=0; i < (left >> 2); i++, cursor_out_local += 4) { + cursor_out_local[thr_id] = 0; + } + + if(thr_id == 0) { + for (int i = 0; i < (left % 4); i++) { + cursor_out_local[i] = 0; + } + blake2b_incrementCounter(h, buf_len); + } + + blake2b_compress(h, (__local 
ulong *)buf, 0xFFFFFFFFFFFFFFFF, shfl, thr_id); + + __local uint *cursor_in = (__local uint *)h; + __global uint *cursor_out_global = out; + + for(int i=0; i < (out_len >> 2); i++, cursor_in += 4, cursor_out_global += 4) { + cursor_out_global[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (out_len % 4); i++) { + cursor_out_global[i] = cursor_in[i]; + } + } +} + +void blake2b_final_local(__local uint *out, int out_len, __local ulong *h, __local uint *buf, int buf_len, __local ulong *shfl, int thr_id) +{ + int left = BLOCK_BYTES - buf_len; + __local uint *cursor_out = buf + buf_len; + + for(int i=0; i < (left >> 2); i++, cursor_out += 4) { + cursor_out[thr_id] = 0; + } + + if(thr_id == 0) { + for (int i = 0; i < (left % 4); i++) { + cursor_out[i] = 0; + } + blake2b_incrementCounter(h, buf_len); + } + + blake2b_compress(h, (__local ulong *)buf, 0xFFFFFFFFFFFFFFFF, shfl, thr_id); + + __local uint *cursor_in = (__local uint *)h; + cursor_out = out; + + for(int i=0; i < (out_len >> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (out_len % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + } +} + +int blake2b_update_global(__global uint *in, int in_len, __local ulong *h, __local uint *buf, int buf_len, __local ulong *shfl, int thr_id) +{ + __global uint *cursor_in = in; + __local uint *cursor_out = buf + buf_len; + + if (buf_len + in_len > BLOCK_BYTES) { + int left = BLOCK_BYTES - buf_len; + + for(int i=0; i < (left >> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (left % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + blake2b_incrementCounter(h, BLOCK_BYTES); + } + + blake2b_compress(h, (__local ulong *)buf, 0, shfl, thr_id); + + buf_len = 0; + + in_len -= left; + in += left; + + while (in_len > BLOCK_BYTES) { + if(thr_id == 0) + blake2b_incrementCounter(h, BLOCK_BYTES); 
+ + cursor_in = in; + cursor_out = buf; + + for(int i=0; i < (BLOCK_BYTES / 4); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + blake2b_compress(h, (__local ulong *)buf, 0, shfl, thr_id); + + in_len -= BLOCK_BYTES; + in += BLOCK_BYTES; + } + } + + cursor_in = in; + cursor_out = buf + buf_len; + + for(int i=0; i < (in_len >> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (in_len % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + } + + return buf_len + in_len; +} + +int blake2b_update_static(uint in, int in_len, __local ulong *h, __local uint *buf, int buf_len, __local ulong *shfl, int thr_id) +{ + ulong in64 = in; + in64 = in64 << 32; + in64 = in64 | in; + + __local uint *cursor_out = buf + buf_len; + + if (buf_len + in_len > BLOCK_BYTES) { + int left = BLOCK_BYTES - buf_len; + + for(int i=0; i < (left >> 2); i++, cursor_out += 4) { + cursor_out[thr_id] = in; + } + + if(thr_id == 0) { + for (int i = 0; i < (left % 4); i++) { + cursor_out[i] = in; + } + blake2b_incrementCounter(h, BLOCK_BYTES); + } + + blake2b_compress(h, (__local ulong *)buf, 0, shfl, thr_id); + + buf_len = 0; + + in_len -= left; + + while (in_len > BLOCK_BYTES) { + if(thr_id == 0) + blake2b_incrementCounter(h, BLOCK_BYTES); + + blake2b_compress_static(h, in64, 0, shfl, thr_id); + + in_len -= BLOCK_BYTES; + } + } + + cursor_out = buf + buf_len; + + for(int i=0; i < (in_len >> 2); i++, cursor_out += 4) { + cursor_out[thr_id] = in; + } + + if(thr_id == 0) { + for (int i = 0; i < (in_len % 4); i++) { + cursor_out[i] = in; + } + } + + return buf_len + in_len; +} + +int blake2b_update_local(__local uint *in, int in_len, __local ulong *h, __local uint *buf, int buf_len, __local ulong *shfl, int thr_id) +{ + __local uint *cursor_in = in; + __local uint *cursor_out = buf + buf_len; + + if (buf_len + in_len > BLOCK_BYTES) { + int left = BLOCK_BYTES - buf_len; + + for(int i=0; i < 
(left >> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (left % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + blake2b_incrementCounter(h, BLOCK_BYTES); + } + + blake2b_compress(h, (__local ulong *)buf, 0, shfl, thr_id); + + buf_len = 0; + + in_len -= left; + in += left; + + while (in_len > BLOCK_BYTES) { + if(thr_id == 0) + blake2b_incrementCounter(h, BLOCK_BYTES); + + cursor_in = in; + cursor_out = buf; + + for(int i=0; i < (BLOCK_BYTES / 4); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + blake2b_compress(h, (__local ulong *)buf, 0, shfl, thr_id); + + in_len -= BLOCK_BYTES; + in += BLOCK_BYTES; + } + } + + cursor_in = in; + cursor_out = buf + buf_len; + + for(int i=0; i < (in_len >> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (in_len % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + } + + return buf_len + in_len; +} + +int blake2b_init(__local ulong *h, int out_len, int thr_id) +{ + h[thr_id * 2] = blake2b_IV[thr_id * 2]; + h[thr_id * 2 + 1] = blake2b_IV[thr_id * 2 + 1]; + + if(thr_id == 0) { + h[8] = h[9] = 0; + h[0] = 0x6A09E667F3BCC908 ^ ((out_len * 4) | (1 << 16) | (1 << 24)); + } + + return 0; +} + +void blake2b_digestLong_global(__global uint *out, int out_len, + __global uint *in, int in_len, + int thr_id, __local ulong* shared) +{ + __local ulong *h = shared; + __local ulong *shfl = &h[10]; + __local uint *buf = (__local uint *)&shfl[16]; + __local uint *out_buffer = &buf[32]; + int buf_len; + + if(thr_id == 0) buf[0] = (out_len * 4); + buf_len = 1; + + if (out_len <= OUT_BYTES) { + blake2b_init(h, out_len, thr_id); + buf_len = blake2b_update_global(in, in_len, h, buf, buf_len, shfl, thr_id); + blake2b_final_global(out, out_len, h, buf, buf_len, shfl, thr_id); + } else { + __local uint *cursor_in = out_buffer; + __global uint 
*cursor_out = out; + + blake2b_init(h, OUT_BYTES, thr_id); + buf_len = blake2b_update_global(in, in_len, h, buf, buf_len, shfl, thr_id); + blake2b_final_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id); + + for(int i=0; i < (OUT_BYTES / 8); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + out += OUT_BYTES / 2; + + int to_produce = out_len - OUT_BYTES / 2; + while (to_produce > OUT_BYTES) { + buf_len = blake2b_init(h, OUT_BYTES, thr_id); + buf_len = blake2b_update_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id); + blake2b_final_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id); + + cursor_out = out; + cursor_in = out_buffer; + for(int i=0; i < (OUT_BYTES / 8); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + out += OUT_BYTES / 2; + to_produce -= OUT_BYTES / 2; + } + + buf_len = blake2b_init(h, to_produce, thr_id); + buf_len = blake2b_update_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id); + blake2b_final_global(out, to_produce, h, buf, buf_len, shfl, thr_id); + } +} + +void blake2b_digestLong_local(__global uint *out, int out_len, + __local uint *in, int in_len, + int thr_id, __local ulong* shared) +{ + __local ulong *h = shared; + __local ulong *shfl = &h[10]; + __local uint *buf = (__local uint *)&shfl[16]; + __local uint *out_buffer = &buf[32]; + int buf_len; + + if(thr_id == 0) buf[0] = (out_len * 4); + buf_len = 1; + + if (out_len <= OUT_BYTES) { + blake2b_init(h, out_len, thr_id); + buf_len = blake2b_update_local(in, in_len, h, buf, buf_len, shfl, thr_id); + blake2b_final_global(out, out_len, h, buf, buf_len, shfl, thr_id); + } else { + __local uint *cursor_in = out_buffer; + __global uint *cursor_out = out; + + blake2b_init(h, OUT_BYTES, thr_id); + buf_len = blake2b_update_local(in, in_len, h, buf, buf_len, shfl, thr_id); + blake2b_final_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id); + + for(int i=0; i < (OUT_BYTES / 8); 
i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + out += OUT_BYTES / 2; + + int to_produce = out_len - OUT_BYTES / 2; + while (to_produce > OUT_BYTES) { + buf_len = blake2b_init(h, OUT_BYTES, thr_id); + buf_len = blake2b_update_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id); + blake2b_final_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id); + + cursor_out = out; + cursor_in = out_buffer; + for(int i=0; i < (OUT_BYTES / 8); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + out += OUT_BYTES / 2; + to_produce -= OUT_BYTES / 2; + } + + buf_len = blake2b_init(h, to_produce, thr_id); + buf_len = blake2b_update_local(out_buffer, OUT_BYTES, h, buf, buf_len, shfl, thr_id); + blake2b_final_global(out, to_produce, h, buf, buf_len, shfl, thr_id); + } +} + +#define fBlaMka(x, y) ((x) + (y) + 2 * upsample(mul_hi((uint)(x), (uint)(y)), (uint)(x) * (uint)y)) + +#define COMPUTE \ + a = fBlaMka(a, b); \ + d = rotate(d ^ a, (ulong)32); \ + c = fBlaMka(c, d); \ + b = rotate(b ^ c, (ulong)40); \ + a = fBlaMka(a, b); \ + d = rotate(d ^ a, (ulong)48); \ + c = fBlaMka(c, d); \ + b = rotate(b ^ c, (ulong)1); + +__constant char offsets_round_1[32][4] = { + { 0, 4, 8, 12 }, + { 1, 5, 9, 13 }, + { 2, 6, 10, 14 }, + { 3, 7, 11, 15 }, + { 16, 20, 24, 28 }, + { 17, 21, 25, 29 }, + { 18, 22, 26, 30 }, + { 19, 23, 27, 31 }, + { 32, 36, 40, 44 }, + { 33, 37, 41, 45 }, + { 34, 38, 42, 46 }, + { 35, 39, 43, 47 }, + { 48, 52, 56, 60 }, + { 49, 53, 57, 61 }, + { 50, 54, 58, 62 }, + { 51, 55, 59, 63 }, + { 64, 68, 72, 76 }, + { 65, 69, 73, 77 }, + { 66, 70, 74, 78 }, + { 67, 71, 75, 79 }, + { 80, 84, 88, 92 }, + { 81, 85, 89, 93 }, + { 82, 86, 90, 94 }, + { 83, 87, 91, 95 }, + { 96, 100, 104, 108 }, + { 97, 101, 105, 109 }, + { 98, 102, 106, 110 }, + { 99, 103, 107, 111 }, + { 112, 116, 120, 124 }, + { 113, 117, 121, 125 }, + { 114, 118, 122, 126 }, + { 115, 119, 123, 127 }, +}; + +__constant char 
offsets_round_2[32][4] = { + { 0, 5, 10, 15 }, + { 1, 6, 11, 12 }, + { 2, 7, 8, 13 }, + { 3, 4, 9, 14 }, + { 16, 21, 26, 31 }, + { 17, 22, 27, 28 }, + { 18, 23, 24, 29 }, + { 19, 20, 25, 30 }, + { 32, 37, 42, 47 }, + { 33, 38, 43, 44 }, + { 34, 39, 40, 45 }, + { 35, 36, 41, 46 }, + { 48, 53, 58, 63 }, + { 49, 54, 59, 60 }, + { 50, 55, 56, 61 }, + { 51, 52, 57, 62 }, + { 64, 69, 74, 79 }, + { 65, 70, 75, 76 }, + { 66, 71, 72, 77 }, + { 67, 68, 73, 78 }, + { 80, 85, 90, 95 }, + { 81, 86, 91, 92 }, + { 82, 87, 88, 93 }, + { 83, 84, 89, 94 }, + { 96, 101, 106, 111 }, + { 97, 102, 107, 108 }, + { 98, 103, 104, 109 }, + { 99, 100, 105, 110 }, + { 112, 117, 122, 127 }, + { 113, 118, 123, 124 }, + { 114, 119, 120, 125 }, + { 115, 116, 121, 126 }, +}; + +__constant char offsets_round_3[32][4] = { + { 0, 32, 64, 96 }, + { 1, 33, 65, 97 }, + { 16, 48, 80, 112 }, + { 17, 49, 81, 113 }, + { 2, 34, 66, 98 }, + { 3, 35, 67, 99 }, + { 18, 50, 82, 114 }, + { 19, 51, 83, 115 }, + { 4, 36, 68, 100 }, + { 5, 37, 69, 101 }, + { 20, 52, 84, 116 }, + { 21, 53, 85, 117 }, + { 6, 38, 70, 102 }, + { 7, 39, 71, 103 }, + { 22, 54, 86, 118 }, + { 23, 55, 87, 119 }, + { 8, 40, 72, 104 }, + { 9, 41, 73, 105 }, + { 24, 56, 88, 120 }, + { 25, 57, 89, 121 }, + { 10, 42, 74, 106 }, + { 11, 43, 75, 107 }, + { 26, 58, 90, 122 }, + { 27, 59, 91, 123 }, + { 12, 44, 76, 108 }, + { 13, 45, 77, 109 }, + { 28, 60, 92, 124 }, + { 29, 61, 93, 125 }, + { 14, 46, 78, 110 }, + { 15, 47, 79, 111 }, + { 30, 62, 94, 126 }, + { 31, 63, 95, 127 }, +}; + +__constant char offsets_round_4[32][4] = { + { 0, 33, 80, 113 }, + { 1, 48, 81, 96 }, + { 16, 49, 64, 97 }, + { 17, 32, 65, 112 }, + { 2, 35, 82, 115 }, + { 3, 50, 83, 98 }, + { 18, 51, 66, 99 }, + { 19, 34, 67, 114 }, + { 4, 37, 84, 117 }, + { 5, 52, 85, 100 }, + { 20, 53, 68, 101 }, + { 21, 36, 69, 116 }, + { 6, 39, 86, 119 }, + { 7, 54, 87, 102 }, + { 22, 55, 70, 103 }, + { 23, 38, 71, 118 }, + { 8, 41, 88, 121 }, + { 9, 56, 89, 104 }, + { 24, 57, 72, 105 }, + { 
25, 40, 73, 120 }, + { 10, 43, 90, 123 }, + { 11, 58, 91, 106 }, + { 26, 59, 74, 107 }, + { 27, 42, 75, 122 }, + { 12, 45, 92, 125 }, + { 13, 60, 93, 108 }, + { 28, 61, 76, 109 }, + { 29, 44, 77, 124 }, + { 14, 47, 94, 127 }, + { 15, 62, 95, 110 }, + { 30, 63, 78, 111 }, + { 31, 46, 79, 126 }, +}; + +#define G1(data) \ +{ \ + barrier(CLK_LOCAL_MEM_FENCE); \ + a = data[i1_0]; \ + b = data[i1_1]; \ + c = data[i1_2]; \ + d = data[i1_3]; \ + COMPUTE \ + data[i1_1] = b; \ + data[i1_2] = c; \ + data[i1_3] = d; \ + barrier(CLK_LOCAL_MEM_FENCE); \ +} + +#define G2(data) \ +{ \ + b = data[i2_1]; \ + c = data[i2_2]; \ + d = data[i2_3]; \ + COMPUTE \ + data[i2_0] = a; \ + data[i2_1] = b; \ + data[i2_2] = c; \ + data[i2_3] = d; \ + barrier(CLK_LOCAL_MEM_FENCE); \ +} + +#define G3(data) \ +{ \ + a = data[i3_0]; \ + b = data[i3_1]; \ + c = data[i3_2]; \ + d = data[i3_3]; \ + COMPUTE \ + data[i3_1] = b; \ + data[i3_2] = c; \ + data[i3_3] = d; \ + barrier(CLK_LOCAL_MEM_FENCE); \ +} + +#define G4(data) \ +{ \ + b = data[i4_1]; \ + c = data[i4_2]; \ + d = data[i4_3]; \ + COMPUTE \ + data[i4_0] = a; \ + data[i4_1] = b; \ + data[i4_2] = c; \ + data[i4_3] = d; \ + barrier(CLK_LOCAL_MEM_FENCE); \ +} + +__kernel void fill_blocks(__global ulong *chunk_0, + __global ulong *chunk_1, + __global ulong *chunk_2, + __global ulong *chunk_3, + __global ulong *chunk_4, + __global ulong *chunk_5, + __global ulong *seed, + __global ulong *out, + __global uint *refs, + __global uint *idxs, + __global uint *segments, + int memsize, + int lanes, + int seg_length, + int seg_count, + int threads_per_chunk, + __local ulong *scratchpad) { // lanes * BLOCK_SIZE_ULONG + ulong4 tmp; + ulong a, b, c, d; + + int hash = get_group_id(0); + int local_id = get_local_id(0); + + int id = local_id % THREADS_PER_LANE; + int lane = local_id / THREADS_PER_LANE; + int lane_length = seg_length * 4; + + ulong chunks[6]; + chunks[0] = (ulong)chunk_0; + chunks[1] = (ulong)chunk_1; + chunks[2] = (ulong)chunk_2; + chunks[3] = 
(ulong)chunk_3; + chunks[4] = (ulong)chunk_4; + chunks[5] = (ulong)chunk_5; + int chunk_index = hash / threads_per_chunk; + int chunk_offset = hash - chunk_index * threads_per_chunk; + __global ulong *memory = (__global ulong *)chunks[chunk_index] + chunk_offset * (memsize / 8); + + int i1_0 = offsets_round_1[id][0]; + int i1_1 = offsets_round_1[id][1]; + int i1_2 = offsets_round_1[id][2]; + int i1_3 = offsets_round_1[id][3]; + + int i2_0 = offsets_round_2[id][0]; + int i2_1 = offsets_round_2[id][1]; + int i2_2 = offsets_round_2[id][2]; + int i2_3 = offsets_round_2[id][3]; + + int i3_0 = offsets_round_3[id][0]; + int i3_1 = offsets_round_3[id][1]; + int i3_2 = offsets_round_3[id][2]; + int i3_3 = offsets_round_3[id][3]; + + int i4_0 = offsets_round_4[id][0]; + int i4_1 = offsets_round_4[id][1]; + int i4_2 = offsets_round_4[id][2]; + int i4_3 = offsets_round_4[id][3]; + + __global ulong *out_mem = out + hash * BLOCK_SIZE_ULONG; + __global ulong *seed_mem = seed + hash * lanes * 2 * BLOCK_SIZE_ULONG + lane * 2 * BLOCK_SIZE_ULONG; + + __global ulong *seed_dst = memory + lane * lane_length * BLOCK_SIZE_ULONG; + + vstore4(vload4(id, seed_mem), id, seed_dst); + + seed_mem += BLOCK_SIZE_ULONG; + seed_dst += BLOCK_SIZE_ULONG; + + vstore4(vload4(id, seed_mem), id, seed_dst); + + __global ulong *next_block; + __global ulong *prev_block; + __global uint *seg_refs; + __global uint *seg_idxs; + + __local ulong *state = scratchpad + lane * BLOCK_SIZE_ULONG; + + segments += (lane * 3); + + for(int s=0; s < (seg_count / lanes); s++) { + int idx = ((s == 0) ? 2 : 0); // index for first slice in each lane is 2 + + int with_xor = ((s >= 4) ? 
1 : 0); + int keep = 1; + int slice = s % 4; + int pass = s / 4; + __global int *cur_seg = &segments[s * lanes * 3]; + + int cur_idx = cur_seg[0]; + int prev_idx = cur_seg[1]; + int seg_type = cur_seg[2]; + int ref_idx = 0; + ulong4 ref = 0, next = 0; + + prev_block = memory + prev_idx * BLOCK_SIZE_ULONG; + + tmp = vload4(id, prev_block); + + if(seg_type == 0) { + seg_refs = refs + ((s * lanes + lane) * seg_length - ((s > 0) ? lanes : lane) * 2); + ref_idx = seg_refs[0]; + + if(idxs != 0) { + seg_idxs = idxs + ((s * lanes + lane) * seg_length - ((s > 0) ? lanes : lane) * 2); + cur_idx = seg_idxs[0]; + } + + ulong4 nextref = vload4(id, memory + ref_idx * BLOCK_SIZE_ULONG); + + for (int i=0;idx < seg_length;i++, idx++) { + next_block = memory + (cur_idx & 0x7FFFFFFF) * BLOCK_SIZE_ULONG; + + if(with_xor == 1) + next = vload4(id, next_block); + + ref = nextref; + + if (idx < seg_length - 1) { + ref_idx = seg_refs[i + 1]; + + if(idxs != 0) { + keep = cur_idx & 0x80000000; + cur_idx = seg_idxs[i + 1]; + } + else + cur_idx++; + + nextref = vload4(id, memory + ref_idx * BLOCK_SIZE_ULONG); + } + + tmp ^= ref; + + vstore4(tmp, id, state); + + G1(state); + G2(state); + G3(state); + G4(state); + + if(with_xor == 1) + tmp ^= next; + + tmp ^= vload4(id, state); + + if(keep > 0) { + vstore4(tmp, id, next_block); + barrier(CLK_GLOBAL_MEM_FENCE); + } + } + } + else { + vstore4(tmp, id, state); + barrier(CLK_LOCAL_MEM_FENCE); + + for (int i=0;idx < seg_length;i++, idx++, cur_idx++) { + ulong pseudo_rand = state[0]; + + ulong ref_lane = ((pseudo_rand >> 32)) % lanes; // thr_cost + uint reference_area_size = 0; + + if(pass > 0) { + if (lane == ref_lane) { + reference_area_size = lane_length - seg_length + idx - 1; + } else { + reference_area_size = lane_length - seg_length + ((idx == 0) ? (-1) : 0); + } + } + else { + if (lane == ref_lane) { + reference_area_size = slice * seg_length + idx - 1; // seg_length + } else { + reference_area_size = slice * seg_length + ((idx == 0) ? 
(-1) : 0); + } + } + + ulong relative_position = pseudo_rand & 0xFFFFFFFF; + relative_position = (relative_position * relative_position) >> 32; + + relative_position = reference_area_size - 1 - + ((reference_area_size * relative_position) >> 32); + + ref_idx = ref_lane * lane_length + (((pass > 0 && slice < 3) ? ((slice + 1) * seg_length) : 0) + relative_position) % lane_length; + + ref = vload4(id, memory + ref_idx * BLOCK_SIZE_ULONG); + + next_block = memory + cur_idx * BLOCK_SIZE_ULONG; + + if(with_xor == 1) + next = vload4(id, next_block); + + tmp ^= ref; + + vstore4(tmp, id, state); + + G1(state); + G2(state); + G3(state); + G4(state); + + if(with_xor == 1) + tmp ^= next; + + tmp ^= vload4(id, state); + + vstore4(tmp, id, state); + vstore4(tmp, id, next_block); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + } + } + + vstore4(tmp, id, state); + barrier(CLK_LOCAL_MEM_FENCE); + + if(lane == 0) { // first lane needs to acumulate results + for(int l=1; l> 2); i++, cursor_in += 4, cursor_out += 4) { + cursor_out[thr_id] = cursor_in[thr_id]; + } + + if(thr_id == 0) { + for (int i = 0; i < (pwdlen % 4); i++) { + cursor_out[i] = cursor_in[i]; + } + + uint nonce = (preseed[9] >> 24) | (preseed[10] << 8); + nonce += hash; + local_preseed[9] = (preseed[9] & 0x00FFFFFF) | (nonce << 24); + local_preseed[10] = (preseed[10] & 0xFF000000) | (nonce >> 8); + } + + int buf_len = blake2b_init(h, ARGON2_PREHASH_DIGEST_LENGTH_UINT, thr_id); + *value = lanes; //lanes + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + *value = 32; //outlen + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + *value = memsz; //m_cost + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + *value = passes; //t_cost + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + *value = ARGON2_VERSION; //version + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + *value = 
ARGON2_TYPE_VALUE; //type + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + *value = pwdlen * 4; //pw_len + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + buf_len = blake2b_update_local(local_preseed, pwdlen, h, buf, buf_len, shfl, thr_id); + *value = saltlen * 4; //salt_len + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + buf_len = blake2b_update_local(local_preseed, saltlen, h, buf, buf_len, shfl, thr_id); + *value = 0; //secret_len + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + buf_len = blake2b_update_local(0, 0, h, buf, buf_len, shfl, thr_id); + *value = 0; //ad_len + buf_len = blake2b_update_local(value, 1, h, buf, buf_len, shfl, thr_id); + buf_len = blake2b_update_local(0, 0, h, buf, buf_len, shfl, thr_id); + + blake2b_final_local(local_mem, ARGON2_PREHASH_DIGEST_LENGTH_UINT, h, buf, buf_len, shfl, thr_id); + + if (thr_id == 0) { + local_mem[ARGON2_PREHASH_DIGEST_LENGTH_UINT] = idx; + local_mem[ARGON2_PREHASH_DIGEST_LENGTH_UINT + 1] = lane; + } + + blake2b_digestLong_local(local_seed, ARGON2_DWORDS_IN_BLOCK, local_mem, ARGON2_PREHASH_SEED_LENGTH_UINT, thr_id, (__local ulong *)&local_mem[20]); + } +} + +__kernel void posthash ( + __global uint *hash, + __global uint *out, + __global uint *preseed, + __local ulong *blake_shared) { + + int hash_id = get_group_id(0); + int thread = get_local_id(0); + + __global uint *local_hash = hash + hash_id * (ARGON2_RAW_LENGTH + 1); + __global uint *local_out = out + hash_id * BLOCK_SIZE_UINT; + + blake2b_digestLong_global(local_hash, ARGON2_RAW_LENGTH, local_out, ARGON2_DWORDS_IN_BLOCK, thread, blake_shared); + if(thread == 0) { + uint nonce = (preseed[9] >> 24) | (preseed[10] << 8); + nonce += hash_id; + local_hash[ARGON2_RAW_LENGTH] = nonce; + } +} + +)OCL"; diff --git a/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.h b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.h new file mode 100644 index 
00000000..386659f8 --- /dev/null +++ b/src/crypto/argon2_hasher/hash/gpu/opencl/OpenCLKernel.h @@ -0,0 +1,10 @@ +// +// Created by Haifa Bogdan Adnan on 06/08/2018. +// + +#ifndef ARGON2_OPENCL_KERNEL_H +#define ARGON2_OPENCL_KERNEL_H + +extern string OpenCLKernel; + +#endif //ARGON2_OPENCL_KERNEL_H diff --git a/src/crypto/asm/CryptonightR_soft_aes_template.inc b/src/crypto/asm/CryptonightR_soft_aes_template.inc deleted file mode 100644 index e9e1bb4f..00000000 --- a/src/crypto/asm/CryptonightR_soft_aes_template.inc +++ /dev/null @@ -1,281 +0,0 @@ -PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part1) -PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_mainloop) -PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part2) -PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part3) -PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_end) - -ALIGN(64) -FN_PREFIX(CryptonightR_soft_aes_template_part1): - mov rcx, [rcx] - - mov QWORD PTR [rsp+8], rcx - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 232 - - mov eax, [rcx+96] - mov ebx, [rcx+100] - mov esi, [rcx+104] - mov edx, [rcx+108] - mov [rsp+144], eax - mov [rsp+148], ebx - mov [rsp+152], esi - mov [rsp+156], edx - - mov rax, QWORD PTR [rcx+48] - mov r10, rcx - xor rax, QWORD PTR [rcx+16] - mov r8, QWORD PTR [rcx+32] - xor r8, QWORD PTR [rcx] - mov r9, QWORD PTR [rcx+40] - xor r9, QWORD PTR [rcx+8] - movq xmm4, rax - mov rdx, QWORD PTR [rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov r11, QWORD PTR [rcx+224] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r10+72] - mov rax, QWORD PTR [r10+80] - movq xmm0, rdx - xor rax, QWORD PTR [r10+64] - - movaps XMMWORD PTR [rsp+16], xmm6 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+48], xmm8 - movaps XMMWORD PTR [rsp+64], xmm9 - movaps XMMWORD PTR [rsp+80], xmm10 - movaps XMMWORD PTR [rsp+96], xmm11 - movaps XMMWORD PTR [rsp+112], xmm12 - movaps XMMWORD PTR [rsp+128], xmm13 - - movq xmm5, rax - - mov rax, r8 - punpcklqdq 
xmm4, xmm0 - and eax, 2097136 - movq xmm10, QWORD PTR [r10+96] - movq xmm0, rcx - mov rcx, QWORD PTR [r10+104] - xorps xmm9, xmm9 - mov QWORD PTR [rsp+328], rax - movq xmm12, r11 - mov QWORD PTR [rsp+320], r9 - punpcklqdq xmm5, xmm0 - movq xmm13, rcx - mov r12d, 524288 - - ALIGN(64) -FN_PREFIX(CryptonightR_soft_aes_template_mainloop): - movd xmm11, r12d - mov r12, QWORD PTR [r10+272] - lea r13, QWORD PTR [rax+r11] - mov esi, DWORD PTR [r13] - movq xmm0, r9 - mov r10d, DWORD PTR [r13+4] - movq xmm7, r8 - mov ebp, DWORD PTR [r13+12] - mov r14d, DWORD PTR [r13+8] - mov rdx, QWORD PTR [rsp+328] - movzx ecx, sil - shr esi, 8 - punpcklqdq xmm7, xmm0 - mov r15d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - mov edi, DWORD PTR [r12+rcx*4] - movzx ecx, r14b - shr r14d, 8 - mov ebx, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - shr ebp, 8 - mov r9d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - xor r15d, DWORD PTR [r12+rcx*4+1024] - movzx ecx, r14b - shr r14d, 8 - mov eax, r14d - shr eax, 8 - xor edi, DWORD PTR [r12+rcx*4+1024] - add eax, 256 - movzx ecx, bpl - shr ebp, 8 - xor ebx, DWORD PTR [r12+rcx*4+1024] - movzx ecx, sil - shr esi, 8 - xor r9d, DWORD PTR [r12+rcx*4+1024] - add r12, 2048 - movzx ecx, r10b - shr r10d, 8 - add r10d, 256 - mov r11d, DWORD PTR [r12+rax*4] - xor r11d, DWORD PTR [r12+rcx*4] - xor r11d, r9d - movzx ecx, sil - mov r10d, DWORD PTR [r12+r10*4] - shr esi, 8 - add esi, 256 - xor r10d, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - xor r10d, ebx - shr ebp, 8 - movd xmm1, r11d - add ebp, 256 - movq r11, xmm12 - mov r9d, DWORD PTR [r12+rcx*4] - xor r9d, DWORD PTR [r12+rsi*4] - mov eax, DWORD PTR [r12+rbp*4] - xor r9d, edi - movzx ecx, r14b - movd xmm0, r10d - movd xmm2, r9d - xor eax, DWORD PTR [r12+rcx*4] - mov rcx, rdx - xor eax, r15d - punpckldq xmm2, xmm1 - xor rcx, 16 - movd xmm6, eax - mov rax, rdx - punpckldq xmm6, xmm0 - xor rax, 32 - punpckldq xmm6, xmm2 - xor rdx, 48 - movdqu xmm2, XMMWORD PTR [rcx+r11] - pxor xmm6, xmm2 - pxor xmm6, 
xmm7 - paddq xmm2, xmm4 - movdqu xmm1, XMMWORD PTR [rax+r11] - movdqu xmm0, XMMWORD PTR [rdx+r11] - pxor xmm6, xmm1 - pxor xmm6, xmm0 - paddq xmm0, xmm5 - movdqu XMMWORD PTR [rcx+r11], xmm0 - movdqu XMMWORD PTR [rax+r11], xmm2 - movq rcx, xmm13 - paddq xmm1, xmm7 - movdqu XMMWORD PTR [rdx+r11], xmm1 - movq rdi, xmm6 - mov r10, rdi - and r10d, 2097136 - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [r13], xmm0 - - mov ebx, [rsp+144] - mov ebp, [rsp+152] - add ebx, [rsp+148] - add ebp, [rsp+156] - shl rbp, 32 - or rbx, rbp - - xor rbx, QWORD PTR [r10+r11] - lea r14, QWORD PTR [r10+r11] - mov rbp, QWORD PTR [r14+8] - - mov [rsp+160], rbx - mov [rsp+168], rdi - mov [rsp+176], rbp - mov [rsp+184], r10 - mov r10, rsp - - mov ebx, [rsp+144] - mov esi, [rsp+148] - mov edi, [rsp+152] - mov ebp, [rsp+156] - - movd esp, xmm7 - movaps xmm0, xmm7 - psrldq xmm0, 8 - movd r15d, xmm0 - movd eax, xmm4 - movd edx, xmm5 - movaps xmm0, xmm5 - psrldq xmm0, 8 - movd r9d, xmm0 - -FN_PREFIX(CryptonightR_soft_aes_template_part2): - mov rsp, r10 - mov [rsp+144], ebx - mov [rsp+148], esi - mov [rsp+152], edi - mov [rsp+156], ebp - - mov edi, edi - shl rbp, 32 - or rbp, rdi - xor r8, rbp - - mov ebx, ebx - shl rsi, 32 - or rsi, rbx - xor QWORD PTR [rsp+320], rsi - - mov rbx, [rsp+160] - mov rdi, [rsp+168] - mov rbp, [rsp+176] - mov r10, [rsp+184] - - mov r9, r10 - xor r9, 16 - mov rcx, r10 - xor rcx, 32 - xor r10, 48 - mov rax, rbx - mul rdi - movdqu xmm2, XMMWORD PTR [r9+r11] - movdqu xmm1, XMMWORD PTR [rcx+r11] - pxor xmm6, xmm2 - pxor xmm6, xmm1 - paddq xmm1, xmm7 - add r8, rdx - movdqu xmm0, XMMWORD PTR [r10+r11] - pxor xmm6, xmm0 - paddq xmm0, xmm5 - paddq xmm2, xmm4 - movdqu XMMWORD PTR [r9+r11], xmm0 - movdqa xmm5, xmm4 - mov r9, QWORD PTR [rsp+320] - movdqa xmm4, xmm6 - add r9, rax - movdqu XMMWORD PTR [rcx+r11], xmm2 - movdqu XMMWORD PTR [r10+r11], xmm1 - mov r10, QWORD PTR [rsp+304] - movd r12d, xmm11 - mov QWORD PTR [r14], r8 - xor r8, rbx - mov rax, r8 - mov QWORD PTR 
[r14+8], r9 - and eax, 2097136 - xor r9, rbp - mov QWORD PTR [rsp+320], r9 - mov QWORD PTR [rsp+328], rax - sub r12d, 1 - jne FN_PREFIX(CryptonightR_soft_aes_template_mainloop) - -FN_PREFIX(CryptonightR_soft_aes_template_part3): - movaps xmm6, XMMWORD PTR [rsp+16] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+48] - movaps xmm9, XMMWORD PTR [rsp+64] - movaps xmm10, XMMWORD PTR [rsp+80] - movaps xmm11, XMMWORD PTR [rsp+96] - movaps xmm12, XMMWORD PTR [rsp+112] - movaps xmm13, XMMWORD PTR [rsp+128] - - add rsp, 232 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - pop rbx - ret -FN_PREFIX(CryptonightR_soft_aes_template_end): diff --git a/src/crypto/asm/CryptonightR_soft_aes_template_win.inc b/src/crypto/asm/CryptonightR_soft_aes_template_win.inc deleted file mode 100644 index 589192ca..00000000 --- a/src/crypto/asm/CryptonightR_soft_aes_template_win.inc +++ /dev/null @@ -1,281 +0,0 @@ -PUBLIC CryptonightR_soft_aes_template_part1 -PUBLIC CryptonightR_soft_aes_template_mainloop -PUBLIC CryptonightR_soft_aes_template_part2 -PUBLIC CryptonightR_soft_aes_template_part3 -PUBLIC CryptonightR_soft_aes_template_end - -ALIGN(64) -CryptonightR_soft_aes_template_part1: - mov rcx, [rcx] - - mov QWORD PTR [rsp+8], rcx - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 232 - - mov eax, [rcx+96] - mov ebx, [rcx+100] - mov esi, [rcx+104] - mov edx, [rcx+108] - mov [rsp+144], eax - mov [rsp+148], ebx - mov [rsp+152], esi - mov [rsp+156], edx - - mov rax, QWORD PTR [rcx+48] - mov r10, rcx - xor rax, QWORD PTR [rcx+16] - mov r8, QWORD PTR [rcx+32] - xor r8, QWORD PTR [rcx] - mov r9, QWORD PTR [rcx+40] - xor r9, QWORD PTR [rcx+8] - movq xmm4, rax - mov rdx, QWORD PTR [rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov r11, QWORD PTR [rcx+224] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r10+72] - mov rax, QWORD PTR [r10+80] - movq xmm0, rdx - xor rax, QWORD PTR [r10+64] - - movaps XMMWORD PTR 
[rsp+16], xmm6 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+48], xmm8 - movaps XMMWORD PTR [rsp+64], xmm9 - movaps XMMWORD PTR [rsp+80], xmm10 - movaps XMMWORD PTR [rsp+96], xmm11 - movaps XMMWORD PTR [rsp+112], xmm12 - movaps XMMWORD PTR [rsp+128], xmm13 - - movq xmm5, rax - - mov rax, r8 - punpcklqdq xmm4, xmm0 - and eax, 2097136 - movq xmm10, QWORD PTR [r10+96] - movq xmm0, rcx - mov rcx, QWORD PTR [r10+104] - xorps xmm9, xmm9 - mov QWORD PTR [rsp+328], rax - movq xmm12, r11 - mov QWORD PTR [rsp+320], r9 - punpcklqdq xmm5, xmm0 - movq xmm13, rcx - mov r12d, 524288 - - ALIGN(64) -CryptonightR_soft_aes_template_mainloop: - movd xmm11, r12d - mov r12, QWORD PTR [r10+272] - lea r13, QWORD PTR [rax+r11] - mov esi, DWORD PTR [r13] - movq xmm0, r9 - mov r10d, DWORD PTR [r13+4] - movq xmm7, r8 - mov ebp, DWORD PTR [r13+12] - mov r14d, DWORD PTR [r13+8] - mov rdx, QWORD PTR [rsp+328] - movzx ecx, sil - shr esi, 8 - punpcklqdq xmm7, xmm0 - mov r15d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - mov edi, DWORD PTR [r12+rcx*4] - movzx ecx, r14b - shr r14d, 8 - mov ebx, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - shr ebp, 8 - mov r9d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - xor r15d, DWORD PTR [r12+rcx*4+1024] - movzx ecx, r14b - shr r14d, 8 - mov eax, r14d - shr eax, 8 - xor edi, DWORD PTR [r12+rcx*4+1024] - add eax, 256 - movzx ecx, bpl - shr ebp, 8 - xor ebx, DWORD PTR [r12+rcx*4+1024] - movzx ecx, sil - shr esi, 8 - xor r9d, DWORD PTR [r12+rcx*4+1024] - add r12, 2048 - movzx ecx, r10b - shr r10d, 8 - add r10d, 256 - mov r11d, DWORD PTR [r12+rax*4] - xor r11d, DWORD PTR [r12+rcx*4] - xor r11d, r9d - movzx ecx, sil - mov r10d, DWORD PTR [r12+r10*4] - shr esi, 8 - add esi, 256 - xor r10d, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - xor r10d, ebx - shr ebp, 8 - movd xmm1, r11d - add ebp, 256 - movq r11, xmm12 - mov r9d, DWORD PTR [r12+rcx*4] - xor r9d, DWORD PTR [r12+rsi*4] - mov eax, DWORD PTR [r12+rbp*4] - xor r9d, edi - movzx ecx, r14b 
- movd xmm0, r10d - movd xmm2, r9d - xor eax, DWORD PTR [r12+rcx*4] - mov rcx, rdx - xor eax, r15d - punpckldq xmm2, xmm1 - xor rcx, 16 - movd xmm6, eax - mov rax, rdx - punpckldq xmm6, xmm0 - xor rax, 32 - punpckldq xmm6, xmm2 - xor rdx, 48 - movdqu xmm2, XMMWORD PTR [rcx+r11] - pxor xmm6, xmm2 - pxor xmm6, xmm7 - paddq xmm2, xmm4 - movdqu xmm1, XMMWORD PTR [rax+r11] - movdqu xmm0, XMMWORD PTR [rdx+r11] - pxor xmm6, xmm1 - pxor xmm6, xmm0 - paddq xmm0, xmm5 - movdqu XMMWORD PTR [rcx+r11], xmm0 - movdqu XMMWORD PTR [rax+r11], xmm2 - movq rcx, xmm13 - paddq xmm1, xmm7 - movdqu XMMWORD PTR [rdx+r11], xmm1 - movq rdi, xmm6 - mov r10, rdi - and r10d, 2097136 - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [r13], xmm0 - - mov ebx, [rsp+144] - mov ebp, [rsp+152] - add ebx, [rsp+148] - add ebp, [rsp+156] - shl rbp, 32 - or rbx, rbp - - xor rbx, QWORD PTR [r10+r11] - lea r14, QWORD PTR [r10+r11] - mov rbp, QWORD PTR [r14+8] - - mov [rsp+160], rbx - mov [rsp+168], rdi - mov [rsp+176], rbp - mov [rsp+184], r10 - mov r10, rsp - - mov ebx, [rsp+144] - mov esi, [rsp+148] - mov edi, [rsp+152] - mov ebp, [rsp+156] - - movd esp, xmm7 - movaps xmm0, xmm7 - psrldq xmm0, 8 - movd r15d, xmm0 - movd eax, xmm4 - movd edx, xmm5 - movaps xmm0, xmm5 - psrldq xmm0, 8 - movd r9d, xmm0 - -CryptonightR_soft_aes_template_part2: - mov rsp, r10 - mov [rsp+144], ebx - mov [rsp+148], esi - mov [rsp+152], edi - mov [rsp+156], ebp - - mov edi, edi - shl rbp, 32 - or rbp, rdi - xor r8, rbp - - mov ebx, ebx - shl rsi, 32 - or rsi, rbx - xor QWORD PTR [rsp+320], rsi - - mov rbx, [rsp+160] - mov rdi, [rsp+168] - mov rbp, [rsp+176] - mov r10, [rsp+184] - - mov r9, r10 - xor r9, 16 - mov rcx, r10 - xor rcx, 32 - xor r10, 48 - mov rax, rbx - mul rdi - movdqu xmm2, XMMWORD PTR [r9+r11] - movdqu xmm1, XMMWORD PTR [rcx+r11] - pxor xmm6, xmm2 - pxor xmm6, xmm1 - paddq xmm1, xmm7 - add r8, rdx - movdqu xmm0, XMMWORD PTR [r10+r11] - pxor xmm6, xmm0 - paddq xmm0, xmm5 - paddq xmm2, xmm4 - movdqu XMMWORD 
PTR [r9+r11], xmm0 - movdqa xmm5, xmm4 - mov r9, QWORD PTR [rsp+320] - movdqa xmm4, xmm6 - add r9, rax - movdqu XMMWORD PTR [rcx+r11], xmm2 - movdqu XMMWORD PTR [r10+r11], xmm1 - mov r10, QWORD PTR [rsp+304] - movd r12d, xmm11 - mov QWORD PTR [r14], r8 - xor r8, rbx - mov rax, r8 - mov QWORD PTR [r14+8], r9 - and eax, 2097136 - xor r9, rbp - mov QWORD PTR [rsp+320], r9 - mov QWORD PTR [rsp+328], rax - sub r12d, 1 - jne CryptonightR_soft_aes_template_mainloop - -CryptonightR_soft_aes_template_part3: - movaps xmm6, XMMWORD PTR [rsp+16] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+48] - movaps xmm9, XMMWORD PTR [rsp+64] - movaps xmm10, XMMWORD PTR [rsp+80] - movaps xmm11, XMMWORD PTR [rsp+96] - movaps xmm12, XMMWORD PTR [rsp+112] - movaps xmm13, XMMWORD PTR [rsp+128] - - add rsp, 232 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - pop rbx - ret -CryptonightR_soft_aes_template_end: diff --git a/src/crypto/asm/CryptonightR_template.S b/src/crypto/asm/CryptonightR_template.S deleted file mode 100644 index d2974d16..00000000 --- a/src/crypto/asm/CryptonightR_template.S +++ /dev/null @@ -1,1595 +0,0 @@ -#ifdef __APPLE__ -# define ALIGN(x) .align 6 -#else -# define ALIGN(x) .align 64 -#endif -.intel_syntax noprefix -#ifdef __APPLE__ -# define FN_PREFIX(fn) _ ## fn -.text -#else -# define FN_PREFIX(fn) fn -.section .text -#endif - -#define PUBLIC .global - -PUBLIC FN_PREFIX(CryptonightR_instruction0) -PUBLIC FN_PREFIX(CryptonightR_instruction1) -PUBLIC FN_PREFIX(CryptonightR_instruction2) -PUBLIC FN_PREFIX(CryptonightR_instruction3) -PUBLIC FN_PREFIX(CryptonightR_instruction4) -PUBLIC FN_PREFIX(CryptonightR_instruction5) -PUBLIC FN_PREFIX(CryptonightR_instruction6) -PUBLIC FN_PREFIX(CryptonightR_instruction7) -PUBLIC FN_PREFIX(CryptonightR_instruction8) -PUBLIC FN_PREFIX(CryptonightR_instruction9) -PUBLIC FN_PREFIX(CryptonightR_instruction10) -PUBLIC FN_PREFIX(CryptonightR_instruction11) -PUBLIC 
FN_PREFIX(CryptonightR_instruction12) -PUBLIC FN_PREFIX(CryptonightR_instruction13) -PUBLIC FN_PREFIX(CryptonightR_instruction14) -PUBLIC FN_PREFIX(CryptonightR_instruction15) -PUBLIC FN_PREFIX(CryptonightR_instruction16) -PUBLIC FN_PREFIX(CryptonightR_instruction17) -PUBLIC FN_PREFIX(CryptonightR_instruction18) -PUBLIC FN_PREFIX(CryptonightR_instruction19) -PUBLIC FN_PREFIX(CryptonightR_instruction20) -PUBLIC FN_PREFIX(CryptonightR_instruction21) -PUBLIC FN_PREFIX(CryptonightR_instruction22) -PUBLIC FN_PREFIX(CryptonightR_instruction23) -PUBLIC FN_PREFIX(CryptonightR_instruction24) -PUBLIC FN_PREFIX(CryptonightR_instruction25) -PUBLIC FN_PREFIX(CryptonightR_instruction26) -PUBLIC FN_PREFIX(CryptonightR_instruction27) -PUBLIC FN_PREFIX(CryptonightR_instruction28) -PUBLIC FN_PREFIX(CryptonightR_instruction29) -PUBLIC FN_PREFIX(CryptonightR_instruction30) -PUBLIC FN_PREFIX(CryptonightR_instruction31) -PUBLIC FN_PREFIX(CryptonightR_instruction32) -PUBLIC FN_PREFIX(CryptonightR_instruction33) -PUBLIC FN_PREFIX(CryptonightR_instruction34) -PUBLIC FN_PREFIX(CryptonightR_instruction35) -PUBLIC FN_PREFIX(CryptonightR_instruction36) -PUBLIC FN_PREFIX(CryptonightR_instruction37) -PUBLIC FN_PREFIX(CryptonightR_instruction38) -PUBLIC FN_PREFIX(CryptonightR_instruction39) -PUBLIC FN_PREFIX(CryptonightR_instruction40) -PUBLIC FN_PREFIX(CryptonightR_instruction41) -PUBLIC FN_PREFIX(CryptonightR_instruction42) -PUBLIC FN_PREFIX(CryptonightR_instruction43) -PUBLIC FN_PREFIX(CryptonightR_instruction44) -PUBLIC FN_PREFIX(CryptonightR_instruction45) -PUBLIC FN_PREFIX(CryptonightR_instruction46) -PUBLIC FN_PREFIX(CryptonightR_instruction47) -PUBLIC FN_PREFIX(CryptonightR_instruction48) -PUBLIC FN_PREFIX(CryptonightR_instruction49) -PUBLIC FN_PREFIX(CryptonightR_instruction50) -PUBLIC FN_PREFIX(CryptonightR_instruction51) -PUBLIC FN_PREFIX(CryptonightR_instruction52) -PUBLIC FN_PREFIX(CryptonightR_instruction53) -PUBLIC FN_PREFIX(CryptonightR_instruction54) -PUBLIC 
FN_PREFIX(CryptonightR_instruction55) -PUBLIC FN_PREFIX(CryptonightR_instruction56) -PUBLIC FN_PREFIX(CryptonightR_instruction57) -PUBLIC FN_PREFIX(CryptonightR_instruction58) -PUBLIC FN_PREFIX(CryptonightR_instruction59) -PUBLIC FN_PREFIX(CryptonightR_instruction60) -PUBLIC FN_PREFIX(CryptonightR_instruction61) -PUBLIC FN_PREFIX(CryptonightR_instruction62) -PUBLIC FN_PREFIX(CryptonightR_instruction63) -PUBLIC FN_PREFIX(CryptonightR_instruction64) -PUBLIC FN_PREFIX(CryptonightR_instruction65) -PUBLIC FN_PREFIX(CryptonightR_instruction66) -PUBLIC FN_PREFIX(CryptonightR_instruction67) -PUBLIC FN_PREFIX(CryptonightR_instruction68) -PUBLIC FN_PREFIX(CryptonightR_instruction69) -PUBLIC FN_PREFIX(CryptonightR_instruction70) -PUBLIC FN_PREFIX(CryptonightR_instruction71) -PUBLIC FN_PREFIX(CryptonightR_instruction72) -PUBLIC FN_PREFIX(CryptonightR_instruction73) -PUBLIC FN_PREFIX(CryptonightR_instruction74) -PUBLIC FN_PREFIX(CryptonightR_instruction75) -PUBLIC FN_PREFIX(CryptonightR_instruction76) -PUBLIC FN_PREFIX(CryptonightR_instruction77) -PUBLIC FN_PREFIX(CryptonightR_instruction78) -PUBLIC FN_PREFIX(CryptonightR_instruction79) -PUBLIC FN_PREFIX(CryptonightR_instruction80) -PUBLIC FN_PREFIX(CryptonightR_instruction81) -PUBLIC FN_PREFIX(CryptonightR_instruction82) -PUBLIC FN_PREFIX(CryptonightR_instruction83) -PUBLIC FN_PREFIX(CryptonightR_instruction84) -PUBLIC FN_PREFIX(CryptonightR_instruction85) -PUBLIC FN_PREFIX(CryptonightR_instruction86) -PUBLIC FN_PREFIX(CryptonightR_instruction87) -PUBLIC FN_PREFIX(CryptonightR_instruction88) -PUBLIC FN_PREFIX(CryptonightR_instruction89) -PUBLIC FN_PREFIX(CryptonightR_instruction90) -PUBLIC FN_PREFIX(CryptonightR_instruction91) -PUBLIC FN_PREFIX(CryptonightR_instruction92) -PUBLIC FN_PREFIX(CryptonightR_instruction93) -PUBLIC FN_PREFIX(CryptonightR_instruction94) -PUBLIC FN_PREFIX(CryptonightR_instruction95) -PUBLIC FN_PREFIX(CryptonightR_instruction96) -PUBLIC FN_PREFIX(CryptonightR_instruction97) -PUBLIC 
FN_PREFIX(CryptonightR_instruction98) -PUBLIC FN_PREFIX(CryptonightR_instruction99) -PUBLIC FN_PREFIX(CryptonightR_instruction100) -PUBLIC FN_PREFIX(CryptonightR_instruction101) -PUBLIC FN_PREFIX(CryptonightR_instruction102) -PUBLIC FN_PREFIX(CryptonightR_instruction103) -PUBLIC FN_PREFIX(CryptonightR_instruction104) -PUBLIC FN_PREFIX(CryptonightR_instruction105) -PUBLIC FN_PREFIX(CryptonightR_instruction106) -PUBLIC FN_PREFIX(CryptonightR_instruction107) -PUBLIC FN_PREFIX(CryptonightR_instruction108) -PUBLIC FN_PREFIX(CryptonightR_instruction109) -PUBLIC FN_PREFIX(CryptonightR_instruction110) -PUBLIC FN_PREFIX(CryptonightR_instruction111) -PUBLIC FN_PREFIX(CryptonightR_instruction112) -PUBLIC FN_PREFIX(CryptonightR_instruction113) -PUBLIC FN_PREFIX(CryptonightR_instruction114) -PUBLIC FN_PREFIX(CryptonightR_instruction115) -PUBLIC FN_PREFIX(CryptonightR_instruction116) -PUBLIC FN_PREFIX(CryptonightR_instruction117) -PUBLIC FN_PREFIX(CryptonightR_instruction118) -PUBLIC FN_PREFIX(CryptonightR_instruction119) -PUBLIC FN_PREFIX(CryptonightR_instruction120) -PUBLIC FN_PREFIX(CryptonightR_instruction121) -PUBLIC FN_PREFIX(CryptonightR_instruction122) -PUBLIC FN_PREFIX(CryptonightR_instruction123) -PUBLIC FN_PREFIX(CryptonightR_instruction124) -PUBLIC FN_PREFIX(CryptonightR_instruction125) -PUBLIC FN_PREFIX(CryptonightR_instruction126) -PUBLIC FN_PREFIX(CryptonightR_instruction127) -PUBLIC FN_PREFIX(CryptonightR_instruction128) -PUBLIC FN_PREFIX(CryptonightR_instruction129) -PUBLIC FN_PREFIX(CryptonightR_instruction130) -PUBLIC FN_PREFIX(CryptonightR_instruction131) -PUBLIC FN_PREFIX(CryptonightR_instruction132) -PUBLIC FN_PREFIX(CryptonightR_instruction133) -PUBLIC FN_PREFIX(CryptonightR_instruction134) -PUBLIC FN_PREFIX(CryptonightR_instruction135) -PUBLIC FN_PREFIX(CryptonightR_instruction136) -PUBLIC FN_PREFIX(CryptonightR_instruction137) -PUBLIC FN_PREFIX(CryptonightR_instruction138) -PUBLIC FN_PREFIX(CryptonightR_instruction139) -PUBLIC 
FN_PREFIX(CryptonightR_instruction140) -PUBLIC FN_PREFIX(CryptonightR_instruction141) -PUBLIC FN_PREFIX(CryptonightR_instruction142) -PUBLIC FN_PREFIX(CryptonightR_instruction143) -PUBLIC FN_PREFIX(CryptonightR_instruction144) -PUBLIC FN_PREFIX(CryptonightR_instruction145) -PUBLIC FN_PREFIX(CryptonightR_instruction146) -PUBLIC FN_PREFIX(CryptonightR_instruction147) -PUBLIC FN_PREFIX(CryptonightR_instruction148) -PUBLIC FN_PREFIX(CryptonightR_instruction149) -PUBLIC FN_PREFIX(CryptonightR_instruction150) -PUBLIC FN_PREFIX(CryptonightR_instruction151) -PUBLIC FN_PREFIX(CryptonightR_instruction152) -PUBLIC FN_PREFIX(CryptonightR_instruction153) -PUBLIC FN_PREFIX(CryptonightR_instruction154) -PUBLIC FN_PREFIX(CryptonightR_instruction155) -PUBLIC FN_PREFIX(CryptonightR_instruction156) -PUBLIC FN_PREFIX(CryptonightR_instruction157) -PUBLIC FN_PREFIX(CryptonightR_instruction158) -PUBLIC FN_PREFIX(CryptonightR_instruction159) -PUBLIC FN_PREFIX(CryptonightR_instruction160) -PUBLIC FN_PREFIX(CryptonightR_instruction161) -PUBLIC FN_PREFIX(CryptonightR_instruction162) -PUBLIC FN_PREFIX(CryptonightR_instruction163) -PUBLIC FN_PREFIX(CryptonightR_instruction164) -PUBLIC FN_PREFIX(CryptonightR_instruction165) -PUBLIC FN_PREFIX(CryptonightR_instruction166) -PUBLIC FN_PREFIX(CryptonightR_instruction167) -PUBLIC FN_PREFIX(CryptonightR_instruction168) -PUBLIC FN_PREFIX(CryptonightR_instruction169) -PUBLIC FN_PREFIX(CryptonightR_instruction170) -PUBLIC FN_PREFIX(CryptonightR_instruction171) -PUBLIC FN_PREFIX(CryptonightR_instruction172) -PUBLIC FN_PREFIX(CryptonightR_instruction173) -PUBLIC FN_PREFIX(CryptonightR_instruction174) -PUBLIC FN_PREFIX(CryptonightR_instruction175) -PUBLIC FN_PREFIX(CryptonightR_instruction176) -PUBLIC FN_PREFIX(CryptonightR_instruction177) -PUBLIC FN_PREFIX(CryptonightR_instruction178) -PUBLIC FN_PREFIX(CryptonightR_instruction179) -PUBLIC FN_PREFIX(CryptonightR_instruction180) -PUBLIC FN_PREFIX(CryptonightR_instruction181) -PUBLIC 
FN_PREFIX(CryptonightR_instruction182) -PUBLIC FN_PREFIX(CryptonightR_instruction183) -PUBLIC FN_PREFIX(CryptonightR_instruction184) -PUBLIC FN_PREFIX(CryptonightR_instruction185) -PUBLIC FN_PREFIX(CryptonightR_instruction186) -PUBLIC FN_PREFIX(CryptonightR_instruction187) -PUBLIC FN_PREFIX(CryptonightR_instruction188) -PUBLIC FN_PREFIX(CryptonightR_instruction189) -PUBLIC FN_PREFIX(CryptonightR_instruction190) -PUBLIC FN_PREFIX(CryptonightR_instruction191) -PUBLIC FN_PREFIX(CryptonightR_instruction192) -PUBLIC FN_PREFIX(CryptonightR_instruction193) -PUBLIC FN_PREFIX(CryptonightR_instruction194) -PUBLIC FN_PREFIX(CryptonightR_instruction195) -PUBLIC FN_PREFIX(CryptonightR_instruction196) -PUBLIC FN_PREFIX(CryptonightR_instruction197) -PUBLIC FN_PREFIX(CryptonightR_instruction198) -PUBLIC FN_PREFIX(CryptonightR_instruction199) -PUBLIC FN_PREFIX(CryptonightR_instruction200) -PUBLIC FN_PREFIX(CryptonightR_instruction201) -PUBLIC FN_PREFIX(CryptonightR_instruction202) -PUBLIC FN_PREFIX(CryptonightR_instruction203) -PUBLIC FN_PREFIX(CryptonightR_instruction204) -PUBLIC FN_PREFIX(CryptonightR_instruction205) -PUBLIC FN_PREFIX(CryptonightR_instruction206) -PUBLIC FN_PREFIX(CryptonightR_instruction207) -PUBLIC FN_PREFIX(CryptonightR_instruction208) -PUBLIC FN_PREFIX(CryptonightR_instruction209) -PUBLIC FN_PREFIX(CryptonightR_instruction210) -PUBLIC FN_PREFIX(CryptonightR_instruction211) -PUBLIC FN_PREFIX(CryptonightR_instruction212) -PUBLIC FN_PREFIX(CryptonightR_instruction213) -PUBLIC FN_PREFIX(CryptonightR_instruction214) -PUBLIC FN_PREFIX(CryptonightR_instruction215) -PUBLIC FN_PREFIX(CryptonightR_instruction216) -PUBLIC FN_PREFIX(CryptonightR_instruction217) -PUBLIC FN_PREFIX(CryptonightR_instruction218) -PUBLIC FN_PREFIX(CryptonightR_instruction219) -PUBLIC FN_PREFIX(CryptonightR_instruction220) -PUBLIC FN_PREFIX(CryptonightR_instruction221) -PUBLIC FN_PREFIX(CryptonightR_instruction222) -PUBLIC FN_PREFIX(CryptonightR_instruction223) -PUBLIC 
FN_PREFIX(CryptonightR_instruction224) -PUBLIC FN_PREFIX(CryptonightR_instruction225) -PUBLIC FN_PREFIX(CryptonightR_instruction226) -PUBLIC FN_PREFIX(CryptonightR_instruction227) -PUBLIC FN_PREFIX(CryptonightR_instruction228) -PUBLIC FN_PREFIX(CryptonightR_instruction229) -PUBLIC FN_PREFIX(CryptonightR_instruction230) -PUBLIC FN_PREFIX(CryptonightR_instruction231) -PUBLIC FN_PREFIX(CryptonightR_instruction232) -PUBLIC FN_PREFIX(CryptonightR_instruction233) -PUBLIC FN_PREFIX(CryptonightR_instruction234) -PUBLIC FN_PREFIX(CryptonightR_instruction235) -PUBLIC FN_PREFIX(CryptonightR_instruction236) -PUBLIC FN_PREFIX(CryptonightR_instruction237) -PUBLIC FN_PREFIX(CryptonightR_instruction238) -PUBLIC FN_PREFIX(CryptonightR_instruction239) -PUBLIC FN_PREFIX(CryptonightR_instruction240) -PUBLIC FN_PREFIX(CryptonightR_instruction241) -PUBLIC FN_PREFIX(CryptonightR_instruction242) -PUBLIC FN_PREFIX(CryptonightR_instruction243) -PUBLIC FN_PREFIX(CryptonightR_instruction244) -PUBLIC FN_PREFIX(CryptonightR_instruction245) -PUBLIC FN_PREFIX(CryptonightR_instruction246) -PUBLIC FN_PREFIX(CryptonightR_instruction247) -PUBLIC FN_PREFIX(CryptonightR_instruction248) -PUBLIC FN_PREFIX(CryptonightR_instruction249) -PUBLIC FN_PREFIX(CryptonightR_instruction250) -PUBLIC FN_PREFIX(CryptonightR_instruction251) -PUBLIC FN_PREFIX(CryptonightR_instruction252) -PUBLIC FN_PREFIX(CryptonightR_instruction253) -PUBLIC FN_PREFIX(CryptonightR_instruction254) -PUBLIC FN_PREFIX(CryptonightR_instruction255) -PUBLIC FN_PREFIX(CryptonightR_instruction256) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov0) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov1) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov2) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov3) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov4) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov5) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov6) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov7) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov8) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov9) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov10) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov11) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov12) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov13) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov14) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov15) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov16) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov17) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov18) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov19) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov20) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov21) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov22) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov23) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov24) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov25) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov26) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov27) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov28) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov29) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov30) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov31) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov32) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov33) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov34) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov35) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov36) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov37) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov38) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov39) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov40) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov41) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov42) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov43) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov44) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov45) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov46) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov47) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov48) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov49) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov50) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov51) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov52) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov53) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov54) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov55) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov56) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov57) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov58) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov59) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov60) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov61) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov62) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov63) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov64) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov65) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov66) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov67) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov68) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov69) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov70) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov71) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov72) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov73) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov74) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov75) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov76) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov77) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov78) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov79) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov80) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov81) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov82) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov83) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov84) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov85) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov86) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov87) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov88) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov89) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov90) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov91) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov92) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov93) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov94) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov95) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov96) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov97) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov98) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov99) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov100) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov101) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov102) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov103) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov104) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov105) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov106) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov107) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov108) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov109) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov110) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov111) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov112) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov113) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov114) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov115) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov116) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov117) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov118) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov119) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov120) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov121) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov122) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov123) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov124) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov125) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov126) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov127) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov128) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov129) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov130) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov131) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov132) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov133) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov134) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov135) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov136) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov137) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov138) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov139) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov140) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov141) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov142) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov143) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov144) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov145) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov146) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov147) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov148) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov149) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov150) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov151) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov152) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov153) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov154) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov155) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov156) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov157) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov158) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov159) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov160) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov161) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov162) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov163) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov164) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov165) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov166) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov167) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov168) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov169) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov170) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov171) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov172) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov173) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov174) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov175) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov176) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov177) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov178) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov179) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov180) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov181) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov182) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov183) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov184) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov185) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov186) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov187) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov188) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov189) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov190) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov191) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov192) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov193) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov194) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov195) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov196) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov197) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov198) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov199) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov200) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov201) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov202) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov203) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov204) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov205) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov206) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov207) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov208) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov209) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov210) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov211) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov212) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov213) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov214) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov215) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov216) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov217) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov218) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov219) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov220) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov221) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov222) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov223) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov224) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov225) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov226) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov227) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov228) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov229) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov230) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov231) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov232) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov233) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov234) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov235) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov236) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov237) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov238) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov239) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov240) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov241) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov242) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov243) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov244) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov245) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov246) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov247) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov248) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov249) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov250) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov251) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov252) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov253) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov254) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov255) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov256) - -#include "CryptonightWOW_template.inc" -#include "CryptonightR_template.inc" -#include "CryptonightWOW_soft_aes_template.inc" -#include "CryptonightR_soft_aes_template.inc" - -FN_PREFIX(CryptonightR_instruction0): - imul rbx, rbx -FN_PREFIX(CryptonightR_instruction1): - imul rbx, rbx -FN_PREFIX(CryptonightR_instruction2): - imul rbx, rbx -FN_PREFIX(CryptonightR_instruction3): - add rbx, r9 - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction4): - sub rbx, r9 -FN_PREFIX(CryptonightR_instruction5): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction6): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction7): - xor rbx, r9 -FN_PREFIX(CryptonightR_instruction8): - imul rsi, rbx -FN_PREFIX(CryptonightR_instruction9): - imul rsi, rbx -FN_PREFIX(CryptonightR_instruction10): - imul rsi, rbx -FN_PREFIX(CryptonightR_instruction11): - add rsi, rbx - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction12): - sub rsi, rbx -FN_PREFIX(CryptonightR_instruction13): - ror esi, cl -FN_PREFIX(CryptonightR_instruction14): - rol esi, cl -FN_PREFIX(CryptonightR_instruction15): - xor rsi, rbx -FN_PREFIX(CryptonightR_instruction16): - imul rdi, rbx -FN_PREFIX(CryptonightR_instruction17): - imul rdi, rbx -FN_PREFIX(CryptonightR_instruction18): - imul rdi, rbx -FN_PREFIX(CryptonightR_instruction19): - add rdi, rbx - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction20): - sub rdi, rbx 
-FN_PREFIX(CryptonightR_instruction21): - ror edi, cl -FN_PREFIX(CryptonightR_instruction22): - rol edi, cl -FN_PREFIX(CryptonightR_instruction23): - xor rdi, rbx -FN_PREFIX(CryptonightR_instruction24): - imul rbp, rbx -FN_PREFIX(CryptonightR_instruction25): - imul rbp, rbx -FN_PREFIX(CryptonightR_instruction26): - imul rbp, rbx -FN_PREFIX(CryptonightR_instruction27): - add rbp, rbx - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction28): - sub rbp, rbx -FN_PREFIX(CryptonightR_instruction29): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction30): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction31): - xor rbp, rbx -FN_PREFIX(CryptonightR_instruction32): - imul rbx, rsi -FN_PREFIX(CryptonightR_instruction33): - imul rbx, rsi -FN_PREFIX(CryptonightR_instruction34): - imul rbx, rsi -FN_PREFIX(CryptonightR_instruction35): - add rbx, rsi - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction36): - sub rbx, rsi -FN_PREFIX(CryptonightR_instruction37): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction38): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction39): - xor rbx, rsi -FN_PREFIX(CryptonightR_instruction40): - imul rsi, rsi -FN_PREFIX(CryptonightR_instruction41): - imul rsi, rsi -FN_PREFIX(CryptonightR_instruction42): - imul rsi, rsi -FN_PREFIX(CryptonightR_instruction43): - add rsi, r9 - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction44): - sub rsi, r9 -FN_PREFIX(CryptonightR_instruction45): - ror esi, cl -FN_PREFIX(CryptonightR_instruction46): - rol esi, cl -FN_PREFIX(CryptonightR_instruction47): - xor rsi, r9 -FN_PREFIX(CryptonightR_instruction48): - imul rdi, rsi -FN_PREFIX(CryptonightR_instruction49): - imul rdi, rsi -FN_PREFIX(CryptonightR_instruction50): - imul rdi, rsi -FN_PREFIX(CryptonightR_instruction51): - add rdi, rsi - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction52): - sub rdi, rsi -FN_PREFIX(CryptonightR_instruction53): - ror edi, cl -FN_PREFIX(CryptonightR_instruction54): - rol edi, cl -FN_PREFIX(CryptonightR_instruction55): - 
xor rdi, rsi -FN_PREFIX(CryptonightR_instruction56): - imul rbp, rsi -FN_PREFIX(CryptonightR_instruction57): - imul rbp, rsi -FN_PREFIX(CryptonightR_instruction58): - imul rbp, rsi -FN_PREFIX(CryptonightR_instruction59): - add rbp, rsi - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction60): - sub rbp, rsi -FN_PREFIX(CryptonightR_instruction61): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction62): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction63): - xor rbp, rsi -FN_PREFIX(CryptonightR_instruction64): - imul rbx, rdi -FN_PREFIX(CryptonightR_instruction65): - imul rbx, rdi -FN_PREFIX(CryptonightR_instruction66): - imul rbx, rdi -FN_PREFIX(CryptonightR_instruction67): - add rbx, rdi - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction68): - sub rbx, rdi -FN_PREFIX(CryptonightR_instruction69): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction70): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction71): - xor rbx, rdi -FN_PREFIX(CryptonightR_instruction72): - imul rsi, rdi -FN_PREFIX(CryptonightR_instruction73): - imul rsi, rdi -FN_PREFIX(CryptonightR_instruction74): - imul rsi, rdi -FN_PREFIX(CryptonightR_instruction75): - add rsi, rdi - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction76): - sub rsi, rdi -FN_PREFIX(CryptonightR_instruction77): - ror esi, cl -FN_PREFIX(CryptonightR_instruction78): - rol esi, cl -FN_PREFIX(CryptonightR_instruction79): - xor rsi, rdi -FN_PREFIX(CryptonightR_instruction80): - imul rdi, rdi -FN_PREFIX(CryptonightR_instruction81): - imul rdi, rdi -FN_PREFIX(CryptonightR_instruction82): - imul rdi, rdi -FN_PREFIX(CryptonightR_instruction83): - add rdi, r9 - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction84): - sub rdi, r9 -FN_PREFIX(CryptonightR_instruction85): - ror edi, cl -FN_PREFIX(CryptonightR_instruction86): - rol edi, cl -FN_PREFIX(CryptonightR_instruction87): - xor rdi, r9 -FN_PREFIX(CryptonightR_instruction88): - imul rbp, rdi -FN_PREFIX(CryptonightR_instruction89): - imul rbp, rdi 
-FN_PREFIX(CryptonightR_instruction90): - imul rbp, rdi -FN_PREFIX(CryptonightR_instruction91): - add rbp, rdi - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction92): - sub rbp, rdi -FN_PREFIX(CryptonightR_instruction93): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction94): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction95): - xor rbp, rdi -FN_PREFIX(CryptonightR_instruction96): - imul rbx, rbp -FN_PREFIX(CryptonightR_instruction97): - imul rbx, rbp -FN_PREFIX(CryptonightR_instruction98): - imul rbx, rbp -FN_PREFIX(CryptonightR_instruction99): - add rbx, rbp - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction100): - sub rbx, rbp -FN_PREFIX(CryptonightR_instruction101): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction102): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction103): - xor rbx, rbp -FN_PREFIX(CryptonightR_instruction104): - imul rsi, rbp -FN_PREFIX(CryptonightR_instruction105): - imul rsi, rbp -FN_PREFIX(CryptonightR_instruction106): - imul rsi, rbp -FN_PREFIX(CryptonightR_instruction107): - add rsi, rbp - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction108): - sub rsi, rbp -FN_PREFIX(CryptonightR_instruction109): - ror esi, cl -FN_PREFIX(CryptonightR_instruction110): - rol esi, cl -FN_PREFIX(CryptonightR_instruction111): - xor rsi, rbp -FN_PREFIX(CryptonightR_instruction112): - imul rdi, rbp -FN_PREFIX(CryptonightR_instruction113): - imul rdi, rbp -FN_PREFIX(CryptonightR_instruction114): - imul rdi, rbp -FN_PREFIX(CryptonightR_instruction115): - add rdi, rbp - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction116): - sub rdi, rbp -FN_PREFIX(CryptonightR_instruction117): - ror edi, cl -FN_PREFIX(CryptonightR_instruction118): - rol edi, cl -FN_PREFIX(CryptonightR_instruction119): - xor rdi, rbp -FN_PREFIX(CryptonightR_instruction120): - imul rbp, rbp -FN_PREFIX(CryptonightR_instruction121): - imul rbp, rbp -FN_PREFIX(CryptonightR_instruction122): - imul rbp, rbp -FN_PREFIX(CryptonightR_instruction123): - add rbp, r9 - add rbp, 
2147483647 -FN_PREFIX(CryptonightR_instruction124): - sub rbp, r9 -FN_PREFIX(CryptonightR_instruction125): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction126): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction127): - xor rbp, r9 -FN_PREFIX(CryptonightR_instruction128): - imul rbx, rsp -FN_PREFIX(CryptonightR_instruction129): - imul rbx, rsp -FN_PREFIX(CryptonightR_instruction130): - imul rbx, rsp -FN_PREFIX(CryptonightR_instruction131): - add rbx, rsp - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction132): - sub rbx, rsp -FN_PREFIX(CryptonightR_instruction133): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction134): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction135): - xor rbx, rsp -FN_PREFIX(CryptonightR_instruction136): - imul rsi, rsp -FN_PREFIX(CryptonightR_instruction137): - imul rsi, rsp -FN_PREFIX(CryptonightR_instruction138): - imul rsi, rsp -FN_PREFIX(CryptonightR_instruction139): - add rsi, rsp - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction140): - sub rsi, rsp -FN_PREFIX(CryptonightR_instruction141): - ror esi, cl -FN_PREFIX(CryptonightR_instruction142): - rol esi, cl -FN_PREFIX(CryptonightR_instruction143): - xor rsi, rsp -FN_PREFIX(CryptonightR_instruction144): - imul rdi, rsp -FN_PREFIX(CryptonightR_instruction145): - imul rdi, rsp -FN_PREFIX(CryptonightR_instruction146): - imul rdi, rsp -FN_PREFIX(CryptonightR_instruction147): - add rdi, rsp - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction148): - sub rdi, rsp -FN_PREFIX(CryptonightR_instruction149): - ror edi, cl -FN_PREFIX(CryptonightR_instruction150): - rol edi, cl -FN_PREFIX(CryptonightR_instruction151): - xor rdi, rsp -FN_PREFIX(CryptonightR_instruction152): - imul rbp, rsp -FN_PREFIX(CryptonightR_instruction153): - imul rbp, rsp -FN_PREFIX(CryptonightR_instruction154): - imul rbp, rsp -FN_PREFIX(CryptonightR_instruction155): - add rbp, rsp - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction156): - sub rbp, rsp -FN_PREFIX(CryptonightR_instruction157): - ror 
ebp, cl -FN_PREFIX(CryptonightR_instruction158): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction159): - xor rbp, rsp -FN_PREFIX(CryptonightR_instruction160): - imul rbx, r15 -FN_PREFIX(CryptonightR_instruction161): - imul rbx, r15 -FN_PREFIX(CryptonightR_instruction162): - imul rbx, r15 -FN_PREFIX(CryptonightR_instruction163): - add rbx, r15 - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction164): - sub rbx, r15 -FN_PREFIX(CryptonightR_instruction165): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction166): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction167): - xor rbx, r15 -FN_PREFIX(CryptonightR_instruction168): - imul rsi, r15 -FN_PREFIX(CryptonightR_instruction169): - imul rsi, r15 -FN_PREFIX(CryptonightR_instruction170): - imul rsi, r15 -FN_PREFIX(CryptonightR_instruction171): - add rsi, r15 - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction172): - sub rsi, r15 -FN_PREFIX(CryptonightR_instruction173): - ror esi, cl -FN_PREFIX(CryptonightR_instruction174): - rol esi, cl -FN_PREFIX(CryptonightR_instruction175): - xor rsi, r15 -FN_PREFIX(CryptonightR_instruction176): - imul rdi, r15 -FN_PREFIX(CryptonightR_instruction177): - imul rdi, r15 -FN_PREFIX(CryptonightR_instruction178): - imul rdi, r15 -FN_PREFIX(CryptonightR_instruction179): - add rdi, r15 - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction180): - sub rdi, r15 -FN_PREFIX(CryptonightR_instruction181): - ror edi, cl -FN_PREFIX(CryptonightR_instruction182): - rol edi, cl -FN_PREFIX(CryptonightR_instruction183): - xor rdi, r15 -FN_PREFIX(CryptonightR_instruction184): - imul rbp, r15 -FN_PREFIX(CryptonightR_instruction185): - imul rbp, r15 -FN_PREFIX(CryptonightR_instruction186): - imul rbp, r15 -FN_PREFIX(CryptonightR_instruction187): - add rbp, r15 - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction188): - sub rbp, r15 -FN_PREFIX(CryptonightR_instruction189): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction190): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction191): - xor rbp, 
r15 -FN_PREFIX(CryptonightR_instruction192): - imul rbx, rax -FN_PREFIX(CryptonightR_instruction193): - imul rbx, rax -FN_PREFIX(CryptonightR_instruction194): - imul rbx, rax -FN_PREFIX(CryptonightR_instruction195): - add rbx, rax - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction196): - sub rbx, rax -FN_PREFIX(CryptonightR_instruction197): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction198): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction199): - xor rbx, rax -FN_PREFIX(CryptonightR_instruction200): - imul rsi, rax -FN_PREFIX(CryptonightR_instruction201): - imul rsi, rax -FN_PREFIX(CryptonightR_instruction202): - imul rsi, rax -FN_PREFIX(CryptonightR_instruction203): - add rsi, rax - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction204): - sub rsi, rax -FN_PREFIX(CryptonightR_instruction205): - ror esi, cl -FN_PREFIX(CryptonightR_instruction206): - rol esi, cl -FN_PREFIX(CryptonightR_instruction207): - xor rsi, rax -FN_PREFIX(CryptonightR_instruction208): - imul rdi, rax -FN_PREFIX(CryptonightR_instruction209): - imul rdi, rax -FN_PREFIX(CryptonightR_instruction210): - imul rdi, rax -FN_PREFIX(CryptonightR_instruction211): - add rdi, rax - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction212): - sub rdi, rax -FN_PREFIX(CryptonightR_instruction213): - ror edi, cl -FN_PREFIX(CryptonightR_instruction214): - rol edi, cl -FN_PREFIX(CryptonightR_instruction215): - xor rdi, rax -FN_PREFIX(CryptonightR_instruction216): - imul rbp, rax -FN_PREFIX(CryptonightR_instruction217): - imul rbp, rax -FN_PREFIX(CryptonightR_instruction218): - imul rbp, rax -FN_PREFIX(CryptonightR_instruction219): - add rbp, rax - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction220): - sub rbp, rax -FN_PREFIX(CryptonightR_instruction221): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction222): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction223): - xor rbp, rax -FN_PREFIX(CryptonightR_instruction224): - imul rbx, rdx -FN_PREFIX(CryptonightR_instruction225): - imul rbx, 
rdx -FN_PREFIX(CryptonightR_instruction226): - imul rbx, rdx -FN_PREFIX(CryptonightR_instruction227): - add rbx, rdx - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction228): - sub rbx, rdx -FN_PREFIX(CryptonightR_instruction229): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction230): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction231): - xor rbx, rdx -FN_PREFIX(CryptonightR_instruction232): - imul rsi, rdx -FN_PREFIX(CryptonightR_instruction233): - imul rsi, rdx -FN_PREFIX(CryptonightR_instruction234): - imul rsi, rdx -FN_PREFIX(CryptonightR_instruction235): - add rsi, rdx - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction236): - sub rsi, rdx -FN_PREFIX(CryptonightR_instruction237): - ror esi, cl -FN_PREFIX(CryptonightR_instruction238): - rol esi, cl -FN_PREFIX(CryptonightR_instruction239): - xor rsi, rdx -FN_PREFIX(CryptonightR_instruction240): - imul rdi, rdx -FN_PREFIX(CryptonightR_instruction241): - imul rdi, rdx -FN_PREFIX(CryptonightR_instruction242): - imul rdi, rdx -FN_PREFIX(CryptonightR_instruction243): - add rdi, rdx - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction244): - sub rdi, rdx -FN_PREFIX(CryptonightR_instruction245): - ror edi, cl -FN_PREFIX(CryptonightR_instruction246): - rol edi, cl -FN_PREFIX(CryptonightR_instruction247): - xor rdi, rdx -FN_PREFIX(CryptonightR_instruction248): - imul rbp, rdx -FN_PREFIX(CryptonightR_instruction249): - imul rbp, rdx -FN_PREFIX(CryptonightR_instruction250): - imul rbp, rdx -FN_PREFIX(CryptonightR_instruction251): - add rbp, rdx - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction252): - sub rbp, rdx -FN_PREFIX(CryptonightR_instruction253): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction254): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction255): - xor rbp, rdx -FN_PREFIX(CryptonightR_instruction256): - imul rbx, rbx -FN_PREFIX(CryptonightR_instruction_mov0): - -FN_PREFIX(CryptonightR_instruction_mov1): - -FN_PREFIX(CryptonightR_instruction_mov2): - 
-FN_PREFIX(CryptonightR_instruction_mov3): - -FN_PREFIX(CryptonightR_instruction_mov4): - -FN_PREFIX(CryptonightR_instruction_mov5): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov6): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov7): - -FN_PREFIX(CryptonightR_instruction_mov8): - -FN_PREFIX(CryptonightR_instruction_mov9): - -FN_PREFIX(CryptonightR_instruction_mov10): - -FN_PREFIX(CryptonightR_instruction_mov11): - -FN_PREFIX(CryptonightR_instruction_mov12): - -FN_PREFIX(CryptonightR_instruction_mov13): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov14): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov15): - -FN_PREFIX(CryptonightR_instruction_mov16): - -FN_PREFIX(CryptonightR_instruction_mov17): - -FN_PREFIX(CryptonightR_instruction_mov18): - -FN_PREFIX(CryptonightR_instruction_mov19): - -FN_PREFIX(CryptonightR_instruction_mov20): - -FN_PREFIX(CryptonightR_instruction_mov21): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov22): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov23): - -FN_PREFIX(CryptonightR_instruction_mov24): - -FN_PREFIX(CryptonightR_instruction_mov25): - -FN_PREFIX(CryptonightR_instruction_mov26): - -FN_PREFIX(CryptonightR_instruction_mov27): - -FN_PREFIX(CryptonightR_instruction_mov28): - -FN_PREFIX(CryptonightR_instruction_mov29): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov30): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov31): - -FN_PREFIX(CryptonightR_instruction_mov32): - -FN_PREFIX(CryptonightR_instruction_mov33): - -FN_PREFIX(CryptonightR_instruction_mov34): - -FN_PREFIX(CryptonightR_instruction_mov35): - -FN_PREFIX(CryptonightR_instruction_mov36): - -FN_PREFIX(CryptonightR_instruction_mov37): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov38): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov39): - -FN_PREFIX(CryptonightR_instruction_mov40): - -FN_PREFIX(CryptonightR_instruction_mov41): - -FN_PREFIX(CryptonightR_instruction_mov42): - 
-FN_PREFIX(CryptonightR_instruction_mov43): - -FN_PREFIX(CryptonightR_instruction_mov44): - -FN_PREFIX(CryptonightR_instruction_mov45): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov46): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov47): - -FN_PREFIX(CryptonightR_instruction_mov48): - -FN_PREFIX(CryptonightR_instruction_mov49): - -FN_PREFIX(CryptonightR_instruction_mov50): - -FN_PREFIX(CryptonightR_instruction_mov51): - -FN_PREFIX(CryptonightR_instruction_mov52): - -FN_PREFIX(CryptonightR_instruction_mov53): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov54): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov55): - -FN_PREFIX(CryptonightR_instruction_mov56): - -FN_PREFIX(CryptonightR_instruction_mov57): - -FN_PREFIX(CryptonightR_instruction_mov58): - -FN_PREFIX(CryptonightR_instruction_mov59): - -FN_PREFIX(CryptonightR_instruction_mov60): - -FN_PREFIX(CryptonightR_instruction_mov61): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov62): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov63): - -FN_PREFIX(CryptonightR_instruction_mov64): - -FN_PREFIX(CryptonightR_instruction_mov65): - -FN_PREFIX(CryptonightR_instruction_mov66): - -FN_PREFIX(CryptonightR_instruction_mov67): - -FN_PREFIX(CryptonightR_instruction_mov68): - -FN_PREFIX(CryptonightR_instruction_mov69): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov70): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov71): - -FN_PREFIX(CryptonightR_instruction_mov72): - -FN_PREFIX(CryptonightR_instruction_mov73): - -FN_PREFIX(CryptonightR_instruction_mov74): - -FN_PREFIX(CryptonightR_instruction_mov75): - -FN_PREFIX(CryptonightR_instruction_mov76): - -FN_PREFIX(CryptonightR_instruction_mov77): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov78): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov79): - -FN_PREFIX(CryptonightR_instruction_mov80): - -FN_PREFIX(CryptonightR_instruction_mov81): - -FN_PREFIX(CryptonightR_instruction_mov82): - 
-FN_PREFIX(CryptonightR_instruction_mov83): - -FN_PREFIX(CryptonightR_instruction_mov84): - -FN_PREFIX(CryptonightR_instruction_mov85): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov86): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov87): - -FN_PREFIX(CryptonightR_instruction_mov88): - -FN_PREFIX(CryptonightR_instruction_mov89): - -FN_PREFIX(CryptonightR_instruction_mov90): - -FN_PREFIX(CryptonightR_instruction_mov91): - -FN_PREFIX(CryptonightR_instruction_mov92): - -FN_PREFIX(CryptonightR_instruction_mov93): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov94): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov95): - -FN_PREFIX(CryptonightR_instruction_mov96): - -FN_PREFIX(CryptonightR_instruction_mov97): - -FN_PREFIX(CryptonightR_instruction_mov98): - -FN_PREFIX(CryptonightR_instruction_mov99): - -FN_PREFIX(CryptonightR_instruction_mov100): - -FN_PREFIX(CryptonightR_instruction_mov101): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov102): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov103): - -FN_PREFIX(CryptonightR_instruction_mov104): - -FN_PREFIX(CryptonightR_instruction_mov105): - -FN_PREFIX(CryptonightR_instruction_mov106): - -FN_PREFIX(CryptonightR_instruction_mov107): - -FN_PREFIX(CryptonightR_instruction_mov108): - -FN_PREFIX(CryptonightR_instruction_mov109): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov110): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov111): - -FN_PREFIX(CryptonightR_instruction_mov112): - -FN_PREFIX(CryptonightR_instruction_mov113): - -FN_PREFIX(CryptonightR_instruction_mov114): - -FN_PREFIX(CryptonightR_instruction_mov115): - -FN_PREFIX(CryptonightR_instruction_mov116): - -FN_PREFIX(CryptonightR_instruction_mov117): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov118): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov119): - -FN_PREFIX(CryptonightR_instruction_mov120): - -FN_PREFIX(CryptonightR_instruction_mov121): - -FN_PREFIX(CryptonightR_instruction_mov122): - 
-FN_PREFIX(CryptonightR_instruction_mov123): - -FN_PREFIX(CryptonightR_instruction_mov124): - -FN_PREFIX(CryptonightR_instruction_mov125): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov126): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov127): - -FN_PREFIX(CryptonightR_instruction_mov128): - -FN_PREFIX(CryptonightR_instruction_mov129): - -FN_PREFIX(CryptonightR_instruction_mov130): - -FN_PREFIX(CryptonightR_instruction_mov131): - -FN_PREFIX(CryptonightR_instruction_mov132): - -FN_PREFIX(CryptonightR_instruction_mov133): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov134): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov135): - -FN_PREFIX(CryptonightR_instruction_mov136): - -FN_PREFIX(CryptonightR_instruction_mov137): - -FN_PREFIX(CryptonightR_instruction_mov138): - -FN_PREFIX(CryptonightR_instruction_mov139): - -FN_PREFIX(CryptonightR_instruction_mov140): - -FN_PREFIX(CryptonightR_instruction_mov141): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov142): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov143): - -FN_PREFIX(CryptonightR_instruction_mov144): - -FN_PREFIX(CryptonightR_instruction_mov145): - -FN_PREFIX(CryptonightR_instruction_mov146): - -FN_PREFIX(CryptonightR_instruction_mov147): - -FN_PREFIX(CryptonightR_instruction_mov148): - -FN_PREFIX(CryptonightR_instruction_mov149): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov150): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov151): - -FN_PREFIX(CryptonightR_instruction_mov152): - -FN_PREFIX(CryptonightR_instruction_mov153): - -FN_PREFIX(CryptonightR_instruction_mov154): - -FN_PREFIX(CryptonightR_instruction_mov155): - -FN_PREFIX(CryptonightR_instruction_mov156): - -FN_PREFIX(CryptonightR_instruction_mov157): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov158): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov159): - -FN_PREFIX(CryptonightR_instruction_mov160): - -FN_PREFIX(CryptonightR_instruction_mov161): - 
-FN_PREFIX(CryptonightR_instruction_mov162): - -FN_PREFIX(CryptonightR_instruction_mov163): - -FN_PREFIX(CryptonightR_instruction_mov164): - -FN_PREFIX(CryptonightR_instruction_mov165): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov166): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov167): - -FN_PREFIX(CryptonightR_instruction_mov168): - -FN_PREFIX(CryptonightR_instruction_mov169): - -FN_PREFIX(CryptonightR_instruction_mov170): - -FN_PREFIX(CryptonightR_instruction_mov171): - -FN_PREFIX(CryptonightR_instruction_mov172): - -FN_PREFIX(CryptonightR_instruction_mov173): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov174): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov175): - -FN_PREFIX(CryptonightR_instruction_mov176): - -FN_PREFIX(CryptonightR_instruction_mov177): - -FN_PREFIX(CryptonightR_instruction_mov178): - -FN_PREFIX(CryptonightR_instruction_mov179): - -FN_PREFIX(CryptonightR_instruction_mov180): - -FN_PREFIX(CryptonightR_instruction_mov181): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov182): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov183): - -FN_PREFIX(CryptonightR_instruction_mov184): - -FN_PREFIX(CryptonightR_instruction_mov185): - -FN_PREFIX(CryptonightR_instruction_mov186): - -FN_PREFIX(CryptonightR_instruction_mov187): - -FN_PREFIX(CryptonightR_instruction_mov188): - -FN_PREFIX(CryptonightR_instruction_mov189): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov190): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov191): - -FN_PREFIX(CryptonightR_instruction_mov192): - -FN_PREFIX(CryptonightR_instruction_mov193): - -FN_PREFIX(CryptonightR_instruction_mov194): - -FN_PREFIX(CryptonightR_instruction_mov195): - -FN_PREFIX(CryptonightR_instruction_mov196): - -FN_PREFIX(CryptonightR_instruction_mov197): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov198): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov199): - -FN_PREFIX(CryptonightR_instruction_mov200): - 
-FN_PREFIX(CryptonightR_instruction_mov201): - -FN_PREFIX(CryptonightR_instruction_mov202): - -FN_PREFIX(CryptonightR_instruction_mov203): - -FN_PREFIX(CryptonightR_instruction_mov204): - -FN_PREFIX(CryptonightR_instruction_mov205): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov206): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov207): - -FN_PREFIX(CryptonightR_instruction_mov208): - -FN_PREFIX(CryptonightR_instruction_mov209): - -FN_PREFIX(CryptonightR_instruction_mov210): - -FN_PREFIX(CryptonightR_instruction_mov211): - -FN_PREFIX(CryptonightR_instruction_mov212): - -FN_PREFIX(CryptonightR_instruction_mov213): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov214): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov215): - -FN_PREFIX(CryptonightR_instruction_mov216): - -FN_PREFIX(CryptonightR_instruction_mov217): - -FN_PREFIX(CryptonightR_instruction_mov218): - -FN_PREFIX(CryptonightR_instruction_mov219): - -FN_PREFIX(CryptonightR_instruction_mov220): - -FN_PREFIX(CryptonightR_instruction_mov221): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov222): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov223): - -FN_PREFIX(CryptonightR_instruction_mov224): - -FN_PREFIX(CryptonightR_instruction_mov225): - -FN_PREFIX(CryptonightR_instruction_mov226): - -FN_PREFIX(CryptonightR_instruction_mov227): - -FN_PREFIX(CryptonightR_instruction_mov228): - -FN_PREFIX(CryptonightR_instruction_mov229): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov230): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov231): - -FN_PREFIX(CryptonightR_instruction_mov232): - -FN_PREFIX(CryptonightR_instruction_mov233): - -FN_PREFIX(CryptonightR_instruction_mov234): - -FN_PREFIX(CryptonightR_instruction_mov235): - -FN_PREFIX(CryptonightR_instruction_mov236): - -FN_PREFIX(CryptonightR_instruction_mov237): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov238): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov239): - 
-FN_PREFIX(CryptonightR_instruction_mov240): - -FN_PREFIX(CryptonightR_instruction_mov241): - -FN_PREFIX(CryptonightR_instruction_mov242): - -FN_PREFIX(CryptonightR_instruction_mov243): - -FN_PREFIX(CryptonightR_instruction_mov244): - -FN_PREFIX(CryptonightR_instruction_mov245): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov246): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov247): - -FN_PREFIX(CryptonightR_instruction_mov248): - -FN_PREFIX(CryptonightR_instruction_mov249): - -FN_PREFIX(CryptonightR_instruction_mov250): - -FN_PREFIX(CryptonightR_instruction_mov251): - -FN_PREFIX(CryptonightR_instruction_mov252): - -FN_PREFIX(CryptonightR_instruction_mov253): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov254): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov255): - -FN_PREFIX(CryptonightR_instruction_mov256): diff --git a/src/crypto/asm/CryptonightR_template.asm b/src/crypto/asm/CryptonightR_template.asm deleted file mode 100644 index 250eca3d..00000000 --- a/src/crypto/asm/CryptonightR_template.asm +++ /dev/null @@ -1,1585 +0,0 @@ -; Auto-generated file, do not edit - -_TEXT_CN_TEMPLATE SEGMENT PAGE READ EXECUTE -PUBLIC CryptonightR_instruction0 -PUBLIC CryptonightR_instruction1 -PUBLIC CryptonightR_instruction2 -PUBLIC CryptonightR_instruction3 -PUBLIC CryptonightR_instruction4 -PUBLIC CryptonightR_instruction5 -PUBLIC CryptonightR_instruction6 -PUBLIC CryptonightR_instruction7 -PUBLIC CryptonightR_instruction8 -PUBLIC CryptonightR_instruction9 -PUBLIC CryptonightR_instruction10 -PUBLIC CryptonightR_instruction11 -PUBLIC CryptonightR_instruction12 -PUBLIC CryptonightR_instruction13 -PUBLIC CryptonightR_instruction14 -PUBLIC CryptonightR_instruction15 -PUBLIC CryptonightR_instruction16 -PUBLIC CryptonightR_instruction17 -PUBLIC CryptonightR_instruction18 -PUBLIC CryptonightR_instruction19 -PUBLIC CryptonightR_instruction20 -PUBLIC CryptonightR_instruction21 -PUBLIC CryptonightR_instruction22 -PUBLIC CryptonightR_instruction23 -PUBLIC 
CryptonightR_instruction24 -PUBLIC CryptonightR_instruction25 -PUBLIC CryptonightR_instruction26 -PUBLIC CryptonightR_instruction27 -PUBLIC CryptonightR_instruction28 -PUBLIC CryptonightR_instruction29 -PUBLIC CryptonightR_instruction30 -PUBLIC CryptonightR_instruction31 -PUBLIC CryptonightR_instruction32 -PUBLIC CryptonightR_instruction33 -PUBLIC CryptonightR_instruction34 -PUBLIC CryptonightR_instruction35 -PUBLIC CryptonightR_instruction36 -PUBLIC CryptonightR_instruction37 -PUBLIC CryptonightR_instruction38 -PUBLIC CryptonightR_instruction39 -PUBLIC CryptonightR_instruction40 -PUBLIC CryptonightR_instruction41 -PUBLIC CryptonightR_instruction42 -PUBLIC CryptonightR_instruction43 -PUBLIC CryptonightR_instruction44 -PUBLIC CryptonightR_instruction45 -PUBLIC CryptonightR_instruction46 -PUBLIC CryptonightR_instruction47 -PUBLIC CryptonightR_instruction48 -PUBLIC CryptonightR_instruction49 -PUBLIC CryptonightR_instruction50 -PUBLIC CryptonightR_instruction51 -PUBLIC CryptonightR_instruction52 -PUBLIC CryptonightR_instruction53 -PUBLIC CryptonightR_instruction54 -PUBLIC CryptonightR_instruction55 -PUBLIC CryptonightR_instruction56 -PUBLIC CryptonightR_instruction57 -PUBLIC CryptonightR_instruction58 -PUBLIC CryptonightR_instruction59 -PUBLIC CryptonightR_instruction60 -PUBLIC CryptonightR_instruction61 -PUBLIC CryptonightR_instruction62 -PUBLIC CryptonightR_instruction63 -PUBLIC CryptonightR_instruction64 -PUBLIC CryptonightR_instruction65 -PUBLIC CryptonightR_instruction66 -PUBLIC CryptonightR_instruction67 -PUBLIC CryptonightR_instruction68 -PUBLIC CryptonightR_instruction69 -PUBLIC CryptonightR_instruction70 -PUBLIC CryptonightR_instruction71 -PUBLIC CryptonightR_instruction72 -PUBLIC CryptonightR_instruction73 -PUBLIC CryptonightR_instruction74 -PUBLIC CryptonightR_instruction75 -PUBLIC CryptonightR_instruction76 -PUBLIC CryptonightR_instruction77 -PUBLIC CryptonightR_instruction78 -PUBLIC CryptonightR_instruction79 -PUBLIC CryptonightR_instruction80 -PUBLIC 
CryptonightR_instruction81 -PUBLIC CryptonightR_instruction82 -PUBLIC CryptonightR_instruction83 -PUBLIC CryptonightR_instruction84 -PUBLIC CryptonightR_instruction85 -PUBLIC CryptonightR_instruction86 -PUBLIC CryptonightR_instruction87 -PUBLIC CryptonightR_instruction88 -PUBLIC CryptonightR_instruction89 -PUBLIC CryptonightR_instruction90 -PUBLIC CryptonightR_instruction91 -PUBLIC CryptonightR_instruction92 -PUBLIC CryptonightR_instruction93 -PUBLIC CryptonightR_instruction94 -PUBLIC CryptonightR_instruction95 -PUBLIC CryptonightR_instruction96 -PUBLIC CryptonightR_instruction97 -PUBLIC CryptonightR_instruction98 -PUBLIC CryptonightR_instruction99 -PUBLIC CryptonightR_instruction100 -PUBLIC CryptonightR_instruction101 -PUBLIC CryptonightR_instruction102 -PUBLIC CryptonightR_instruction103 -PUBLIC CryptonightR_instruction104 -PUBLIC CryptonightR_instruction105 -PUBLIC CryptonightR_instruction106 -PUBLIC CryptonightR_instruction107 -PUBLIC CryptonightR_instruction108 -PUBLIC CryptonightR_instruction109 -PUBLIC CryptonightR_instruction110 -PUBLIC CryptonightR_instruction111 -PUBLIC CryptonightR_instruction112 -PUBLIC CryptonightR_instruction113 -PUBLIC CryptonightR_instruction114 -PUBLIC CryptonightR_instruction115 -PUBLIC CryptonightR_instruction116 -PUBLIC CryptonightR_instruction117 -PUBLIC CryptonightR_instruction118 -PUBLIC CryptonightR_instruction119 -PUBLIC CryptonightR_instruction120 -PUBLIC CryptonightR_instruction121 -PUBLIC CryptonightR_instruction122 -PUBLIC CryptonightR_instruction123 -PUBLIC CryptonightR_instruction124 -PUBLIC CryptonightR_instruction125 -PUBLIC CryptonightR_instruction126 -PUBLIC CryptonightR_instruction127 -PUBLIC CryptonightR_instruction128 -PUBLIC CryptonightR_instruction129 -PUBLIC CryptonightR_instruction130 -PUBLIC CryptonightR_instruction131 -PUBLIC CryptonightR_instruction132 -PUBLIC CryptonightR_instruction133 -PUBLIC CryptonightR_instruction134 -PUBLIC CryptonightR_instruction135 -PUBLIC CryptonightR_instruction136 -PUBLIC 
CryptonightR_instruction137 -PUBLIC CryptonightR_instruction138 -PUBLIC CryptonightR_instruction139 -PUBLIC CryptonightR_instruction140 -PUBLIC CryptonightR_instruction141 -PUBLIC CryptonightR_instruction142 -PUBLIC CryptonightR_instruction143 -PUBLIC CryptonightR_instruction144 -PUBLIC CryptonightR_instruction145 -PUBLIC CryptonightR_instruction146 -PUBLIC CryptonightR_instruction147 -PUBLIC CryptonightR_instruction148 -PUBLIC CryptonightR_instruction149 -PUBLIC CryptonightR_instruction150 -PUBLIC CryptonightR_instruction151 -PUBLIC CryptonightR_instruction152 -PUBLIC CryptonightR_instruction153 -PUBLIC CryptonightR_instruction154 -PUBLIC CryptonightR_instruction155 -PUBLIC CryptonightR_instruction156 -PUBLIC CryptonightR_instruction157 -PUBLIC CryptonightR_instruction158 -PUBLIC CryptonightR_instruction159 -PUBLIC CryptonightR_instruction160 -PUBLIC CryptonightR_instruction161 -PUBLIC CryptonightR_instruction162 -PUBLIC CryptonightR_instruction163 -PUBLIC CryptonightR_instruction164 -PUBLIC CryptonightR_instruction165 -PUBLIC CryptonightR_instruction166 -PUBLIC CryptonightR_instruction167 -PUBLIC CryptonightR_instruction168 -PUBLIC CryptonightR_instruction169 -PUBLIC CryptonightR_instruction170 -PUBLIC CryptonightR_instruction171 -PUBLIC CryptonightR_instruction172 -PUBLIC CryptonightR_instruction173 -PUBLIC CryptonightR_instruction174 -PUBLIC CryptonightR_instruction175 -PUBLIC CryptonightR_instruction176 -PUBLIC CryptonightR_instruction177 -PUBLIC CryptonightR_instruction178 -PUBLIC CryptonightR_instruction179 -PUBLIC CryptonightR_instruction180 -PUBLIC CryptonightR_instruction181 -PUBLIC CryptonightR_instruction182 -PUBLIC CryptonightR_instruction183 -PUBLIC CryptonightR_instruction184 -PUBLIC CryptonightR_instruction185 -PUBLIC CryptonightR_instruction186 -PUBLIC CryptonightR_instruction187 -PUBLIC CryptonightR_instruction188 -PUBLIC CryptonightR_instruction189 -PUBLIC CryptonightR_instruction190 -PUBLIC CryptonightR_instruction191 -PUBLIC 
CryptonightR_instruction192 -PUBLIC CryptonightR_instruction193 -PUBLIC CryptonightR_instruction194 -PUBLIC CryptonightR_instruction195 -PUBLIC CryptonightR_instruction196 -PUBLIC CryptonightR_instruction197 -PUBLIC CryptonightR_instruction198 -PUBLIC CryptonightR_instruction199 -PUBLIC CryptonightR_instruction200 -PUBLIC CryptonightR_instruction201 -PUBLIC CryptonightR_instruction202 -PUBLIC CryptonightR_instruction203 -PUBLIC CryptonightR_instruction204 -PUBLIC CryptonightR_instruction205 -PUBLIC CryptonightR_instruction206 -PUBLIC CryptonightR_instruction207 -PUBLIC CryptonightR_instruction208 -PUBLIC CryptonightR_instruction209 -PUBLIC CryptonightR_instruction210 -PUBLIC CryptonightR_instruction211 -PUBLIC CryptonightR_instruction212 -PUBLIC CryptonightR_instruction213 -PUBLIC CryptonightR_instruction214 -PUBLIC CryptonightR_instruction215 -PUBLIC CryptonightR_instruction216 -PUBLIC CryptonightR_instruction217 -PUBLIC CryptonightR_instruction218 -PUBLIC CryptonightR_instruction219 -PUBLIC CryptonightR_instruction220 -PUBLIC CryptonightR_instruction221 -PUBLIC CryptonightR_instruction222 -PUBLIC CryptonightR_instruction223 -PUBLIC CryptonightR_instruction224 -PUBLIC CryptonightR_instruction225 -PUBLIC CryptonightR_instruction226 -PUBLIC CryptonightR_instruction227 -PUBLIC CryptonightR_instruction228 -PUBLIC CryptonightR_instruction229 -PUBLIC CryptonightR_instruction230 -PUBLIC CryptonightR_instruction231 -PUBLIC CryptonightR_instruction232 -PUBLIC CryptonightR_instruction233 -PUBLIC CryptonightR_instruction234 -PUBLIC CryptonightR_instruction235 -PUBLIC CryptonightR_instruction236 -PUBLIC CryptonightR_instruction237 -PUBLIC CryptonightR_instruction238 -PUBLIC CryptonightR_instruction239 -PUBLIC CryptonightR_instruction240 -PUBLIC CryptonightR_instruction241 -PUBLIC CryptonightR_instruction242 -PUBLIC CryptonightR_instruction243 -PUBLIC CryptonightR_instruction244 -PUBLIC CryptonightR_instruction245 -PUBLIC CryptonightR_instruction246 -PUBLIC 
CryptonightR_instruction247 -PUBLIC CryptonightR_instruction248 -PUBLIC CryptonightR_instruction249 -PUBLIC CryptonightR_instruction250 -PUBLIC CryptonightR_instruction251 -PUBLIC CryptonightR_instruction252 -PUBLIC CryptonightR_instruction253 -PUBLIC CryptonightR_instruction254 -PUBLIC CryptonightR_instruction255 -PUBLIC CryptonightR_instruction256 -PUBLIC CryptonightR_instruction_mov0 -PUBLIC CryptonightR_instruction_mov1 -PUBLIC CryptonightR_instruction_mov2 -PUBLIC CryptonightR_instruction_mov3 -PUBLIC CryptonightR_instruction_mov4 -PUBLIC CryptonightR_instruction_mov5 -PUBLIC CryptonightR_instruction_mov6 -PUBLIC CryptonightR_instruction_mov7 -PUBLIC CryptonightR_instruction_mov8 -PUBLIC CryptonightR_instruction_mov9 -PUBLIC CryptonightR_instruction_mov10 -PUBLIC CryptonightR_instruction_mov11 -PUBLIC CryptonightR_instruction_mov12 -PUBLIC CryptonightR_instruction_mov13 -PUBLIC CryptonightR_instruction_mov14 -PUBLIC CryptonightR_instruction_mov15 -PUBLIC CryptonightR_instruction_mov16 -PUBLIC CryptonightR_instruction_mov17 -PUBLIC CryptonightR_instruction_mov18 -PUBLIC CryptonightR_instruction_mov19 -PUBLIC CryptonightR_instruction_mov20 -PUBLIC CryptonightR_instruction_mov21 -PUBLIC CryptonightR_instruction_mov22 -PUBLIC CryptonightR_instruction_mov23 -PUBLIC CryptonightR_instruction_mov24 -PUBLIC CryptonightR_instruction_mov25 -PUBLIC CryptonightR_instruction_mov26 -PUBLIC CryptonightR_instruction_mov27 -PUBLIC CryptonightR_instruction_mov28 -PUBLIC CryptonightR_instruction_mov29 -PUBLIC CryptonightR_instruction_mov30 -PUBLIC CryptonightR_instruction_mov31 -PUBLIC CryptonightR_instruction_mov32 -PUBLIC CryptonightR_instruction_mov33 -PUBLIC CryptonightR_instruction_mov34 -PUBLIC CryptonightR_instruction_mov35 -PUBLIC CryptonightR_instruction_mov36 -PUBLIC CryptonightR_instruction_mov37 -PUBLIC CryptonightR_instruction_mov38 -PUBLIC CryptonightR_instruction_mov39 -PUBLIC CryptonightR_instruction_mov40 -PUBLIC CryptonightR_instruction_mov41 -PUBLIC 
CryptonightR_instruction_mov42 -PUBLIC CryptonightR_instruction_mov43 -PUBLIC CryptonightR_instruction_mov44 -PUBLIC CryptonightR_instruction_mov45 -PUBLIC CryptonightR_instruction_mov46 -PUBLIC CryptonightR_instruction_mov47 -PUBLIC CryptonightR_instruction_mov48 -PUBLIC CryptonightR_instruction_mov49 -PUBLIC CryptonightR_instruction_mov50 -PUBLIC CryptonightR_instruction_mov51 -PUBLIC CryptonightR_instruction_mov52 -PUBLIC CryptonightR_instruction_mov53 -PUBLIC CryptonightR_instruction_mov54 -PUBLIC CryptonightR_instruction_mov55 -PUBLIC CryptonightR_instruction_mov56 -PUBLIC CryptonightR_instruction_mov57 -PUBLIC CryptonightR_instruction_mov58 -PUBLIC CryptonightR_instruction_mov59 -PUBLIC CryptonightR_instruction_mov60 -PUBLIC CryptonightR_instruction_mov61 -PUBLIC CryptonightR_instruction_mov62 -PUBLIC CryptonightR_instruction_mov63 -PUBLIC CryptonightR_instruction_mov64 -PUBLIC CryptonightR_instruction_mov65 -PUBLIC CryptonightR_instruction_mov66 -PUBLIC CryptonightR_instruction_mov67 -PUBLIC CryptonightR_instruction_mov68 -PUBLIC CryptonightR_instruction_mov69 -PUBLIC CryptonightR_instruction_mov70 -PUBLIC CryptonightR_instruction_mov71 -PUBLIC CryptonightR_instruction_mov72 -PUBLIC CryptonightR_instruction_mov73 -PUBLIC CryptonightR_instruction_mov74 -PUBLIC CryptonightR_instruction_mov75 -PUBLIC CryptonightR_instruction_mov76 -PUBLIC CryptonightR_instruction_mov77 -PUBLIC CryptonightR_instruction_mov78 -PUBLIC CryptonightR_instruction_mov79 -PUBLIC CryptonightR_instruction_mov80 -PUBLIC CryptonightR_instruction_mov81 -PUBLIC CryptonightR_instruction_mov82 -PUBLIC CryptonightR_instruction_mov83 -PUBLIC CryptonightR_instruction_mov84 -PUBLIC CryptonightR_instruction_mov85 -PUBLIC CryptonightR_instruction_mov86 -PUBLIC CryptonightR_instruction_mov87 -PUBLIC CryptonightR_instruction_mov88 -PUBLIC CryptonightR_instruction_mov89 -PUBLIC CryptonightR_instruction_mov90 -PUBLIC CryptonightR_instruction_mov91 -PUBLIC CryptonightR_instruction_mov92 -PUBLIC 
CryptonightR_instruction_mov93 -PUBLIC CryptonightR_instruction_mov94 -PUBLIC CryptonightR_instruction_mov95 -PUBLIC CryptonightR_instruction_mov96 -PUBLIC CryptonightR_instruction_mov97 -PUBLIC CryptonightR_instruction_mov98 -PUBLIC CryptonightR_instruction_mov99 -PUBLIC CryptonightR_instruction_mov100 -PUBLIC CryptonightR_instruction_mov101 -PUBLIC CryptonightR_instruction_mov102 -PUBLIC CryptonightR_instruction_mov103 -PUBLIC CryptonightR_instruction_mov104 -PUBLIC CryptonightR_instruction_mov105 -PUBLIC CryptonightR_instruction_mov106 -PUBLIC CryptonightR_instruction_mov107 -PUBLIC CryptonightR_instruction_mov108 -PUBLIC CryptonightR_instruction_mov109 -PUBLIC CryptonightR_instruction_mov110 -PUBLIC CryptonightR_instruction_mov111 -PUBLIC CryptonightR_instruction_mov112 -PUBLIC CryptonightR_instruction_mov113 -PUBLIC CryptonightR_instruction_mov114 -PUBLIC CryptonightR_instruction_mov115 -PUBLIC CryptonightR_instruction_mov116 -PUBLIC CryptonightR_instruction_mov117 -PUBLIC CryptonightR_instruction_mov118 -PUBLIC CryptonightR_instruction_mov119 -PUBLIC CryptonightR_instruction_mov120 -PUBLIC CryptonightR_instruction_mov121 -PUBLIC CryptonightR_instruction_mov122 -PUBLIC CryptonightR_instruction_mov123 -PUBLIC CryptonightR_instruction_mov124 -PUBLIC CryptonightR_instruction_mov125 -PUBLIC CryptonightR_instruction_mov126 -PUBLIC CryptonightR_instruction_mov127 -PUBLIC CryptonightR_instruction_mov128 -PUBLIC CryptonightR_instruction_mov129 -PUBLIC CryptonightR_instruction_mov130 -PUBLIC CryptonightR_instruction_mov131 -PUBLIC CryptonightR_instruction_mov132 -PUBLIC CryptonightR_instruction_mov133 -PUBLIC CryptonightR_instruction_mov134 -PUBLIC CryptonightR_instruction_mov135 -PUBLIC CryptonightR_instruction_mov136 -PUBLIC CryptonightR_instruction_mov137 -PUBLIC CryptonightR_instruction_mov138 -PUBLIC CryptonightR_instruction_mov139 -PUBLIC CryptonightR_instruction_mov140 -PUBLIC CryptonightR_instruction_mov141 -PUBLIC CryptonightR_instruction_mov142 -PUBLIC 
CryptonightR_instruction_mov143 -PUBLIC CryptonightR_instruction_mov144 -PUBLIC CryptonightR_instruction_mov145 -PUBLIC CryptonightR_instruction_mov146 -PUBLIC CryptonightR_instruction_mov147 -PUBLIC CryptonightR_instruction_mov148 -PUBLIC CryptonightR_instruction_mov149 -PUBLIC CryptonightR_instruction_mov150 -PUBLIC CryptonightR_instruction_mov151 -PUBLIC CryptonightR_instruction_mov152 -PUBLIC CryptonightR_instruction_mov153 -PUBLIC CryptonightR_instruction_mov154 -PUBLIC CryptonightR_instruction_mov155 -PUBLIC CryptonightR_instruction_mov156 -PUBLIC CryptonightR_instruction_mov157 -PUBLIC CryptonightR_instruction_mov158 -PUBLIC CryptonightR_instruction_mov159 -PUBLIC CryptonightR_instruction_mov160 -PUBLIC CryptonightR_instruction_mov161 -PUBLIC CryptonightR_instruction_mov162 -PUBLIC CryptonightR_instruction_mov163 -PUBLIC CryptonightR_instruction_mov164 -PUBLIC CryptonightR_instruction_mov165 -PUBLIC CryptonightR_instruction_mov166 -PUBLIC CryptonightR_instruction_mov167 -PUBLIC CryptonightR_instruction_mov168 -PUBLIC CryptonightR_instruction_mov169 -PUBLIC CryptonightR_instruction_mov170 -PUBLIC CryptonightR_instruction_mov171 -PUBLIC CryptonightR_instruction_mov172 -PUBLIC CryptonightR_instruction_mov173 -PUBLIC CryptonightR_instruction_mov174 -PUBLIC CryptonightR_instruction_mov175 -PUBLIC CryptonightR_instruction_mov176 -PUBLIC CryptonightR_instruction_mov177 -PUBLIC CryptonightR_instruction_mov178 -PUBLIC CryptonightR_instruction_mov179 -PUBLIC CryptonightR_instruction_mov180 -PUBLIC CryptonightR_instruction_mov181 -PUBLIC CryptonightR_instruction_mov182 -PUBLIC CryptonightR_instruction_mov183 -PUBLIC CryptonightR_instruction_mov184 -PUBLIC CryptonightR_instruction_mov185 -PUBLIC CryptonightR_instruction_mov186 -PUBLIC CryptonightR_instruction_mov187 -PUBLIC CryptonightR_instruction_mov188 -PUBLIC CryptonightR_instruction_mov189 -PUBLIC CryptonightR_instruction_mov190 -PUBLIC CryptonightR_instruction_mov191 -PUBLIC CryptonightR_instruction_mov192 -PUBLIC 
CryptonightR_instruction_mov193 -PUBLIC CryptonightR_instruction_mov194 -PUBLIC CryptonightR_instruction_mov195 -PUBLIC CryptonightR_instruction_mov196 -PUBLIC CryptonightR_instruction_mov197 -PUBLIC CryptonightR_instruction_mov198 -PUBLIC CryptonightR_instruction_mov199 -PUBLIC CryptonightR_instruction_mov200 -PUBLIC CryptonightR_instruction_mov201 -PUBLIC CryptonightR_instruction_mov202 -PUBLIC CryptonightR_instruction_mov203 -PUBLIC CryptonightR_instruction_mov204 -PUBLIC CryptonightR_instruction_mov205 -PUBLIC CryptonightR_instruction_mov206 -PUBLIC CryptonightR_instruction_mov207 -PUBLIC CryptonightR_instruction_mov208 -PUBLIC CryptonightR_instruction_mov209 -PUBLIC CryptonightR_instruction_mov210 -PUBLIC CryptonightR_instruction_mov211 -PUBLIC CryptonightR_instruction_mov212 -PUBLIC CryptonightR_instruction_mov213 -PUBLIC CryptonightR_instruction_mov214 -PUBLIC CryptonightR_instruction_mov215 -PUBLIC CryptonightR_instruction_mov216 -PUBLIC CryptonightR_instruction_mov217 -PUBLIC CryptonightR_instruction_mov218 -PUBLIC CryptonightR_instruction_mov219 -PUBLIC CryptonightR_instruction_mov220 -PUBLIC CryptonightR_instruction_mov221 -PUBLIC CryptonightR_instruction_mov222 -PUBLIC CryptonightR_instruction_mov223 -PUBLIC CryptonightR_instruction_mov224 -PUBLIC CryptonightR_instruction_mov225 -PUBLIC CryptonightR_instruction_mov226 -PUBLIC CryptonightR_instruction_mov227 -PUBLIC CryptonightR_instruction_mov228 -PUBLIC CryptonightR_instruction_mov229 -PUBLIC CryptonightR_instruction_mov230 -PUBLIC CryptonightR_instruction_mov231 -PUBLIC CryptonightR_instruction_mov232 -PUBLIC CryptonightR_instruction_mov233 -PUBLIC CryptonightR_instruction_mov234 -PUBLIC CryptonightR_instruction_mov235 -PUBLIC CryptonightR_instruction_mov236 -PUBLIC CryptonightR_instruction_mov237 -PUBLIC CryptonightR_instruction_mov238 -PUBLIC CryptonightR_instruction_mov239 -PUBLIC CryptonightR_instruction_mov240 -PUBLIC CryptonightR_instruction_mov241 -PUBLIC CryptonightR_instruction_mov242 -PUBLIC 
CryptonightR_instruction_mov243 -PUBLIC CryptonightR_instruction_mov244 -PUBLIC CryptonightR_instruction_mov245 -PUBLIC CryptonightR_instruction_mov246 -PUBLIC CryptonightR_instruction_mov247 -PUBLIC CryptonightR_instruction_mov248 -PUBLIC CryptonightR_instruction_mov249 -PUBLIC CryptonightR_instruction_mov250 -PUBLIC CryptonightR_instruction_mov251 -PUBLIC CryptonightR_instruction_mov252 -PUBLIC CryptonightR_instruction_mov253 -PUBLIC CryptonightR_instruction_mov254 -PUBLIC CryptonightR_instruction_mov255 -PUBLIC CryptonightR_instruction_mov256 - -INCLUDE CryptonightWOW_template_win.inc -INCLUDE CryptonightR_template_win.inc -INCLUDE CryptonightWOW_soft_aes_template_win.inc -INCLUDE CryptonightR_soft_aes_template_win.inc - -CryptonightR_instruction0: - imul rbx, rbx -CryptonightR_instruction1: - imul rbx, rbx -CryptonightR_instruction2: - imul rbx, rbx -CryptonightR_instruction3: - add rbx, r9 - add rbx, 2147483647 -CryptonightR_instruction4: - sub rbx, r9 -CryptonightR_instruction5: - ror ebx, cl -CryptonightR_instruction6: - rol ebx, cl -CryptonightR_instruction7: - xor rbx, r9 -CryptonightR_instruction8: - imul rsi, rbx -CryptonightR_instruction9: - imul rsi, rbx -CryptonightR_instruction10: - imul rsi, rbx -CryptonightR_instruction11: - add rsi, rbx - add rsi, 2147483647 -CryptonightR_instruction12: - sub rsi, rbx -CryptonightR_instruction13: - ror esi, cl -CryptonightR_instruction14: - rol esi, cl -CryptonightR_instruction15: - xor rsi, rbx -CryptonightR_instruction16: - imul rdi, rbx -CryptonightR_instruction17: - imul rdi, rbx -CryptonightR_instruction18: - imul rdi, rbx -CryptonightR_instruction19: - add rdi, rbx - add rdi, 2147483647 -CryptonightR_instruction20: - sub rdi, rbx -CryptonightR_instruction21: - ror edi, cl -CryptonightR_instruction22: - rol edi, cl -CryptonightR_instruction23: - xor rdi, rbx -CryptonightR_instruction24: - imul rbp, rbx -CryptonightR_instruction25: - imul rbp, rbx -CryptonightR_instruction26: - imul rbp, rbx 
-CryptonightR_instruction27: - add rbp, rbx - add rbp, 2147483647 -CryptonightR_instruction28: - sub rbp, rbx -CryptonightR_instruction29: - ror ebp, cl -CryptonightR_instruction30: - rol ebp, cl -CryptonightR_instruction31: - xor rbp, rbx -CryptonightR_instruction32: - imul rbx, rsi -CryptonightR_instruction33: - imul rbx, rsi -CryptonightR_instruction34: - imul rbx, rsi -CryptonightR_instruction35: - add rbx, rsi - add rbx, 2147483647 -CryptonightR_instruction36: - sub rbx, rsi -CryptonightR_instruction37: - ror ebx, cl -CryptonightR_instruction38: - rol ebx, cl -CryptonightR_instruction39: - xor rbx, rsi -CryptonightR_instruction40: - imul rsi, rsi -CryptonightR_instruction41: - imul rsi, rsi -CryptonightR_instruction42: - imul rsi, rsi -CryptonightR_instruction43: - add rsi, r9 - add rsi, 2147483647 -CryptonightR_instruction44: - sub rsi, r9 -CryptonightR_instruction45: - ror esi, cl -CryptonightR_instruction46: - rol esi, cl -CryptonightR_instruction47: - xor rsi, r9 -CryptonightR_instruction48: - imul rdi, rsi -CryptonightR_instruction49: - imul rdi, rsi -CryptonightR_instruction50: - imul rdi, rsi -CryptonightR_instruction51: - add rdi, rsi - add rdi, 2147483647 -CryptonightR_instruction52: - sub rdi, rsi -CryptonightR_instruction53: - ror edi, cl -CryptonightR_instruction54: - rol edi, cl -CryptonightR_instruction55: - xor rdi, rsi -CryptonightR_instruction56: - imul rbp, rsi -CryptonightR_instruction57: - imul rbp, rsi -CryptonightR_instruction58: - imul rbp, rsi -CryptonightR_instruction59: - add rbp, rsi - add rbp, 2147483647 -CryptonightR_instruction60: - sub rbp, rsi -CryptonightR_instruction61: - ror ebp, cl -CryptonightR_instruction62: - rol ebp, cl -CryptonightR_instruction63: - xor rbp, rsi -CryptonightR_instruction64: - imul rbx, rdi -CryptonightR_instruction65: - imul rbx, rdi -CryptonightR_instruction66: - imul rbx, rdi -CryptonightR_instruction67: - add rbx, rdi - add rbx, 2147483647 -CryptonightR_instruction68: - sub rbx, rdi 
-CryptonightR_instruction69: - ror ebx, cl -CryptonightR_instruction70: - rol ebx, cl -CryptonightR_instruction71: - xor rbx, rdi -CryptonightR_instruction72: - imul rsi, rdi -CryptonightR_instruction73: - imul rsi, rdi -CryptonightR_instruction74: - imul rsi, rdi -CryptonightR_instruction75: - add rsi, rdi - add rsi, 2147483647 -CryptonightR_instruction76: - sub rsi, rdi -CryptonightR_instruction77: - ror esi, cl -CryptonightR_instruction78: - rol esi, cl -CryptonightR_instruction79: - xor rsi, rdi -CryptonightR_instruction80: - imul rdi, rdi -CryptonightR_instruction81: - imul rdi, rdi -CryptonightR_instruction82: - imul rdi, rdi -CryptonightR_instruction83: - add rdi, r9 - add rdi, 2147483647 -CryptonightR_instruction84: - sub rdi, r9 -CryptonightR_instruction85: - ror edi, cl -CryptonightR_instruction86: - rol edi, cl -CryptonightR_instruction87: - xor rdi, r9 -CryptonightR_instruction88: - imul rbp, rdi -CryptonightR_instruction89: - imul rbp, rdi -CryptonightR_instruction90: - imul rbp, rdi -CryptonightR_instruction91: - add rbp, rdi - add rbp, 2147483647 -CryptonightR_instruction92: - sub rbp, rdi -CryptonightR_instruction93: - ror ebp, cl -CryptonightR_instruction94: - rol ebp, cl -CryptonightR_instruction95: - xor rbp, rdi -CryptonightR_instruction96: - imul rbx, rbp -CryptonightR_instruction97: - imul rbx, rbp -CryptonightR_instruction98: - imul rbx, rbp -CryptonightR_instruction99: - add rbx, rbp - add rbx, 2147483647 -CryptonightR_instruction100: - sub rbx, rbp -CryptonightR_instruction101: - ror ebx, cl -CryptonightR_instruction102: - rol ebx, cl -CryptonightR_instruction103: - xor rbx, rbp -CryptonightR_instruction104: - imul rsi, rbp -CryptonightR_instruction105: - imul rsi, rbp -CryptonightR_instruction106: - imul rsi, rbp -CryptonightR_instruction107: - add rsi, rbp - add rsi, 2147483647 -CryptonightR_instruction108: - sub rsi, rbp -CryptonightR_instruction109: - ror esi, cl -CryptonightR_instruction110: - rol esi, cl -CryptonightR_instruction111: 
- xor rsi, rbp -CryptonightR_instruction112: - imul rdi, rbp -CryptonightR_instruction113: - imul rdi, rbp -CryptonightR_instruction114: - imul rdi, rbp -CryptonightR_instruction115: - add rdi, rbp - add rdi, 2147483647 -CryptonightR_instruction116: - sub rdi, rbp -CryptonightR_instruction117: - ror edi, cl -CryptonightR_instruction118: - rol edi, cl -CryptonightR_instruction119: - xor rdi, rbp -CryptonightR_instruction120: - imul rbp, rbp -CryptonightR_instruction121: - imul rbp, rbp -CryptonightR_instruction122: - imul rbp, rbp -CryptonightR_instruction123: - add rbp, r9 - add rbp, 2147483647 -CryptonightR_instruction124: - sub rbp, r9 -CryptonightR_instruction125: - ror ebp, cl -CryptonightR_instruction126: - rol ebp, cl -CryptonightR_instruction127: - xor rbp, r9 -CryptonightR_instruction128: - imul rbx, rsp -CryptonightR_instruction129: - imul rbx, rsp -CryptonightR_instruction130: - imul rbx, rsp -CryptonightR_instruction131: - add rbx, rsp - add rbx, 2147483647 -CryptonightR_instruction132: - sub rbx, rsp -CryptonightR_instruction133: - ror ebx, cl -CryptonightR_instruction134: - rol ebx, cl -CryptonightR_instruction135: - xor rbx, rsp -CryptonightR_instruction136: - imul rsi, rsp -CryptonightR_instruction137: - imul rsi, rsp -CryptonightR_instruction138: - imul rsi, rsp -CryptonightR_instruction139: - add rsi, rsp - add rsi, 2147483647 -CryptonightR_instruction140: - sub rsi, rsp -CryptonightR_instruction141: - ror esi, cl -CryptonightR_instruction142: - rol esi, cl -CryptonightR_instruction143: - xor rsi, rsp -CryptonightR_instruction144: - imul rdi, rsp -CryptonightR_instruction145: - imul rdi, rsp -CryptonightR_instruction146: - imul rdi, rsp -CryptonightR_instruction147: - add rdi, rsp - add rdi, 2147483647 -CryptonightR_instruction148: - sub rdi, rsp -CryptonightR_instruction149: - ror edi, cl -CryptonightR_instruction150: - rol edi, cl -CryptonightR_instruction151: - xor rdi, rsp -CryptonightR_instruction152: - imul rbp, rsp 
-CryptonightR_instruction153: - imul rbp, rsp -CryptonightR_instruction154: - imul rbp, rsp -CryptonightR_instruction155: - add rbp, rsp - add rbp, 2147483647 -CryptonightR_instruction156: - sub rbp, rsp -CryptonightR_instruction157: - ror ebp, cl -CryptonightR_instruction158: - rol ebp, cl -CryptonightR_instruction159: - xor rbp, rsp -CryptonightR_instruction160: - imul rbx, r15 -CryptonightR_instruction161: - imul rbx, r15 -CryptonightR_instruction162: - imul rbx, r15 -CryptonightR_instruction163: - add rbx, r15 - add rbx, 2147483647 -CryptonightR_instruction164: - sub rbx, r15 -CryptonightR_instruction165: - ror ebx, cl -CryptonightR_instruction166: - rol ebx, cl -CryptonightR_instruction167: - xor rbx, r15 -CryptonightR_instruction168: - imul rsi, r15 -CryptonightR_instruction169: - imul rsi, r15 -CryptonightR_instruction170: - imul rsi, r15 -CryptonightR_instruction171: - add rsi, r15 - add rsi, 2147483647 -CryptonightR_instruction172: - sub rsi, r15 -CryptonightR_instruction173: - ror esi, cl -CryptonightR_instruction174: - rol esi, cl -CryptonightR_instruction175: - xor rsi, r15 -CryptonightR_instruction176: - imul rdi, r15 -CryptonightR_instruction177: - imul rdi, r15 -CryptonightR_instruction178: - imul rdi, r15 -CryptonightR_instruction179: - add rdi, r15 - add rdi, 2147483647 -CryptonightR_instruction180: - sub rdi, r15 -CryptonightR_instruction181: - ror edi, cl -CryptonightR_instruction182: - rol edi, cl -CryptonightR_instruction183: - xor rdi, r15 -CryptonightR_instruction184: - imul rbp, r15 -CryptonightR_instruction185: - imul rbp, r15 -CryptonightR_instruction186: - imul rbp, r15 -CryptonightR_instruction187: - add rbp, r15 - add rbp, 2147483647 -CryptonightR_instruction188: - sub rbp, r15 -CryptonightR_instruction189: - ror ebp, cl -CryptonightR_instruction190: - rol ebp, cl -CryptonightR_instruction191: - xor rbp, r15 -CryptonightR_instruction192: - imul rbx, rax -CryptonightR_instruction193: - imul rbx, rax -CryptonightR_instruction194: - imul 
rbx, rax -CryptonightR_instruction195: - add rbx, rax - add rbx, 2147483647 -CryptonightR_instruction196: - sub rbx, rax -CryptonightR_instruction197: - ror ebx, cl -CryptonightR_instruction198: - rol ebx, cl -CryptonightR_instruction199: - xor rbx, rax -CryptonightR_instruction200: - imul rsi, rax -CryptonightR_instruction201: - imul rsi, rax -CryptonightR_instruction202: - imul rsi, rax -CryptonightR_instruction203: - add rsi, rax - add rsi, 2147483647 -CryptonightR_instruction204: - sub rsi, rax -CryptonightR_instruction205: - ror esi, cl -CryptonightR_instruction206: - rol esi, cl -CryptonightR_instruction207: - xor rsi, rax -CryptonightR_instruction208: - imul rdi, rax -CryptonightR_instruction209: - imul rdi, rax -CryptonightR_instruction210: - imul rdi, rax -CryptonightR_instruction211: - add rdi, rax - add rdi, 2147483647 -CryptonightR_instruction212: - sub rdi, rax -CryptonightR_instruction213: - ror edi, cl -CryptonightR_instruction214: - rol edi, cl -CryptonightR_instruction215: - xor rdi, rax -CryptonightR_instruction216: - imul rbp, rax -CryptonightR_instruction217: - imul rbp, rax -CryptonightR_instruction218: - imul rbp, rax -CryptonightR_instruction219: - add rbp, rax - add rbp, 2147483647 -CryptonightR_instruction220: - sub rbp, rax -CryptonightR_instruction221: - ror ebp, cl -CryptonightR_instruction222: - rol ebp, cl -CryptonightR_instruction223: - xor rbp, rax -CryptonightR_instruction224: - imul rbx, rdx -CryptonightR_instruction225: - imul rbx, rdx -CryptonightR_instruction226: - imul rbx, rdx -CryptonightR_instruction227: - add rbx, rdx - add rbx, 2147483647 -CryptonightR_instruction228: - sub rbx, rdx -CryptonightR_instruction229: - ror ebx, cl -CryptonightR_instruction230: - rol ebx, cl -CryptonightR_instruction231: - xor rbx, rdx -CryptonightR_instruction232: - imul rsi, rdx -CryptonightR_instruction233: - imul rsi, rdx -CryptonightR_instruction234: - imul rsi, rdx -CryptonightR_instruction235: - add rsi, rdx - add rsi, 2147483647 
-CryptonightR_instruction236: - sub rsi, rdx -CryptonightR_instruction237: - ror esi, cl -CryptonightR_instruction238: - rol esi, cl -CryptonightR_instruction239: - xor rsi, rdx -CryptonightR_instruction240: - imul rdi, rdx -CryptonightR_instruction241: - imul rdi, rdx -CryptonightR_instruction242: - imul rdi, rdx -CryptonightR_instruction243: - add rdi, rdx - add rdi, 2147483647 -CryptonightR_instruction244: - sub rdi, rdx -CryptonightR_instruction245: - ror edi, cl -CryptonightR_instruction246: - rol edi, cl -CryptonightR_instruction247: - xor rdi, rdx -CryptonightR_instruction248: - imul rbp, rdx -CryptonightR_instruction249: - imul rbp, rdx -CryptonightR_instruction250: - imul rbp, rdx -CryptonightR_instruction251: - add rbp, rdx - add rbp, 2147483647 -CryptonightR_instruction252: - sub rbp, rdx -CryptonightR_instruction253: - ror ebp, cl -CryptonightR_instruction254: - rol ebp, cl -CryptonightR_instruction255: - xor rbp, rdx -CryptonightR_instruction256: - imul rbx, rbx -CryptonightR_instruction_mov0: - -CryptonightR_instruction_mov1: - -CryptonightR_instruction_mov2: - -CryptonightR_instruction_mov3: - -CryptonightR_instruction_mov4: - -CryptonightR_instruction_mov5: - mov rcx, rbx -CryptonightR_instruction_mov6: - mov rcx, rbx -CryptonightR_instruction_mov7: - -CryptonightR_instruction_mov8: - -CryptonightR_instruction_mov9: - -CryptonightR_instruction_mov10: - -CryptonightR_instruction_mov11: - -CryptonightR_instruction_mov12: - -CryptonightR_instruction_mov13: - mov rcx, rbx -CryptonightR_instruction_mov14: - mov rcx, rbx -CryptonightR_instruction_mov15: - -CryptonightR_instruction_mov16: - -CryptonightR_instruction_mov17: - -CryptonightR_instruction_mov18: - -CryptonightR_instruction_mov19: - -CryptonightR_instruction_mov20: - -CryptonightR_instruction_mov21: - mov rcx, rbx -CryptonightR_instruction_mov22: - mov rcx, rbx -CryptonightR_instruction_mov23: - -CryptonightR_instruction_mov24: - -CryptonightR_instruction_mov25: - 
-CryptonightR_instruction_mov26: - -CryptonightR_instruction_mov27: - -CryptonightR_instruction_mov28: - -CryptonightR_instruction_mov29: - mov rcx, rbx -CryptonightR_instruction_mov30: - mov rcx, rbx -CryptonightR_instruction_mov31: - -CryptonightR_instruction_mov32: - -CryptonightR_instruction_mov33: - -CryptonightR_instruction_mov34: - -CryptonightR_instruction_mov35: - -CryptonightR_instruction_mov36: - -CryptonightR_instruction_mov37: - mov rcx, rsi -CryptonightR_instruction_mov38: - mov rcx, rsi -CryptonightR_instruction_mov39: - -CryptonightR_instruction_mov40: - -CryptonightR_instruction_mov41: - -CryptonightR_instruction_mov42: - -CryptonightR_instruction_mov43: - -CryptonightR_instruction_mov44: - -CryptonightR_instruction_mov45: - mov rcx, rsi -CryptonightR_instruction_mov46: - mov rcx, rsi -CryptonightR_instruction_mov47: - -CryptonightR_instruction_mov48: - -CryptonightR_instruction_mov49: - -CryptonightR_instruction_mov50: - -CryptonightR_instruction_mov51: - -CryptonightR_instruction_mov52: - -CryptonightR_instruction_mov53: - mov rcx, rsi -CryptonightR_instruction_mov54: - mov rcx, rsi -CryptonightR_instruction_mov55: - -CryptonightR_instruction_mov56: - -CryptonightR_instruction_mov57: - -CryptonightR_instruction_mov58: - -CryptonightR_instruction_mov59: - -CryptonightR_instruction_mov60: - -CryptonightR_instruction_mov61: - mov rcx, rsi -CryptonightR_instruction_mov62: - mov rcx, rsi -CryptonightR_instruction_mov63: - -CryptonightR_instruction_mov64: - -CryptonightR_instruction_mov65: - -CryptonightR_instruction_mov66: - -CryptonightR_instruction_mov67: - -CryptonightR_instruction_mov68: - -CryptonightR_instruction_mov69: - mov rcx, rdi -CryptonightR_instruction_mov70: - mov rcx, rdi -CryptonightR_instruction_mov71: - -CryptonightR_instruction_mov72: - -CryptonightR_instruction_mov73: - -CryptonightR_instruction_mov74: - -CryptonightR_instruction_mov75: - -CryptonightR_instruction_mov76: - -CryptonightR_instruction_mov77: - mov rcx, rdi 
-CryptonightR_instruction_mov78: - mov rcx, rdi -CryptonightR_instruction_mov79: - -CryptonightR_instruction_mov80: - -CryptonightR_instruction_mov81: - -CryptonightR_instruction_mov82: - -CryptonightR_instruction_mov83: - -CryptonightR_instruction_mov84: - -CryptonightR_instruction_mov85: - mov rcx, rdi -CryptonightR_instruction_mov86: - mov rcx, rdi -CryptonightR_instruction_mov87: - -CryptonightR_instruction_mov88: - -CryptonightR_instruction_mov89: - -CryptonightR_instruction_mov90: - -CryptonightR_instruction_mov91: - -CryptonightR_instruction_mov92: - -CryptonightR_instruction_mov93: - mov rcx, rdi -CryptonightR_instruction_mov94: - mov rcx, rdi -CryptonightR_instruction_mov95: - -CryptonightR_instruction_mov96: - -CryptonightR_instruction_mov97: - -CryptonightR_instruction_mov98: - -CryptonightR_instruction_mov99: - -CryptonightR_instruction_mov100: - -CryptonightR_instruction_mov101: - mov rcx, rbp -CryptonightR_instruction_mov102: - mov rcx, rbp -CryptonightR_instruction_mov103: - -CryptonightR_instruction_mov104: - -CryptonightR_instruction_mov105: - -CryptonightR_instruction_mov106: - -CryptonightR_instruction_mov107: - -CryptonightR_instruction_mov108: - -CryptonightR_instruction_mov109: - mov rcx, rbp -CryptonightR_instruction_mov110: - mov rcx, rbp -CryptonightR_instruction_mov111: - -CryptonightR_instruction_mov112: - -CryptonightR_instruction_mov113: - -CryptonightR_instruction_mov114: - -CryptonightR_instruction_mov115: - -CryptonightR_instruction_mov116: - -CryptonightR_instruction_mov117: - mov rcx, rbp -CryptonightR_instruction_mov118: - mov rcx, rbp -CryptonightR_instruction_mov119: - -CryptonightR_instruction_mov120: - -CryptonightR_instruction_mov121: - -CryptonightR_instruction_mov122: - -CryptonightR_instruction_mov123: - -CryptonightR_instruction_mov124: - -CryptonightR_instruction_mov125: - mov rcx, rbp -CryptonightR_instruction_mov126: - mov rcx, rbp -CryptonightR_instruction_mov127: - -CryptonightR_instruction_mov128: - 
-CryptonightR_instruction_mov129: - -CryptonightR_instruction_mov130: - -CryptonightR_instruction_mov131: - -CryptonightR_instruction_mov132: - -CryptonightR_instruction_mov133: - mov rcx, rsp -CryptonightR_instruction_mov134: - mov rcx, rsp -CryptonightR_instruction_mov135: - -CryptonightR_instruction_mov136: - -CryptonightR_instruction_mov137: - -CryptonightR_instruction_mov138: - -CryptonightR_instruction_mov139: - -CryptonightR_instruction_mov140: - -CryptonightR_instruction_mov141: - mov rcx, rsp -CryptonightR_instruction_mov142: - mov rcx, rsp -CryptonightR_instruction_mov143: - -CryptonightR_instruction_mov144: - -CryptonightR_instruction_mov145: - -CryptonightR_instruction_mov146: - -CryptonightR_instruction_mov147: - -CryptonightR_instruction_mov148: - -CryptonightR_instruction_mov149: - mov rcx, rsp -CryptonightR_instruction_mov150: - mov rcx, rsp -CryptonightR_instruction_mov151: - -CryptonightR_instruction_mov152: - -CryptonightR_instruction_mov153: - -CryptonightR_instruction_mov154: - -CryptonightR_instruction_mov155: - -CryptonightR_instruction_mov156: - -CryptonightR_instruction_mov157: - mov rcx, rsp -CryptonightR_instruction_mov158: - mov rcx, rsp -CryptonightR_instruction_mov159: - -CryptonightR_instruction_mov160: - -CryptonightR_instruction_mov161: - -CryptonightR_instruction_mov162: - -CryptonightR_instruction_mov163: - -CryptonightR_instruction_mov164: - -CryptonightR_instruction_mov165: - mov rcx, r15 -CryptonightR_instruction_mov166: - mov rcx, r15 -CryptonightR_instruction_mov167: - -CryptonightR_instruction_mov168: - -CryptonightR_instruction_mov169: - -CryptonightR_instruction_mov170: - -CryptonightR_instruction_mov171: - -CryptonightR_instruction_mov172: - -CryptonightR_instruction_mov173: - mov rcx, r15 -CryptonightR_instruction_mov174: - mov rcx, r15 -CryptonightR_instruction_mov175: - -CryptonightR_instruction_mov176: - -CryptonightR_instruction_mov177: - -CryptonightR_instruction_mov178: - -CryptonightR_instruction_mov179: - 
-CryptonightR_instruction_mov180: - -CryptonightR_instruction_mov181: - mov rcx, r15 -CryptonightR_instruction_mov182: - mov rcx, r15 -CryptonightR_instruction_mov183: - -CryptonightR_instruction_mov184: - -CryptonightR_instruction_mov185: - -CryptonightR_instruction_mov186: - -CryptonightR_instruction_mov187: - -CryptonightR_instruction_mov188: - -CryptonightR_instruction_mov189: - mov rcx, r15 -CryptonightR_instruction_mov190: - mov rcx, r15 -CryptonightR_instruction_mov191: - -CryptonightR_instruction_mov192: - -CryptonightR_instruction_mov193: - -CryptonightR_instruction_mov194: - -CryptonightR_instruction_mov195: - -CryptonightR_instruction_mov196: - -CryptonightR_instruction_mov197: - mov rcx, rax -CryptonightR_instruction_mov198: - mov rcx, rax -CryptonightR_instruction_mov199: - -CryptonightR_instruction_mov200: - -CryptonightR_instruction_mov201: - -CryptonightR_instruction_mov202: - -CryptonightR_instruction_mov203: - -CryptonightR_instruction_mov204: - -CryptonightR_instruction_mov205: - mov rcx, rax -CryptonightR_instruction_mov206: - mov rcx, rax -CryptonightR_instruction_mov207: - -CryptonightR_instruction_mov208: - -CryptonightR_instruction_mov209: - -CryptonightR_instruction_mov210: - -CryptonightR_instruction_mov211: - -CryptonightR_instruction_mov212: - -CryptonightR_instruction_mov213: - mov rcx, rax -CryptonightR_instruction_mov214: - mov rcx, rax -CryptonightR_instruction_mov215: - -CryptonightR_instruction_mov216: - -CryptonightR_instruction_mov217: - -CryptonightR_instruction_mov218: - -CryptonightR_instruction_mov219: - -CryptonightR_instruction_mov220: - -CryptonightR_instruction_mov221: - mov rcx, rax -CryptonightR_instruction_mov222: - mov rcx, rax -CryptonightR_instruction_mov223: - -CryptonightR_instruction_mov224: - -CryptonightR_instruction_mov225: - -CryptonightR_instruction_mov226: - -CryptonightR_instruction_mov227: - -CryptonightR_instruction_mov228: - -CryptonightR_instruction_mov229: - mov rcx, rdx 
-CryptonightR_instruction_mov230: - mov rcx, rdx -CryptonightR_instruction_mov231: - -CryptonightR_instruction_mov232: - -CryptonightR_instruction_mov233: - -CryptonightR_instruction_mov234: - -CryptonightR_instruction_mov235: - -CryptonightR_instruction_mov236: - -CryptonightR_instruction_mov237: - mov rcx, rdx -CryptonightR_instruction_mov238: - mov rcx, rdx -CryptonightR_instruction_mov239: - -CryptonightR_instruction_mov240: - -CryptonightR_instruction_mov241: - -CryptonightR_instruction_mov242: - -CryptonightR_instruction_mov243: - -CryptonightR_instruction_mov244: - -CryptonightR_instruction_mov245: - mov rcx, rdx -CryptonightR_instruction_mov246: - mov rcx, rdx -CryptonightR_instruction_mov247: - -CryptonightR_instruction_mov248: - -CryptonightR_instruction_mov249: - -CryptonightR_instruction_mov250: - -CryptonightR_instruction_mov251: - -CryptonightR_instruction_mov252: - -CryptonightR_instruction_mov253: - mov rcx, rdx -CryptonightR_instruction_mov254: - mov rcx, rdx -CryptonightR_instruction_mov255: - -CryptonightR_instruction_mov256: - -_TEXT_CN_TEMPLATE ENDS -END diff --git a/src/crypto/asm/CryptonightR_template.h b/src/crypto/asm/CryptonightR_template.h deleted file mode 100644 index d9159a8f..00000000 --- a/src/crypto/asm/CryptonightR_template.h +++ /dev/null @@ -1,1087 +0,0 @@ -// Auto-generated file, do not edit - -extern "C" -{ - void CryptonightWOW_template_part1(); - void CryptonightWOW_template_mainloop(); - void CryptonightWOW_template_part2(); - void CryptonightWOW_template_part3(); - void CryptonightWOW_template_end(); - void CryptonightWOW_template_double_part1(); - void CryptonightWOW_template_double_mainloop(); - void CryptonightWOW_template_double_part2(); - void CryptonightWOW_template_double_part3(); - void CryptonightWOW_template_double_part4(); - void CryptonightWOW_template_double_end(); - - void CryptonightR_template_part1(); - void CryptonightR_template_mainloop(); - void CryptonightR_template_part2(); - void 
CryptonightR_template_part3(); - void CryptonightR_template_end(); - void CryptonightR_template_double_part1(); - void CryptonightR_template_double_mainloop(); - void CryptonightR_template_double_part2(); - void CryptonightR_template_double_part3(); - void CryptonightR_template_double_part4(); - void CryptonightR_template_double_end(); - - void CryptonightWOW_soft_aes_template_part1(); - void CryptonightWOW_soft_aes_template_mainloop(); - void CryptonightWOW_soft_aes_template_part2(); - void CryptonightWOW_soft_aes_template_part3(); - void CryptonightWOW_soft_aes_template_end(); - void CryptonightWOW_soft_aes_template_double_part1(); - void CryptonightWOW_soft_aes_template_double_mainloop(); - void CryptonightWOW_soft_aes_template_double_part2(); - void CryptonightWOW_soft_aes_template_double_part3(); - void CryptonightWOW_soft_aes_template_double_part4(); - void CryptonightWOW_soft_aes_template_double_end(); - - void CryptonightR_soft_aes_template_part1(); - void CryptonightR_soft_aes_template_mainloop(); - void CryptonightR_soft_aes_template_part2(); - void CryptonightR_soft_aes_template_part3(); - void CryptonightR_soft_aes_template_end(); - void CryptonightR_soft_aes_template_double_part1(); - void CryptonightR_soft_aes_template_double_mainloop(); - void CryptonightR_soft_aes_template_double_part2(); - void CryptonightR_soft_aes_template_double_part3(); - void CryptonightR_soft_aes_template_double_part4(); - void CryptonightR_soft_aes_template_double_end(); - - void CryptonightR_instruction0(); - void CryptonightR_instruction1(); - void CryptonightR_instruction2(); - void CryptonightR_instruction3(); - void CryptonightR_instruction4(); - void CryptonightR_instruction5(); - void CryptonightR_instruction6(); - void CryptonightR_instruction7(); - void CryptonightR_instruction8(); - void CryptonightR_instruction9(); - void CryptonightR_instruction10(); - void CryptonightR_instruction11(); - void CryptonightR_instruction12(); - void CryptonightR_instruction13(); - 
void CryptonightR_instruction14(); - void CryptonightR_instruction15(); - void CryptonightR_instruction16(); - void CryptonightR_instruction17(); - void CryptonightR_instruction18(); - void CryptonightR_instruction19(); - void CryptonightR_instruction20(); - void CryptonightR_instruction21(); - void CryptonightR_instruction22(); - void CryptonightR_instruction23(); - void CryptonightR_instruction24(); - void CryptonightR_instruction25(); - void CryptonightR_instruction26(); - void CryptonightR_instruction27(); - void CryptonightR_instruction28(); - void CryptonightR_instruction29(); - void CryptonightR_instruction30(); - void CryptonightR_instruction31(); - void CryptonightR_instruction32(); - void CryptonightR_instruction33(); - void CryptonightR_instruction34(); - void CryptonightR_instruction35(); - void CryptonightR_instruction36(); - void CryptonightR_instruction37(); - void CryptonightR_instruction38(); - void CryptonightR_instruction39(); - void CryptonightR_instruction40(); - void CryptonightR_instruction41(); - void CryptonightR_instruction42(); - void CryptonightR_instruction43(); - void CryptonightR_instruction44(); - void CryptonightR_instruction45(); - void CryptonightR_instruction46(); - void CryptonightR_instruction47(); - void CryptonightR_instruction48(); - void CryptonightR_instruction49(); - void CryptonightR_instruction50(); - void CryptonightR_instruction51(); - void CryptonightR_instruction52(); - void CryptonightR_instruction53(); - void CryptonightR_instruction54(); - void CryptonightR_instruction55(); - void CryptonightR_instruction56(); - void CryptonightR_instruction57(); - void CryptonightR_instruction58(); - void CryptonightR_instruction59(); - void CryptonightR_instruction60(); - void CryptonightR_instruction61(); - void CryptonightR_instruction62(); - void CryptonightR_instruction63(); - void CryptonightR_instruction64(); - void CryptonightR_instruction65(); - void CryptonightR_instruction66(); - void CryptonightR_instruction67(); - 
void CryptonightR_instruction68(); - void CryptonightR_instruction69(); - void CryptonightR_instruction70(); - void CryptonightR_instruction71(); - void CryptonightR_instruction72(); - void CryptonightR_instruction73(); - void CryptonightR_instruction74(); - void CryptonightR_instruction75(); - void CryptonightR_instruction76(); - void CryptonightR_instruction77(); - void CryptonightR_instruction78(); - void CryptonightR_instruction79(); - void CryptonightR_instruction80(); - void CryptonightR_instruction81(); - void CryptonightR_instruction82(); - void CryptonightR_instruction83(); - void CryptonightR_instruction84(); - void CryptonightR_instruction85(); - void CryptonightR_instruction86(); - void CryptonightR_instruction87(); - void CryptonightR_instruction88(); - void CryptonightR_instruction89(); - void CryptonightR_instruction90(); - void CryptonightR_instruction91(); - void CryptonightR_instruction92(); - void CryptonightR_instruction93(); - void CryptonightR_instruction94(); - void CryptonightR_instruction95(); - void CryptonightR_instruction96(); - void CryptonightR_instruction97(); - void CryptonightR_instruction98(); - void CryptonightR_instruction99(); - void CryptonightR_instruction100(); - void CryptonightR_instruction101(); - void CryptonightR_instruction102(); - void CryptonightR_instruction103(); - void CryptonightR_instruction104(); - void CryptonightR_instruction105(); - void CryptonightR_instruction106(); - void CryptonightR_instruction107(); - void CryptonightR_instruction108(); - void CryptonightR_instruction109(); - void CryptonightR_instruction110(); - void CryptonightR_instruction111(); - void CryptonightR_instruction112(); - void CryptonightR_instruction113(); - void CryptonightR_instruction114(); - void CryptonightR_instruction115(); - void CryptonightR_instruction116(); - void CryptonightR_instruction117(); - void CryptonightR_instruction118(); - void CryptonightR_instruction119(); - void CryptonightR_instruction120(); - void 
CryptonightR_instruction121(); - void CryptonightR_instruction122(); - void CryptonightR_instruction123(); - void CryptonightR_instruction124(); - void CryptonightR_instruction125(); - void CryptonightR_instruction126(); - void CryptonightR_instruction127(); - void CryptonightR_instruction128(); - void CryptonightR_instruction129(); - void CryptonightR_instruction130(); - void CryptonightR_instruction131(); - void CryptonightR_instruction132(); - void CryptonightR_instruction133(); - void CryptonightR_instruction134(); - void CryptonightR_instruction135(); - void CryptonightR_instruction136(); - void CryptonightR_instruction137(); - void CryptonightR_instruction138(); - void CryptonightR_instruction139(); - void CryptonightR_instruction140(); - void CryptonightR_instruction141(); - void CryptonightR_instruction142(); - void CryptonightR_instruction143(); - void CryptonightR_instruction144(); - void CryptonightR_instruction145(); - void CryptonightR_instruction146(); - void CryptonightR_instruction147(); - void CryptonightR_instruction148(); - void CryptonightR_instruction149(); - void CryptonightR_instruction150(); - void CryptonightR_instruction151(); - void CryptonightR_instruction152(); - void CryptonightR_instruction153(); - void CryptonightR_instruction154(); - void CryptonightR_instruction155(); - void CryptonightR_instruction156(); - void CryptonightR_instruction157(); - void CryptonightR_instruction158(); - void CryptonightR_instruction159(); - void CryptonightR_instruction160(); - void CryptonightR_instruction161(); - void CryptonightR_instruction162(); - void CryptonightR_instruction163(); - void CryptonightR_instruction164(); - void CryptonightR_instruction165(); - void CryptonightR_instruction166(); - void CryptonightR_instruction167(); - void CryptonightR_instruction168(); - void CryptonightR_instruction169(); - void CryptonightR_instruction170(); - void CryptonightR_instruction171(); - void CryptonightR_instruction172(); - void 
CryptonightR_instruction173(); - void CryptonightR_instruction174(); - void CryptonightR_instruction175(); - void CryptonightR_instruction176(); - void CryptonightR_instruction177(); - void CryptonightR_instruction178(); - void CryptonightR_instruction179(); - void CryptonightR_instruction180(); - void CryptonightR_instruction181(); - void CryptonightR_instruction182(); - void CryptonightR_instruction183(); - void CryptonightR_instruction184(); - void CryptonightR_instruction185(); - void CryptonightR_instruction186(); - void CryptonightR_instruction187(); - void CryptonightR_instruction188(); - void CryptonightR_instruction189(); - void CryptonightR_instruction190(); - void CryptonightR_instruction191(); - void CryptonightR_instruction192(); - void CryptonightR_instruction193(); - void CryptonightR_instruction194(); - void CryptonightR_instruction195(); - void CryptonightR_instruction196(); - void CryptonightR_instruction197(); - void CryptonightR_instruction198(); - void CryptonightR_instruction199(); - void CryptonightR_instruction200(); - void CryptonightR_instruction201(); - void CryptonightR_instruction202(); - void CryptonightR_instruction203(); - void CryptonightR_instruction204(); - void CryptonightR_instruction205(); - void CryptonightR_instruction206(); - void CryptonightR_instruction207(); - void CryptonightR_instruction208(); - void CryptonightR_instruction209(); - void CryptonightR_instruction210(); - void CryptonightR_instruction211(); - void CryptonightR_instruction212(); - void CryptonightR_instruction213(); - void CryptonightR_instruction214(); - void CryptonightR_instruction215(); - void CryptonightR_instruction216(); - void CryptonightR_instruction217(); - void CryptonightR_instruction218(); - void CryptonightR_instruction219(); - void CryptonightR_instruction220(); - void CryptonightR_instruction221(); - void CryptonightR_instruction222(); - void CryptonightR_instruction223(); - void CryptonightR_instruction224(); - void 
CryptonightR_instruction225(); - void CryptonightR_instruction226(); - void CryptonightR_instruction227(); - void CryptonightR_instruction228(); - void CryptonightR_instruction229(); - void CryptonightR_instruction230(); - void CryptonightR_instruction231(); - void CryptonightR_instruction232(); - void CryptonightR_instruction233(); - void CryptonightR_instruction234(); - void CryptonightR_instruction235(); - void CryptonightR_instruction236(); - void CryptonightR_instruction237(); - void CryptonightR_instruction238(); - void CryptonightR_instruction239(); - void CryptonightR_instruction240(); - void CryptonightR_instruction241(); - void CryptonightR_instruction242(); - void CryptonightR_instruction243(); - void CryptonightR_instruction244(); - void CryptonightR_instruction245(); - void CryptonightR_instruction246(); - void CryptonightR_instruction247(); - void CryptonightR_instruction248(); - void CryptonightR_instruction249(); - void CryptonightR_instruction250(); - void CryptonightR_instruction251(); - void CryptonightR_instruction252(); - void CryptonightR_instruction253(); - void CryptonightR_instruction254(); - void CryptonightR_instruction255(); - void CryptonightR_instruction256(); - void CryptonightR_instruction_mov0(); - void CryptonightR_instruction_mov1(); - void CryptonightR_instruction_mov2(); - void CryptonightR_instruction_mov3(); - void CryptonightR_instruction_mov4(); - void CryptonightR_instruction_mov5(); - void CryptonightR_instruction_mov6(); - void CryptonightR_instruction_mov7(); - void CryptonightR_instruction_mov8(); - void CryptonightR_instruction_mov9(); - void CryptonightR_instruction_mov10(); - void CryptonightR_instruction_mov11(); - void CryptonightR_instruction_mov12(); - void CryptonightR_instruction_mov13(); - void CryptonightR_instruction_mov14(); - void CryptonightR_instruction_mov15(); - void CryptonightR_instruction_mov16(); - void CryptonightR_instruction_mov17(); - void CryptonightR_instruction_mov18(); - void 
CryptonightR_instruction_mov19(); - void CryptonightR_instruction_mov20(); - void CryptonightR_instruction_mov21(); - void CryptonightR_instruction_mov22(); - void CryptonightR_instruction_mov23(); - void CryptonightR_instruction_mov24(); - void CryptonightR_instruction_mov25(); - void CryptonightR_instruction_mov26(); - void CryptonightR_instruction_mov27(); - void CryptonightR_instruction_mov28(); - void CryptonightR_instruction_mov29(); - void CryptonightR_instruction_mov30(); - void CryptonightR_instruction_mov31(); - void CryptonightR_instruction_mov32(); - void CryptonightR_instruction_mov33(); - void CryptonightR_instruction_mov34(); - void CryptonightR_instruction_mov35(); - void CryptonightR_instruction_mov36(); - void CryptonightR_instruction_mov37(); - void CryptonightR_instruction_mov38(); - void CryptonightR_instruction_mov39(); - void CryptonightR_instruction_mov40(); - void CryptonightR_instruction_mov41(); - void CryptonightR_instruction_mov42(); - void CryptonightR_instruction_mov43(); - void CryptonightR_instruction_mov44(); - void CryptonightR_instruction_mov45(); - void CryptonightR_instruction_mov46(); - void CryptonightR_instruction_mov47(); - void CryptonightR_instruction_mov48(); - void CryptonightR_instruction_mov49(); - void CryptonightR_instruction_mov50(); - void CryptonightR_instruction_mov51(); - void CryptonightR_instruction_mov52(); - void CryptonightR_instruction_mov53(); - void CryptonightR_instruction_mov54(); - void CryptonightR_instruction_mov55(); - void CryptonightR_instruction_mov56(); - void CryptonightR_instruction_mov57(); - void CryptonightR_instruction_mov58(); - void CryptonightR_instruction_mov59(); - void CryptonightR_instruction_mov60(); - void CryptonightR_instruction_mov61(); - void CryptonightR_instruction_mov62(); - void CryptonightR_instruction_mov63(); - void CryptonightR_instruction_mov64(); - void CryptonightR_instruction_mov65(); - void CryptonightR_instruction_mov66(); - void 
CryptonightR_instruction_mov67(); - void CryptonightR_instruction_mov68(); - void CryptonightR_instruction_mov69(); - void CryptonightR_instruction_mov70(); - void CryptonightR_instruction_mov71(); - void CryptonightR_instruction_mov72(); - void CryptonightR_instruction_mov73(); - void CryptonightR_instruction_mov74(); - void CryptonightR_instruction_mov75(); - void CryptonightR_instruction_mov76(); - void CryptonightR_instruction_mov77(); - void CryptonightR_instruction_mov78(); - void CryptonightR_instruction_mov79(); - void CryptonightR_instruction_mov80(); - void CryptonightR_instruction_mov81(); - void CryptonightR_instruction_mov82(); - void CryptonightR_instruction_mov83(); - void CryptonightR_instruction_mov84(); - void CryptonightR_instruction_mov85(); - void CryptonightR_instruction_mov86(); - void CryptonightR_instruction_mov87(); - void CryptonightR_instruction_mov88(); - void CryptonightR_instruction_mov89(); - void CryptonightR_instruction_mov90(); - void CryptonightR_instruction_mov91(); - void CryptonightR_instruction_mov92(); - void CryptonightR_instruction_mov93(); - void CryptonightR_instruction_mov94(); - void CryptonightR_instruction_mov95(); - void CryptonightR_instruction_mov96(); - void CryptonightR_instruction_mov97(); - void CryptonightR_instruction_mov98(); - void CryptonightR_instruction_mov99(); - void CryptonightR_instruction_mov100(); - void CryptonightR_instruction_mov101(); - void CryptonightR_instruction_mov102(); - void CryptonightR_instruction_mov103(); - void CryptonightR_instruction_mov104(); - void CryptonightR_instruction_mov105(); - void CryptonightR_instruction_mov106(); - void CryptonightR_instruction_mov107(); - void CryptonightR_instruction_mov108(); - void CryptonightR_instruction_mov109(); - void CryptonightR_instruction_mov110(); - void CryptonightR_instruction_mov111(); - void CryptonightR_instruction_mov112(); - void CryptonightR_instruction_mov113(); - void CryptonightR_instruction_mov114(); - void 
CryptonightR_instruction_mov115(); - void CryptonightR_instruction_mov116(); - void CryptonightR_instruction_mov117(); - void CryptonightR_instruction_mov118(); - void CryptonightR_instruction_mov119(); - void CryptonightR_instruction_mov120(); - void CryptonightR_instruction_mov121(); - void CryptonightR_instruction_mov122(); - void CryptonightR_instruction_mov123(); - void CryptonightR_instruction_mov124(); - void CryptonightR_instruction_mov125(); - void CryptonightR_instruction_mov126(); - void CryptonightR_instruction_mov127(); - void CryptonightR_instruction_mov128(); - void CryptonightR_instruction_mov129(); - void CryptonightR_instruction_mov130(); - void CryptonightR_instruction_mov131(); - void CryptonightR_instruction_mov132(); - void CryptonightR_instruction_mov133(); - void CryptonightR_instruction_mov134(); - void CryptonightR_instruction_mov135(); - void CryptonightR_instruction_mov136(); - void CryptonightR_instruction_mov137(); - void CryptonightR_instruction_mov138(); - void CryptonightR_instruction_mov139(); - void CryptonightR_instruction_mov140(); - void CryptonightR_instruction_mov141(); - void CryptonightR_instruction_mov142(); - void CryptonightR_instruction_mov143(); - void CryptonightR_instruction_mov144(); - void CryptonightR_instruction_mov145(); - void CryptonightR_instruction_mov146(); - void CryptonightR_instruction_mov147(); - void CryptonightR_instruction_mov148(); - void CryptonightR_instruction_mov149(); - void CryptonightR_instruction_mov150(); - void CryptonightR_instruction_mov151(); - void CryptonightR_instruction_mov152(); - void CryptonightR_instruction_mov153(); - void CryptonightR_instruction_mov154(); - void CryptonightR_instruction_mov155(); - void CryptonightR_instruction_mov156(); - void CryptonightR_instruction_mov157(); - void CryptonightR_instruction_mov158(); - void CryptonightR_instruction_mov159(); - void CryptonightR_instruction_mov160(); - void CryptonightR_instruction_mov161(); - void 
CryptonightR_instruction_mov162(); - void CryptonightR_instruction_mov163(); - void CryptonightR_instruction_mov164(); - void CryptonightR_instruction_mov165(); - void CryptonightR_instruction_mov166(); - void CryptonightR_instruction_mov167(); - void CryptonightR_instruction_mov168(); - void CryptonightR_instruction_mov169(); - void CryptonightR_instruction_mov170(); - void CryptonightR_instruction_mov171(); - void CryptonightR_instruction_mov172(); - void CryptonightR_instruction_mov173(); - void CryptonightR_instruction_mov174(); - void CryptonightR_instruction_mov175(); - void CryptonightR_instruction_mov176(); - void CryptonightR_instruction_mov177(); - void CryptonightR_instruction_mov178(); - void CryptonightR_instruction_mov179(); - void CryptonightR_instruction_mov180(); - void CryptonightR_instruction_mov181(); - void CryptonightR_instruction_mov182(); - void CryptonightR_instruction_mov183(); - void CryptonightR_instruction_mov184(); - void CryptonightR_instruction_mov185(); - void CryptonightR_instruction_mov186(); - void CryptonightR_instruction_mov187(); - void CryptonightR_instruction_mov188(); - void CryptonightR_instruction_mov189(); - void CryptonightR_instruction_mov190(); - void CryptonightR_instruction_mov191(); - void CryptonightR_instruction_mov192(); - void CryptonightR_instruction_mov193(); - void CryptonightR_instruction_mov194(); - void CryptonightR_instruction_mov195(); - void CryptonightR_instruction_mov196(); - void CryptonightR_instruction_mov197(); - void CryptonightR_instruction_mov198(); - void CryptonightR_instruction_mov199(); - void CryptonightR_instruction_mov200(); - void CryptonightR_instruction_mov201(); - void CryptonightR_instruction_mov202(); - void CryptonightR_instruction_mov203(); - void CryptonightR_instruction_mov204(); - void CryptonightR_instruction_mov205(); - void CryptonightR_instruction_mov206(); - void CryptonightR_instruction_mov207(); - void CryptonightR_instruction_mov208(); - void 
CryptonightR_instruction_mov209(); - void CryptonightR_instruction_mov210(); - void CryptonightR_instruction_mov211(); - void CryptonightR_instruction_mov212(); - void CryptonightR_instruction_mov213(); - void CryptonightR_instruction_mov214(); - void CryptonightR_instruction_mov215(); - void CryptonightR_instruction_mov216(); - void CryptonightR_instruction_mov217(); - void CryptonightR_instruction_mov218(); - void CryptonightR_instruction_mov219(); - void CryptonightR_instruction_mov220(); - void CryptonightR_instruction_mov221(); - void CryptonightR_instruction_mov222(); - void CryptonightR_instruction_mov223(); - void CryptonightR_instruction_mov224(); - void CryptonightR_instruction_mov225(); - void CryptonightR_instruction_mov226(); - void CryptonightR_instruction_mov227(); - void CryptonightR_instruction_mov228(); - void CryptonightR_instruction_mov229(); - void CryptonightR_instruction_mov230(); - void CryptonightR_instruction_mov231(); - void CryptonightR_instruction_mov232(); - void CryptonightR_instruction_mov233(); - void CryptonightR_instruction_mov234(); - void CryptonightR_instruction_mov235(); - void CryptonightR_instruction_mov236(); - void CryptonightR_instruction_mov237(); - void CryptonightR_instruction_mov238(); - void CryptonightR_instruction_mov239(); - void CryptonightR_instruction_mov240(); - void CryptonightR_instruction_mov241(); - void CryptonightR_instruction_mov242(); - void CryptonightR_instruction_mov243(); - void CryptonightR_instruction_mov244(); - void CryptonightR_instruction_mov245(); - void CryptonightR_instruction_mov246(); - void CryptonightR_instruction_mov247(); - void CryptonightR_instruction_mov248(); - void CryptonightR_instruction_mov249(); - void CryptonightR_instruction_mov250(); - void CryptonightR_instruction_mov251(); - void CryptonightR_instruction_mov252(); - void CryptonightR_instruction_mov253(); - void CryptonightR_instruction_mov254(); - void CryptonightR_instruction_mov255(); - void 
CryptonightR_instruction_mov256(); -} - -const void_func instructions[257] = { - CryptonightR_instruction0, - CryptonightR_instruction1, - CryptonightR_instruction2, - CryptonightR_instruction3, - CryptonightR_instruction4, - CryptonightR_instruction5, - CryptonightR_instruction6, - CryptonightR_instruction7, - CryptonightR_instruction8, - CryptonightR_instruction9, - CryptonightR_instruction10, - CryptonightR_instruction11, - CryptonightR_instruction12, - CryptonightR_instruction13, - CryptonightR_instruction14, - CryptonightR_instruction15, - CryptonightR_instruction16, - CryptonightR_instruction17, - CryptonightR_instruction18, - CryptonightR_instruction19, - CryptonightR_instruction20, - CryptonightR_instruction21, - CryptonightR_instruction22, - CryptonightR_instruction23, - CryptonightR_instruction24, - CryptonightR_instruction25, - CryptonightR_instruction26, - CryptonightR_instruction27, - CryptonightR_instruction28, - CryptonightR_instruction29, - CryptonightR_instruction30, - CryptonightR_instruction31, - CryptonightR_instruction32, - CryptonightR_instruction33, - CryptonightR_instruction34, - CryptonightR_instruction35, - CryptonightR_instruction36, - CryptonightR_instruction37, - CryptonightR_instruction38, - CryptonightR_instruction39, - CryptonightR_instruction40, - CryptonightR_instruction41, - CryptonightR_instruction42, - CryptonightR_instruction43, - CryptonightR_instruction44, - CryptonightR_instruction45, - CryptonightR_instruction46, - CryptonightR_instruction47, - CryptonightR_instruction48, - CryptonightR_instruction49, - CryptonightR_instruction50, - CryptonightR_instruction51, - CryptonightR_instruction52, - CryptonightR_instruction53, - CryptonightR_instruction54, - CryptonightR_instruction55, - CryptonightR_instruction56, - CryptonightR_instruction57, - CryptonightR_instruction58, - CryptonightR_instruction59, - CryptonightR_instruction60, - CryptonightR_instruction61, - CryptonightR_instruction62, - CryptonightR_instruction63, - 
CryptonightR_instruction64, - CryptonightR_instruction65, - CryptonightR_instruction66, - CryptonightR_instruction67, - CryptonightR_instruction68, - CryptonightR_instruction69, - CryptonightR_instruction70, - CryptonightR_instruction71, - CryptonightR_instruction72, - CryptonightR_instruction73, - CryptonightR_instruction74, - CryptonightR_instruction75, - CryptonightR_instruction76, - CryptonightR_instruction77, - CryptonightR_instruction78, - CryptonightR_instruction79, - CryptonightR_instruction80, - CryptonightR_instruction81, - CryptonightR_instruction82, - CryptonightR_instruction83, - CryptonightR_instruction84, - CryptonightR_instruction85, - CryptonightR_instruction86, - CryptonightR_instruction87, - CryptonightR_instruction88, - CryptonightR_instruction89, - CryptonightR_instruction90, - CryptonightR_instruction91, - CryptonightR_instruction92, - CryptonightR_instruction93, - CryptonightR_instruction94, - CryptonightR_instruction95, - CryptonightR_instruction96, - CryptonightR_instruction97, - CryptonightR_instruction98, - CryptonightR_instruction99, - CryptonightR_instruction100, - CryptonightR_instruction101, - CryptonightR_instruction102, - CryptonightR_instruction103, - CryptonightR_instruction104, - CryptonightR_instruction105, - CryptonightR_instruction106, - CryptonightR_instruction107, - CryptonightR_instruction108, - CryptonightR_instruction109, - CryptonightR_instruction110, - CryptonightR_instruction111, - CryptonightR_instruction112, - CryptonightR_instruction113, - CryptonightR_instruction114, - CryptonightR_instruction115, - CryptonightR_instruction116, - CryptonightR_instruction117, - CryptonightR_instruction118, - CryptonightR_instruction119, - CryptonightR_instruction120, - CryptonightR_instruction121, - CryptonightR_instruction122, - CryptonightR_instruction123, - CryptonightR_instruction124, - CryptonightR_instruction125, - CryptonightR_instruction126, - CryptonightR_instruction127, - CryptonightR_instruction128, - 
CryptonightR_instruction129, - CryptonightR_instruction130, - CryptonightR_instruction131, - CryptonightR_instruction132, - CryptonightR_instruction133, - CryptonightR_instruction134, - CryptonightR_instruction135, - CryptonightR_instruction136, - CryptonightR_instruction137, - CryptonightR_instruction138, - CryptonightR_instruction139, - CryptonightR_instruction140, - CryptonightR_instruction141, - CryptonightR_instruction142, - CryptonightR_instruction143, - CryptonightR_instruction144, - CryptonightR_instruction145, - CryptonightR_instruction146, - CryptonightR_instruction147, - CryptonightR_instruction148, - CryptonightR_instruction149, - CryptonightR_instruction150, - CryptonightR_instruction151, - CryptonightR_instruction152, - CryptonightR_instruction153, - CryptonightR_instruction154, - CryptonightR_instruction155, - CryptonightR_instruction156, - CryptonightR_instruction157, - CryptonightR_instruction158, - CryptonightR_instruction159, - CryptonightR_instruction160, - CryptonightR_instruction161, - CryptonightR_instruction162, - CryptonightR_instruction163, - CryptonightR_instruction164, - CryptonightR_instruction165, - CryptonightR_instruction166, - CryptonightR_instruction167, - CryptonightR_instruction168, - CryptonightR_instruction169, - CryptonightR_instruction170, - CryptonightR_instruction171, - CryptonightR_instruction172, - CryptonightR_instruction173, - CryptonightR_instruction174, - CryptonightR_instruction175, - CryptonightR_instruction176, - CryptonightR_instruction177, - CryptonightR_instruction178, - CryptonightR_instruction179, - CryptonightR_instruction180, - CryptonightR_instruction181, - CryptonightR_instruction182, - CryptonightR_instruction183, - CryptonightR_instruction184, - CryptonightR_instruction185, - CryptonightR_instruction186, - CryptonightR_instruction187, - CryptonightR_instruction188, - CryptonightR_instruction189, - CryptonightR_instruction190, - CryptonightR_instruction191, - CryptonightR_instruction192, - 
CryptonightR_instruction193, - CryptonightR_instruction194, - CryptonightR_instruction195, - CryptonightR_instruction196, - CryptonightR_instruction197, - CryptonightR_instruction198, - CryptonightR_instruction199, - CryptonightR_instruction200, - CryptonightR_instruction201, - CryptonightR_instruction202, - CryptonightR_instruction203, - CryptonightR_instruction204, - CryptonightR_instruction205, - CryptonightR_instruction206, - CryptonightR_instruction207, - CryptonightR_instruction208, - CryptonightR_instruction209, - CryptonightR_instruction210, - CryptonightR_instruction211, - CryptonightR_instruction212, - CryptonightR_instruction213, - CryptonightR_instruction214, - CryptonightR_instruction215, - CryptonightR_instruction216, - CryptonightR_instruction217, - CryptonightR_instruction218, - CryptonightR_instruction219, - CryptonightR_instruction220, - CryptonightR_instruction221, - CryptonightR_instruction222, - CryptonightR_instruction223, - CryptonightR_instruction224, - CryptonightR_instruction225, - CryptonightR_instruction226, - CryptonightR_instruction227, - CryptonightR_instruction228, - CryptonightR_instruction229, - CryptonightR_instruction230, - CryptonightR_instruction231, - CryptonightR_instruction232, - CryptonightR_instruction233, - CryptonightR_instruction234, - CryptonightR_instruction235, - CryptonightR_instruction236, - CryptonightR_instruction237, - CryptonightR_instruction238, - CryptonightR_instruction239, - CryptonightR_instruction240, - CryptonightR_instruction241, - CryptonightR_instruction242, - CryptonightR_instruction243, - CryptonightR_instruction244, - CryptonightR_instruction245, - CryptonightR_instruction246, - CryptonightR_instruction247, - CryptonightR_instruction248, - CryptonightR_instruction249, - CryptonightR_instruction250, - CryptonightR_instruction251, - CryptonightR_instruction252, - CryptonightR_instruction253, - CryptonightR_instruction254, - CryptonightR_instruction255, - CryptonightR_instruction256, -}; - -const 
void_func instructions_mov[257] = { - CryptonightR_instruction_mov0, - CryptonightR_instruction_mov1, - CryptonightR_instruction_mov2, - CryptonightR_instruction_mov3, - CryptonightR_instruction_mov4, - CryptonightR_instruction_mov5, - CryptonightR_instruction_mov6, - CryptonightR_instruction_mov7, - CryptonightR_instruction_mov8, - CryptonightR_instruction_mov9, - CryptonightR_instruction_mov10, - CryptonightR_instruction_mov11, - CryptonightR_instruction_mov12, - CryptonightR_instruction_mov13, - CryptonightR_instruction_mov14, - CryptonightR_instruction_mov15, - CryptonightR_instruction_mov16, - CryptonightR_instruction_mov17, - CryptonightR_instruction_mov18, - CryptonightR_instruction_mov19, - CryptonightR_instruction_mov20, - CryptonightR_instruction_mov21, - CryptonightR_instruction_mov22, - CryptonightR_instruction_mov23, - CryptonightR_instruction_mov24, - CryptonightR_instruction_mov25, - CryptonightR_instruction_mov26, - CryptonightR_instruction_mov27, - CryptonightR_instruction_mov28, - CryptonightR_instruction_mov29, - CryptonightR_instruction_mov30, - CryptonightR_instruction_mov31, - CryptonightR_instruction_mov32, - CryptonightR_instruction_mov33, - CryptonightR_instruction_mov34, - CryptonightR_instruction_mov35, - CryptonightR_instruction_mov36, - CryptonightR_instruction_mov37, - CryptonightR_instruction_mov38, - CryptonightR_instruction_mov39, - CryptonightR_instruction_mov40, - CryptonightR_instruction_mov41, - CryptonightR_instruction_mov42, - CryptonightR_instruction_mov43, - CryptonightR_instruction_mov44, - CryptonightR_instruction_mov45, - CryptonightR_instruction_mov46, - CryptonightR_instruction_mov47, - CryptonightR_instruction_mov48, - CryptonightR_instruction_mov49, - CryptonightR_instruction_mov50, - CryptonightR_instruction_mov51, - CryptonightR_instruction_mov52, - CryptonightR_instruction_mov53, - CryptonightR_instruction_mov54, - CryptonightR_instruction_mov55, - CryptonightR_instruction_mov56, - CryptonightR_instruction_mov57, - 
CryptonightR_instruction_mov58, - CryptonightR_instruction_mov59, - CryptonightR_instruction_mov60, - CryptonightR_instruction_mov61, - CryptonightR_instruction_mov62, - CryptonightR_instruction_mov63, - CryptonightR_instruction_mov64, - CryptonightR_instruction_mov65, - CryptonightR_instruction_mov66, - CryptonightR_instruction_mov67, - CryptonightR_instruction_mov68, - CryptonightR_instruction_mov69, - CryptonightR_instruction_mov70, - CryptonightR_instruction_mov71, - CryptonightR_instruction_mov72, - CryptonightR_instruction_mov73, - CryptonightR_instruction_mov74, - CryptonightR_instruction_mov75, - CryptonightR_instruction_mov76, - CryptonightR_instruction_mov77, - CryptonightR_instruction_mov78, - CryptonightR_instruction_mov79, - CryptonightR_instruction_mov80, - CryptonightR_instruction_mov81, - CryptonightR_instruction_mov82, - CryptonightR_instruction_mov83, - CryptonightR_instruction_mov84, - CryptonightR_instruction_mov85, - CryptonightR_instruction_mov86, - CryptonightR_instruction_mov87, - CryptonightR_instruction_mov88, - CryptonightR_instruction_mov89, - CryptonightR_instruction_mov90, - CryptonightR_instruction_mov91, - CryptonightR_instruction_mov92, - CryptonightR_instruction_mov93, - CryptonightR_instruction_mov94, - CryptonightR_instruction_mov95, - CryptonightR_instruction_mov96, - CryptonightR_instruction_mov97, - CryptonightR_instruction_mov98, - CryptonightR_instruction_mov99, - CryptonightR_instruction_mov100, - CryptonightR_instruction_mov101, - CryptonightR_instruction_mov102, - CryptonightR_instruction_mov103, - CryptonightR_instruction_mov104, - CryptonightR_instruction_mov105, - CryptonightR_instruction_mov106, - CryptonightR_instruction_mov107, - CryptonightR_instruction_mov108, - CryptonightR_instruction_mov109, - CryptonightR_instruction_mov110, - CryptonightR_instruction_mov111, - CryptonightR_instruction_mov112, - CryptonightR_instruction_mov113, - CryptonightR_instruction_mov114, - CryptonightR_instruction_mov115, - 
CryptonightR_instruction_mov116, - CryptonightR_instruction_mov117, - CryptonightR_instruction_mov118, - CryptonightR_instruction_mov119, - CryptonightR_instruction_mov120, - CryptonightR_instruction_mov121, - CryptonightR_instruction_mov122, - CryptonightR_instruction_mov123, - CryptonightR_instruction_mov124, - CryptonightR_instruction_mov125, - CryptonightR_instruction_mov126, - CryptonightR_instruction_mov127, - CryptonightR_instruction_mov128, - CryptonightR_instruction_mov129, - CryptonightR_instruction_mov130, - CryptonightR_instruction_mov131, - CryptonightR_instruction_mov132, - CryptonightR_instruction_mov133, - CryptonightR_instruction_mov134, - CryptonightR_instruction_mov135, - CryptonightR_instruction_mov136, - CryptonightR_instruction_mov137, - CryptonightR_instruction_mov138, - CryptonightR_instruction_mov139, - CryptonightR_instruction_mov140, - CryptonightR_instruction_mov141, - CryptonightR_instruction_mov142, - CryptonightR_instruction_mov143, - CryptonightR_instruction_mov144, - CryptonightR_instruction_mov145, - CryptonightR_instruction_mov146, - CryptonightR_instruction_mov147, - CryptonightR_instruction_mov148, - CryptonightR_instruction_mov149, - CryptonightR_instruction_mov150, - CryptonightR_instruction_mov151, - CryptonightR_instruction_mov152, - CryptonightR_instruction_mov153, - CryptonightR_instruction_mov154, - CryptonightR_instruction_mov155, - CryptonightR_instruction_mov156, - CryptonightR_instruction_mov157, - CryptonightR_instruction_mov158, - CryptonightR_instruction_mov159, - CryptonightR_instruction_mov160, - CryptonightR_instruction_mov161, - CryptonightR_instruction_mov162, - CryptonightR_instruction_mov163, - CryptonightR_instruction_mov164, - CryptonightR_instruction_mov165, - CryptonightR_instruction_mov166, - CryptonightR_instruction_mov167, - CryptonightR_instruction_mov168, - CryptonightR_instruction_mov169, - CryptonightR_instruction_mov170, - CryptonightR_instruction_mov171, - CryptonightR_instruction_mov172, - 
CryptonightR_instruction_mov173, - CryptonightR_instruction_mov174, - CryptonightR_instruction_mov175, - CryptonightR_instruction_mov176, - CryptonightR_instruction_mov177, - CryptonightR_instruction_mov178, - CryptonightR_instruction_mov179, - CryptonightR_instruction_mov180, - CryptonightR_instruction_mov181, - CryptonightR_instruction_mov182, - CryptonightR_instruction_mov183, - CryptonightR_instruction_mov184, - CryptonightR_instruction_mov185, - CryptonightR_instruction_mov186, - CryptonightR_instruction_mov187, - CryptonightR_instruction_mov188, - CryptonightR_instruction_mov189, - CryptonightR_instruction_mov190, - CryptonightR_instruction_mov191, - CryptonightR_instruction_mov192, - CryptonightR_instruction_mov193, - CryptonightR_instruction_mov194, - CryptonightR_instruction_mov195, - CryptonightR_instruction_mov196, - CryptonightR_instruction_mov197, - CryptonightR_instruction_mov198, - CryptonightR_instruction_mov199, - CryptonightR_instruction_mov200, - CryptonightR_instruction_mov201, - CryptonightR_instruction_mov202, - CryptonightR_instruction_mov203, - CryptonightR_instruction_mov204, - CryptonightR_instruction_mov205, - CryptonightR_instruction_mov206, - CryptonightR_instruction_mov207, - CryptonightR_instruction_mov208, - CryptonightR_instruction_mov209, - CryptonightR_instruction_mov210, - CryptonightR_instruction_mov211, - CryptonightR_instruction_mov212, - CryptonightR_instruction_mov213, - CryptonightR_instruction_mov214, - CryptonightR_instruction_mov215, - CryptonightR_instruction_mov216, - CryptonightR_instruction_mov217, - CryptonightR_instruction_mov218, - CryptonightR_instruction_mov219, - CryptonightR_instruction_mov220, - CryptonightR_instruction_mov221, - CryptonightR_instruction_mov222, - CryptonightR_instruction_mov223, - CryptonightR_instruction_mov224, - CryptonightR_instruction_mov225, - CryptonightR_instruction_mov226, - CryptonightR_instruction_mov227, - CryptonightR_instruction_mov228, - CryptonightR_instruction_mov229, - 
CryptonightR_instruction_mov230, - CryptonightR_instruction_mov231, - CryptonightR_instruction_mov232, - CryptonightR_instruction_mov233, - CryptonightR_instruction_mov234, - CryptonightR_instruction_mov235, - CryptonightR_instruction_mov236, - CryptonightR_instruction_mov237, - CryptonightR_instruction_mov238, - CryptonightR_instruction_mov239, - CryptonightR_instruction_mov240, - CryptonightR_instruction_mov241, - CryptonightR_instruction_mov242, - CryptonightR_instruction_mov243, - CryptonightR_instruction_mov244, - CryptonightR_instruction_mov245, - CryptonightR_instruction_mov246, - CryptonightR_instruction_mov247, - CryptonightR_instruction_mov248, - CryptonightR_instruction_mov249, - CryptonightR_instruction_mov250, - CryptonightR_instruction_mov251, - CryptonightR_instruction_mov252, - CryptonightR_instruction_mov253, - CryptonightR_instruction_mov254, - CryptonightR_instruction_mov255, - CryptonightR_instruction_mov256, -}; diff --git a/src/crypto/asm/CryptonightR_template.inc b/src/crypto/asm/CryptonightR_template.inc deleted file mode 100644 index 61b6b985..00000000 --- a/src/crypto/asm/CryptonightR_template.inc +++ /dev/null @@ -1,536 +0,0 @@ -PUBLIC FN_PREFIX(CryptonightR_template_part1) -PUBLIC FN_PREFIX(CryptonightR_template_mainloop) -PUBLIC FN_PREFIX(CryptonightR_template_part2) -PUBLIC FN_PREFIX(CryptonightR_template_part3) -PUBLIC FN_PREFIX(CryptonightR_template_end) -PUBLIC FN_PREFIX(CryptonightR_template_double_part1) -PUBLIC FN_PREFIX(CryptonightR_template_double_mainloop) -PUBLIC FN_PREFIX(CryptonightR_template_double_part2) -PUBLIC FN_PREFIX(CryptonightR_template_double_part3) -PUBLIC FN_PREFIX(CryptonightR_template_double_part4) -PUBLIC FN_PREFIX(CryptonightR_template_double_end) - -ALIGN(64) -FN_PREFIX(CryptonightR_template_part1): - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - push rdi - sub rsp, 64 - mov r12, 
rcx - mov r8, QWORD PTR [r12+32] - mov rdx, r12 - xor r8, QWORD PTR [r12] - mov r15, QWORD PTR [r12+40] - mov r9, r8 - xor r15, QWORD PTR [r12+8] - mov r11, QWORD PTR [r12+224] - mov r12, QWORD PTR [r12+56] - xor r12, QWORD PTR [rdx+24] - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - movaps XMMWORD PTR [rsp+48], xmm6 - movq xmm0, r12 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - movaps XMMWORD PTR [rsp], xmm9 - mov r12, QWORD PTR [rdx+88] - xor r12, QWORD PTR [rdx+72] - movq xmm6, rax - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm6, xmm0 - and r9d, 2097136 - movq xmm0, r12 - movq xmm7, rax - punpcklqdq xmm7, xmm0 - mov r10d, r9d - movq xmm9, rsp - mov rsp, r8 - mov r8d, 524288 - - mov ebx, [rdx+96] - mov esi, [rdx+100] - mov edi, [rdx+104] - mov ebp, [rdx+108] - - ALIGN(64) -FN_PREFIX(CryptonightR_template_mainloop): - movdqa xmm5, XMMWORD PTR [r9+r11] - movq xmm0, r15 - movq xmm4, rsp - punpcklqdq xmm4, xmm0 - lea rdx, QWORD PTR [r9+r11] - - aesenc xmm5, xmm4 - - mov r13d, r9d - mov eax, r9d - xor r9d, 48 - xor r13d, 16 - xor eax, 32 - movdqu xmm0, XMMWORD PTR [r9+r11] - movaps xmm3, xmm0 - movdqu xmm2, XMMWORD PTR [r13+r11] - movdqu xmm1, XMMWORD PTR [rax+r11] - pxor xmm0, xmm2 - pxor xmm5, xmm1 - pxor xmm5, xmm0 - - movq r12, xmm5 - movd r10d, xmm5 - and r10d, 2097136 - - paddq xmm3, xmm7 - paddq xmm2, xmm6 - paddq xmm1, xmm4 - movdqu XMMWORD PTR [r13+r11], xmm3 - movdqu XMMWORD PTR [rax+r11], xmm2 - movdqu XMMWORD PTR [r9+r11], xmm1 - - movdqa xmm0, xmm5 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [rdx], xmm0 - - lea r13d, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or r13, rdx - - movd eax, xmm6 - movd edx, xmm7 - pextrd r9d, xmm7, 2 - - xor r13, QWORD PTR [r10+r11] - mov r14, QWORD PTR [r10+r11+8] - -FN_PREFIX(CryptonightR_template_part2): - lea rcx, [r10+r11] - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor rsp, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, 
rdx - xor r15, rax - - mov rax, r13 - mul r12 - add r15, rax - add rsp, rdx - - mov r9d, r10d - mov r12d, r10d - xor r9d, 16 - xor r12d, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [r12+r11] - movaps xmm3, xmm1 - movdqa xmm2, XMMWORD PTR [r9+r11] - movdqa xmm0, XMMWORD PTR [r10+r11] - pxor xmm1, xmm2 - pxor xmm5, xmm0 - pxor xmm5, xmm1 - paddq xmm3, xmm4 - paddq xmm2, xmm6 - paddq xmm0, xmm7 - movdqu XMMWORD PTR [r9+r11], xmm0 - movdqu XMMWORD PTR [r12+r11], xmm2 - movdqu XMMWORD PTR [r10+r11], xmm3 - - movdqa xmm7, xmm6 - mov QWORD PTR [rcx], rsp - xor rsp, r13 - mov r9d, esp - mov QWORD PTR [rcx+8], r15 - and r9d, 2097136 - xor r15, r14 - movdqa xmm6, xmm5 - dec r8d - jnz FN_PREFIX(CryptonightR_template_mainloop) - -FN_PREFIX(CryptonightR_template_part3): - movq rsp, xmm9 - - mov rbx, QWORD PTR [rsp+136] - mov rbp, QWORD PTR [rsp+144] - mov rsi, QWORD PTR [rsp+152] - movaps xmm6, XMMWORD PTR [rsp+48] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+16] - movaps xmm9, XMMWORD PTR [rsp] - add rsp, 64 - pop rdi - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - ret 0 -FN_PREFIX(CryptonightR_template_end): - -ALIGN(64) -FN_PREFIX(CryptonightR_template_double_part1): - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 320 - mov r14, QWORD PTR [rcx+32] - mov r8, rcx - xor r14, QWORD PTR [rcx] - mov r12, QWORD PTR [rcx+40] - mov ebx, r14d - mov rsi, QWORD PTR [rcx+224] - and ebx, 2097136 - xor r12, QWORD PTR [rcx+8] - mov rcx, QWORD PTR [rcx+56] - xor rcx, QWORD PTR [r8+24] - mov rax, QWORD PTR [r8+48] - xor rax, QWORD PTR [r8+16] - mov r15, QWORD PTR [rdx+32] - xor r15, QWORD PTR [rdx] - movq xmm0, rcx - mov rcx, QWORD PTR [r8+88] - xor rcx, QWORD PTR [r8+72] - mov r13, QWORD PTR [rdx+40] - mov rdi, QWORD PTR [rdx+224] - xor r13, QWORD PTR [rdx+8] - movaps XMMWORD PTR [rsp+160], xmm6 - movaps XMMWORD PTR [rsp+176], xmm7 - movaps XMMWORD 
PTR [rsp+192], xmm8 - movaps XMMWORD PTR [rsp+208], xmm9 - movaps XMMWORD PTR [rsp+224], xmm10 - movaps XMMWORD PTR [rsp+240], xmm11 - movaps XMMWORD PTR [rsp+256], xmm12 - movaps XMMWORD PTR [rsp+272], xmm13 - movaps XMMWORD PTR [rsp+288], xmm14 - movaps XMMWORD PTR [rsp+304], xmm15 - movq xmm7, rax - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - - movaps xmm1, XMMWORD PTR [rdx+96] - movaps xmm2, XMMWORD PTR [r8+96] - movaps XMMWORD PTR [rsp], xmm1 - movaps XMMWORD PTR [rsp+16], xmm2 - - mov r8d, r15d - punpcklqdq xmm7, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [rdx+56] - xor rcx, QWORD PTR [rdx+24] - movq xmm9, rax - mov QWORD PTR [rsp+128], rsi - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - punpcklqdq xmm9, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [rdx+88] - xor rcx, QWORD PTR [rdx+72] - movq xmm8, rax - mov QWORD PTR [rsp+136], rdi - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm8, xmm0 - and r8d, 2097136 - movq xmm0, rcx - mov r11d, 524288 - movq xmm10, rax - punpcklqdq xmm10, xmm0 - - movq xmm14, QWORD PTR [rsp+128] - movq xmm15, QWORD PTR [rsp+136] - - ALIGN(64) -FN_PREFIX(CryptonightR_template_double_mainloop): - movdqu xmm6, XMMWORD PTR [rbx+rsi] - movq xmm0, r12 - mov ecx, ebx - movq xmm3, r14 - punpcklqdq xmm3, xmm0 - xor ebx, 16 - aesenc xmm6, xmm3 - movq xmm4, r15 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm0 - xor ebx, 48 - paddq xmm0, xmm7 - movdqu xmm1, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm1 - movdqu XMMWORD PTR [rbx+rsi], xmm0 - paddq xmm1, xmm3 - xor ebx, 16 - mov eax, ebx - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm0 - movq rdx, xmm6 - movdqu XMMWORD PTR [rbx+rsi], xmm1 - paddq xmm0, xmm9 - movdqu XMMWORD PTR [rax+rsi], xmm0 - movdqa xmm0, xmm6 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [rcx+rsi], xmm0 - mov esi, edx - movdqu xmm5, XMMWORD PTR [r8+rdi] - and esi, 2097136 - mov ecx, r8d - movq xmm0, r13 - punpcklqdq xmm4, xmm0 - xor r8d, 16 - aesenc xmm5, xmm4 - 
movdqu xmm0, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm0 - xor r8d, 48 - paddq xmm0, xmm8 - movdqu xmm1, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm1 - movdqu XMMWORD PTR [r8+rdi], xmm0 - paddq xmm1, xmm4 - xor r8d, 16 - mov eax, r8d - xor rax, 32 - movdqu xmm0, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm0 - movdqu XMMWORD PTR [r8+rdi], xmm1 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rdi], xmm0 - movdqa xmm0, xmm5 - pxor xmm0, xmm8 - movdqu XMMWORD PTR [rcx+rdi], xmm0 - movq rdi, xmm5 - movq rcx, xmm14 - mov ebp, edi - mov r8, QWORD PTR [rcx+rsi] - mov r10, QWORD PTR [rcx+rsi+8] - lea r9, QWORD PTR [rcx+rsi] - xor esi, 16 - - movq xmm0, rsp - movq xmm1, rsi - movq xmm2, rdi - movq xmm11, rbp - movq xmm12, r15 - movq xmm13, rdx - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp+16] - mov esi, DWORD PTR [rsp+20] - mov edi, DWORD PTR [rsp+24] - mov ebp, DWORD PTR [rsp+28] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - xor r8, rax - - movd esp, xmm3 - pextrd r15d, xmm3, 2 - movd eax, xmm7 - movd edx, xmm9 - pextrd r9d, xmm9, 2 - -FN_PREFIX(CryptonightR_template_double_part2): - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor r14, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, rdx - xor r12, rax - - movq rsp, xmm0 - mov DWORD PTR [rsp+16], ebx - mov DWORD PTR [rsp+20], esi - mov DWORD PTR [rsp+24], edi - mov DWORD PTR [rsp+28], ebp - - movq rsi, xmm1 - movq rdi, xmm2 - movq rbp, xmm11 - movq r15, xmm12 - movq rdx, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rbx, r8 - mov rax, r8 - mul rdx - and ebp, 2097136 - mov r8, rax - movdqu xmm1, XMMWORD PTR [rcx+rsi] - pxor xmm6, xmm1 - xor esi, 48 - paddq xmm1, xmm7 - movdqu xmm2, XMMWORD PTR [rsi+rcx] - pxor xmm6, xmm2 - paddq xmm2, xmm3 - movdqu XMMWORD PTR [rsi+rcx], xmm1 - xor esi, 16 - mov eax, esi - mov rsi, rcx - movdqu xmm0, XMMWORD PTR [rax+rcx] - pxor xmm6, xmm0 - movdqu XMMWORD PTR [rax+rcx], xmm2 - paddq xmm0, xmm9 - add r12, r8 - xor rax, 32 - 
add r14, rdx - movdqa xmm9, xmm7 - movdqa xmm7, xmm6 - movdqu XMMWORD PTR [rax+rcx], xmm0 - mov QWORD PTR [r9+8], r12 - xor r12, r10 - mov QWORD PTR [r9], r14 - movq rcx, xmm15 - xor r14, rbx - mov r10d, ebp - mov ebx, r14d - xor ebp, 16 - and ebx, 2097136 - mov r8, QWORD PTR [r10+rcx] - mov r9, QWORD PTR [r10+rcx+8] - - movq xmm0, rsp - movq xmm1, rbx - movq xmm2, rsi - movq xmm11, rdi - movq xmm12, rbp - movq xmm13, r15 - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp] - mov esi, DWORD PTR [rsp+4] - mov edi, DWORD PTR [rsp+8] - mov ebp, DWORD PTR [rsp+12] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - - xor r8, rax - movq xmm3, r8 - - movd esp, xmm4 - pextrd r15d, xmm4, 2 - movd eax, xmm8 - movd edx, xmm10 - pextrd r9d, xmm10, 2 - -FN_PREFIX(CryptonightR_template_double_part3): - - movq r15, xmm13 - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor r15, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, rdx - xor r13, rax - - movq rsp, xmm0 - mov DWORD PTR [rsp], ebx - mov DWORD PTR [rsp+4], esi - mov DWORD PTR [rsp+8], edi - mov DWORD PTR [rsp+12], ebp - - movq rbx, xmm1 - movq rsi, xmm2 - movq rdi, xmm11 - movq rbp, xmm12 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rax, r8 - mul rdi - mov rdi, rcx - mov r8, rax - movdqu xmm1, XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm1 - xor ebp, 48 - paddq xmm1, xmm8 - add r13, r8 - movdqu xmm2, XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm2 - add r15, rdx - movdqu XMMWORD PTR [rbp+rcx], xmm1 - paddq xmm2, xmm4 - xor ebp, 16 - mov eax, ebp - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm0 - movdqu XMMWORD PTR [rbp+rcx], xmm2 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rcx], xmm0 - movq rax, xmm3 - movdqa xmm10, xmm8 - mov QWORD PTR [r10+rcx], r15 - movdqa xmm8, xmm5 - xor r15, rax - mov QWORD PTR [r10+rcx+8], r13 - mov r8d, r15d - xor r13, r9 - and r8d, 2097136 - dec r11d - jnz FN_PREFIX(CryptonightR_template_double_mainloop) - 
-FN_PREFIX(CryptonightR_template_double_part4): - - mov rbx, QWORD PTR [rsp+400] - movaps xmm6, XMMWORD PTR [rsp+160] - movaps xmm7, XMMWORD PTR [rsp+176] - movaps xmm8, XMMWORD PTR [rsp+192] - movaps xmm9, XMMWORD PTR [rsp+208] - movaps xmm10, XMMWORD PTR [rsp+224] - movaps xmm11, XMMWORD PTR [rsp+240] - movaps xmm12, XMMWORD PTR [rsp+256] - movaps xmm13, XMMWORD PTR [rsp+272] - movaps xmm14, XMMWORD PTR [rsp+288] - movaps xmm15, XMMWORD PTR [rsp+304] - add rsp, 320 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - ret 0 -FN_PREFIX(CryptonightR_template_double_end): diff --git a/src/crypto/asm/CryptonightR_template_win.inc b/src/crypto/asm/CryptonightR_template_win.inc deleted file mode 100644 index 1bb89eb1..00000000 --- a/src/crypto/asm/CryptonightR_template_win.inc +++ /dev/null @@ -1,536 +0,0 @@ -PUBLIC CryptonightR_template_part1 -PUBLIC CryptonightR_template_mainloop -PUBLIC CryptonightR_template_part2 -PUBLIC CryptonightR_template_part3 -PUBLIC CryptonightR_template_end -PUBLIC CryptonightR_template_double_part1 -PUBLIC CryptonightR_template_double_mainloop -PUBLIC CryptonightR_template_double_part2 -PUBLIC CryptonightR_template_double_part3 -PUBLIC CryptonightR_template_double_part4 -PUBLIC CryptonightR_template_double_end - -ALIGN(64) -CryptonightR_template_part1: - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - push rdi - sub rsp, 64 - mov r12, rcx - mov r8, QWORD PTR [r12+32] - mov rdx, r12 - xor r8, QWORD PTR [r12] - mov r15, QWORD PTR [r12+40] - mov r9, r8 - xor r15, QWORD PTR [r12+8] - mov r11, QWORD PTR [r12+224] - mov r12, QWORD PTR [r12+56] - xor r12, QWORD PTR [rdx+24] - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - movaps XMMWORD PTR [rsp+48], xmm6 - movq xmm0, r12 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - movaps XMMWORD PTR [rsp], xmm9 - mov r12, QWORD 
PTR [rdx+88] - xor r12, QWORD PTR [rdx+72] - movq xmm6, rax - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm6, xmm0 - and r9d, 2097136 - movq xmm0, r12 - movq xmm7, rax - punpcklqdq xmm7, xmm0 - mov r10d, r9d - movq xmm9, rsp - mov rsp, r8 - mov r8d, 524288 - - mov ebx, [rdx+96] - mov esi, [rdx+100] - mov edi, [rdx+104] - mov ebp, [rdx+108] - - ALIGN(64) -CryptonightR_template_mainloop: - movdqa xmm5, XMMWORD PTR [r9+r11] - movq xmm0, r15 - movq xmm4, rsp - punpcklqdq xmm4, xmm0 - lea rdx, QWORD PTR [r9+r11] - - aesenc xmm5, xmm4 - - mov r13d, r9d - mov eax, r9d - xor r9d, 48 - xor r13d, 16 - xor eax, 32 - movdqu xmm0, XMMWORD PTR [r9+r11] - movaps xmm3, xmm0 - movdqu xmm2, XMMWORD PTR [r13+r11] - movdqu xmm1, XMMWORD PTR [rax+r11] - pxor xmm0, xmm2 - pxor xmm5, xmm1 - pxor xmm5, xmm0 - - movq r12, xmm5 - movd r10d, xmm5 - and r10d, 2097136 - - paddq xmm3, xmm7 - paddq xmm2, xmm6 - paddq xmm1, xmm4 - movdqu XMMWORD PTR [r13+r11], xmm3 - movdqu XMMWORD PTR [rax+r11], xmm2 - movdqu XMMWORD PTR [r9+r11], xmm1 - - movdqa xmm0, xmm5 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [rdx], xmm0 - - lea r13d, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or r13, rdx - - movd eax, xmm6 - movd edx, xmm7 - pextrd r9d, xmm7, 2 - - xor r13, QWORD PTR [r10+r11] - mov r14, QWORD PTR [r10+r11+8] - -CryptonightR_template_part2: - lea rcx, [r10+r11] - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor rsp, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, rdx - xor r15, rax - - mov rax, r13 - mul r12 - add r15, rax - add rsp, rdx - - mov r9d, r10d - mov r12d, r10d - xor r9d, 16 - xor r12d, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [r12+r11] - movaps xmm3, xmm1 - movdqa xmm2, XMMWORD PTR [r9+r11] - movdqa xmm0, XMMWORD PTR [r10+r11] - pxor xmm1, xmm2 - pxor xmm5, xmm0 - pxor xmm5, xmm1 - paddq xmm3, xmm4 - paddq xmm2, xmm6 - paddq xmm0, xmm7 - movdqu XMMWORD PTR [r9+r11], xmm0 - movdqu XMMWORD PTR [r12+r11], xmm2 - movdqu XMMWORD PTR 
[r10+r11], xmm3 - - movdqa xmm7, xmm6 - mov QWORD PTR [rcx], rsp - xor rsp, r13 - mov r9d, esp - mov QWORD PTR [rcx+8], r15 - and r9d, 2097136 - xor r15, r14 - movdqa xmm6, xmm5 - dec r8d - jnz CryptonightR_template_mainloop - -CryptonightR_template_part3: - movq rsp, xmm9 - - mov rbx, QWORD PTR [rsp+136] - mov rbp, QWORD PTR [rsp+144] - mov rsi, QWORD PTR [rsp+152] - movaps xmm6, XMMWORD PTR [rsp+48] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+16] - movaps xmm9, XMMWORD PTR [rsp] - add rsp, 64 - pop rdi - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - ret 0 -CryptonightR_template_end: - -ALIGN(64) -CryptonightR_template_double_part1: - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 320 - mov r14, QWORD PTR [rcx+32] - mov r8, rcx - xor r14, QWORD PTR [rcx] - mov r12, QWORD PTR [rcx+40] - mov ebx, r14d - mov rsi, QWORD PTR [rcx+224] - and ebx, 2097136 - xor r12, QWORD PTR [rcx+8] - mov rcx, QWORD PTR [rcx+56] - xor rcx, QWORD PTR [r8+24] - mov rax, QWORD PTR [r8+48] - xor rax, QWORD PTR [r8+16] - mov r15, QWORD PTR [rdx+32] - xor r15, QWORD PTR [rdx] - movq xmm0, rcx - mov rcx, QWORD PTR [r8+88] - xor rcx, QWORD PTR [r8+72] - mov r13, QWORD PTR [rdx+40] - mov rdi, QWORD PTR [rdx+224] - xor r13, QWORD PTR [rdx+8] - movaps XMMWORD PTR [rsp+160], xmm6 - movaps XMMWORD PTR [rsp+176], xmm7 - movaps XMMWORD PTR [rsp+192], xmm8 - movaps XMMWORD PTR [rsp+208], xmm9 - movaps XMMWORD PTR [rsp+224], xmm10 - movaps XMMWORD PTR [rsp+240], xmm11 - movaps XMMWORD PTR [rsp+256], xmm12 - movaps XMMWORD PTR [rsp+272], xmm13 - movaps XMMWORD PTR [rsp+288], xmm14 - movaps XMMWORD PTR [rsp+304], xmm15 - movq xmm7, rax - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - - movaps xmm1, XMMWORD PTR [rdx+96] - movaps xmm2, XMMWORD PTR [r8+96] - movaps XMMWORD PTR [rsp], xmm1 - movaps XMMWORD PTR [rsp+16], xmm2 - - mov r8d, r15d - punpcklqdq 
xmm7, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [rdx+56] - xor rcx, QWORD PTR [rdx+24] - movq xmm9, rax - mov QWORD PTR [rsp+128], rsi - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - punpcklqdq xmm9, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [rdx+88] - xor rcx, QWORD PTR [rdx+72] - movq xmm8, rax - mov QWORD PTR [rsp+136], rdi - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm8, xmm0 - and r8d, 2097136 - movq xmm0, rcx - mov r11d, 524288 - movq xmm10, rax - punpcklqdq xmm10, xmm0 - - movq xmm14, QWORD PTR [rsp+128] - movq xmm15, QWORD PTR [rsp+136] - - ALIGN(64) -CryptonightR_template_double_mainloop: - movdqu xmm6, XMMWORD PTR [rbx+rsi] - movq xmm0, r12 - mov ecx, ebx - movq xmm3, r14 - punpcklqdq xmm3, xmm0 - xor ebx, 16 - aesenc xmm6, xmm3 - movq xmm4, r15 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm0 - xor ebx, 48 - paddq xmm0, xmm7 - movdqu xmm1, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm1 - movdqu XMMWORD PTR [rbx+rsi], xmm0 - paddq xmm1, xmm3 - xor ebx, 16 - mov eax, ebx - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm0 - movq rdx, xmm6 - movdqu XMMWORD PTR [rbx+rsi], xmm1 - paddq xmm0, xmm9 - movdqu XMMWORD PTR [rax+rsi], xmm0 - movdqa xmm0, xmm6 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [rcx+rsi], xmm0 - mov esi, edx - movdqu xmm5, XMMWORD PTR [r8+rdi] - and esi, 2097136 - mov ecx, r8d - movq xmm0, r13 - punpcklqdq xmm4, xmm0 - xor r8d, 16 - aesenc xmm5, xmm4 - movdqu xmm0, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm0 - xor r8d, 48 - paddq xmm0, xmm8 - movdqu xmm1, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm1 - movdqu XMMWORD PTR [r8+rdi], xmm0 - paddq xmm1, xmm4 - xor r8d, 16 - mov eax, r8d - xor rax, 32 - movdqu xmm0, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm0 - movdqu XMMWORD PTR [r8+rdi], xmm1 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rdi], xmm0 - movdqa xmm0, xmm5 - pxor xmm0, xmm8 - movdqu XMMWORD PTR [rcx+rdi], xmm0 - movq rdi, xmm5 - movq rcx, xmm14 - mov ebp, edi - mov r8, QWORD PTR [rcx+rsi] - 
mov r10, QWORD PTR [rcx+rsi+8] - lea r9, QWORD PTR [rcx+rsi] - xor esi, 16 - - movq xmm0, rsp - movq xmm1, rsi - movq xmm2, rdi - movq xmm11, rbp - movq xmm12, r15 - movq xmm13, rdx - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp+16] - mov esi, DWORD PTR [rsp+20] - mov edi, DWORD PTR [rsp+24] - mov ebp, DWORD PTR [rsp+28] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - xor r8, rax - - movd esp, xmm3 - pextrd r15d, xmm3, 2 - movd eax, xmm7 - movd edx, xmm9 - pextrd r9d, xmm9, 2 - -CryptonightR_template_double_part2: - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor r14, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, rdx - xor r12, rax - - movq rsp, xmm0 - mov DWORD PTR [rsp+16], ebx - mov DWORD PTR [rsp+20], esi - mov DWORD PTR [rsp+24], edi - mov DWORD PTR [rsp+28], ebp - - movq rsi, xmm1 - movq rdi, xmm2 - movq rbp, xmm11 - movq r15, xmm12 - movq rdx, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rbx, r8 - mov rax, r8 - mul rdx - and ebp, 2097136 - mov r8, rax - movdqu xmm1, XMMWORD PTR [rcx+rsi] - pxor xmm6, xmm1 - xor esi, 48 - paddq xmm1, xmm7 - movdqu xmm2, XMMWORD PTR [rsi+rcx] - pxor xmm6, xmm2 - paddq xmm2, xmm3 - movdqu XMMWORD PTR [rsi+rcx], xmm1 - xor esi, 16 - mov eax, esi - mov rsi, rcx - movdqu xmm0, XMMWORD PTR [rax+rcx] - pxor xmm6, xmm0 - movdqu XMMWORD PTR [rax+rcx], xmm2 - paddq xmm0, xmm9 - add r12, r8 - xor rax, 32 - add r14, rdx - movdqa xmm9, xmm7 - movdqa xmm7, xmm6 - movdqu XMMWORD PTR [rax+rcx], xmm0 - mov QWORD PTR [r9+8], r12 - xor r12, r10 - mov QWORD PTR [r9], r14 - movq rcx, xmm15 - xor r14, rbx - mov r10d, ebp - mov ebx, r14d - xor ebp, 16 - and ebx, 2097136 - mov r8, QWORD PTR [r10+rcx] - mov r9, QWORD PTR [r10+rcx+8] - - movq xmm0, rsp - movq xmm1, rbx - movq xmm2, rsi - movq xmm11, rdi - movq xmm12, rbp - movq xmm13, r15 - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp] - mov esi, DWORD PTR [rsp+4] - mov edi, DWORD PTR [rsp+8] 
- mov ebp, DWORD PTR [rsp+12] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - - xor r8, rax - movq xmm3, r8 - - movd esp, xmm4 - pextrd r15d, xmm4, 2 - movd eax, xmm8 - movd edx, xmm10 - pextrd r9d, xmm10, 2 - -CryptonightR_template_double_part3: - - movq r15, xmm13 - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor r15, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, rdx - xor r13, rax - - movq rsp, xmm0 - mov DWORD PTR [rsp], ebx - mov DWORD PTR [rsp+4], esi - mov DWORD PTR [rsp+8], edi - mov DWORD PTR [rsp+12], ebp - - movq rbx, xmm1 - movq rsi, xmm2 - movq rdi, xmm11 - movq rbp, xmm12 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rax, r8 - mul rdi - mov rdi, rcx - mov r8, rax - movdqu xmm1, XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm1 - xor ebp, 48 - paddq xmm1, xmm8 - add r13, r8 - movdqu xmm2, XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm2 - add r15, rdx - movdqu XMMWORD PTR [rbp+rcx], xmm1 - paddq xmm2, xmm4 - xor ebp, 16 - mov eax, ebp - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm0 - movdqu XMMWORD PTR [rbp+rcx], xmm2 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rcx], xmm0 - movq rax, xmm3 - movdqa xmm10, xmm8 - mov QWORD PTR [r10+rcx], r15 - movdqa xmm8, xmm5 - xor r15, rax - mov QWORD PTR [r10+rcx+8], r13 - mov r8d, r15d - xor r13, r9 - and r8d, 2097136 - dec r11d - jnz CryptonightR_template_double_mainloop - -CryptonightR_template_double_part4: - - mov rbx, QWORD PTR [rsp+400] - movaps xmm6, XMMWORD PTR [rsp+160] - movaps xmm7, XMMWORD PTR [rsp+176] - movaps xmm8, XMMWORD PTR [rsp+192] - movaps xmm9, XMMWORD PTR [rsp+208] - movaps xmm10, XMMWORD PTR [rsp+224] - movaps xmm11, XMMWORD PTR [rsp+240] - movaps xmm12, XMMWORD PTR [rsp+256] - movaps xmm13, XMMWORD PTR [rsp+272] - movaps xmm14, XMMWORD PTR [rsp+288] - movaps xmm15, XMMWORD PTR [rsp+304] - add rsp, 320 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - ret 0 -CryptonightR_template_double_end: diff --git 
a/src/crypto/asm/CryptonightWOW_soft_aes_template.inc b/src/crypto/asm/CryptonightWOW_soft_aes_template.inc deleted file mode 100644 index 53b7016a..00000000 --- a/src/crypto/asm/CryptonightWOW_soft_aes_template.inc +++ /dev/null @@ -1,268 +0,0 @@ -PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part1) -PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop) -PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part2) -PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part3) -PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_end) - -ALIGN(64) -FN_PREFIX(CryptonightWOW_soft_aes_template_part1): - mov rcx, [rcx] - - mov QWORD PTR [rsp+8], rcx - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 232 - - mov eax, [rcx+96] - mov ebx, [rcx+100] - mov esi, [rcx+104] - mov edx, [rcx+108] - mov [rsp+144], eax - mov [rsp+148], ebx - mov [rsp+152], esi - mov [rsp+156], edx - - mov rax, QWORD PTR [rcx+48] - mov r10, rcx - xor rax, QWORD PTR [rcx+16] - mov r8, QWORD PTR [rcx+32] - xor r8, QWORD PTR [rcx] - mov r9, QWORD PTR [rcx+40] - xor r9, QWORD PTR [rcx+8] - movq xmm4, rax - mov rdx, QWORD PTR [rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov r11, QWORD PTR [rcx+224] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r10+72] - mov rax, QWORD PTR [r10+80] - movq xmm0, rdx - xor rax, QWORD PTR [r10+64] - - movaps XMMWORD PTR [rsp+16], xmm6 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+48], xmm8 - movaps XMMWORD PTR [rsp+64], xmm9 - movaps XMMWORD PTR [rsp+80], xmm10 - movaps XMMWORD PTR [rsp+96], xmm11 - movaps XMMWORD PTR [rsp+112], xmm12 - movaps XMMWORD PTR [rsp+128], xmm13 - - movq xmm5, rax - - mov rax, r8 - punpcklqdq xmm4, xmm0 - and eax, 2097136 - movq xmm10, QWORD PTR [r10+96] - movq xmm0, rcx - mov rcx, QWORD PTR [r10+104] - xorps xmm9, xmm9 - mov QWORD PTR [rsp+328], rax - movq xmm12, r11 - mov QWORD PTR [rsp+320], r9 - punpcklqdq xmm5, xmm0 - movq xmm13, rcx - mov r12d, 524288 - - ALIGN(64) 
-FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop): - movd xmm11, r12d - mov r12, QWORD PTR [r10+272] - lea r13, QWORD PTR [rax+r11] - mov esi, DWORD PTR [r13] - movq xmm0, r9 - mov r10d, DWORD PTR [r13+4] - movq xmm7, r8 - mov ebp, DWORD PTR [r13+12] - mov r14d, DWORD PTR [r13+8] - mov rdx, QWORD PTR [rsp+328] - movzx ecx, sil - shr esi, 8 - punpcklqdq xmm7, xmm0 - mov r15d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - mov edi, DWORD PTR [r12+rcx*4] - movzx ecx, r14b - shr r14d, 8 - mov ebx, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - shr ebp, 8 - mov r9d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - xor r15d, DWORD PTR [r12+rcx*4+1024] - movzx ecx, r14b - shr r14d, 8 - mov eax, r14d - shr eax, 8 - xor edi, DWORD PTR [r12+rcx*4+1024] - add eax, 256 - movzx ecx, bpl - shr ebp, 8 - xor ebx, DWORD PTR [r12+rcx*4+1024] - movzx ecx, sil - shr esi, 8 - xor r9d, DWORD PTR [r12+rcx*4+1024] - add r12, 2048 - movzx ecx, r10b - shr r10d, 8 - add r10d, 256 - mov r11d, DWORD PTR [r12+rax*4] - xor r11d, DWORD PTR [r12+rcx*4] - xor r11d, r9d - movzx ecx, sil - mov r10d, DWORD PTR [r12+r10*4] - shr esi, 8 - add esi, 256 - xor r10d, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - xor r10d, ebx - shr ebp, 8 - movd xmm1, r11d - add ebp, 256 - movq r11, xmm12 - mov r9d, DWORD PTR [r12+rcx*4] - xor r9d, DWORD PTR [r12+rsi*4] - mov eax, DWORD PTR [r12+rbp*4] - xor r9d, edi - movzx ecx, r14b - movd xmm0, r10d - movd xmm2, r9d - xor eax, DWORD PTR [r12+rcx*4] - mov rcx, rdx - xor eax, r15d - punpckldq xmm2, xmm1 - xor rcx, 16 - movd xmm6, eax - mov rax, rdx - punpckldq xmm6, xmm0 - xor rax, 32 - punpckldq xmm6, xmm2 - xor rdx, 48 - movdqu xmm2, XMMWORD PTR [rcx+r11] - pxor xmm6, xmm7 - paddq xmm2, xmm4 - movdqu xmm1, XMMWORD PTR [rax+r11] - movdqu xmm0, XMMWORD PTR [rdx+r11] - paddq xmm0, xmm5 - movdqu XMMWORD PTR [rcx+r11], xmm0 - movdqu XMMWORD PTR [rax+r11], xmm2 - movq rcx, xmm13 - paddq xmm1, xmm7 - movdqu XMMWORD PTR [rdx+r11], xmm1 - movq rdi, xmm6 - mov r10, rdi - 
and r10d, 2097136 - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [r13], xmm0 - - mov ebx, [rsp+144] - mov ebp, [rsp+152] - add ebx, [rsp+148] - add ebp, [rsp+156] - shl rbp, 32 - or rbx, rbp - - xor rbx, QWORD PTR [r10+r11] - lea r14, QWORD PTR [r10+r11] - mov rbp, QWORD PTR [r14+8] - - mov [rsp+160], rbx - mov [rsp+168], rdi - mov [rsp+176], rbp - mov [rsp+184], r10 - mov r10, rsp - - mov ebx, [rsp+144] - mov esi, [rsp+148] - mov edi, [rsp+152] - mov ebp, [rsp+156] - - movd esp, xmm7 - movaps xmm0, xmm7 - psrldq xmm0, 8 - movd r15d, xmm0 - movd eax, xmm4 - movd edx, xmm5 - -FN_PREFIX(CryptonightWOW_soft_aes_template_part2): - mov rsp, r10 - mov [rsp+144], ebx - mov [rsp+148], esi - mov [rsp+152], edi - mov [rsp+156], ebp - - mov rbx, [rsp+160] - mov rdi, [rsp+168] - mov rbp, [rsp+176] - mov r10, [rsp+184] - - mov r9, r10 - xor r9, 16 - mov rcx, r10 - xor rcx, 32 - xor r10, 48 - mov rax, rbx - mul rdi - movdqu xmm2, XMMWORD PTR [r9+r11] - movdqu xmm1, XMMWORD PTR [rcx+r11] - paddq xmm1, xmm7 - movq xmm0, rax - movq xmm3, rdx - xor rax, QWORD PTR [r11+rcx+8] - xor rdx, QWORD PTR [rcx+r11] - punpcklqdq xmm3, xmm0 - add r8, rdx - movdqu xmm0, XMMWORD PTR [r10+r11] - pxor xmm2, xmm3 - paddq xmm0, xmm5 - paddq xmm2, xmm4 - movdqu XMMWORD PTR [r9+r11], xmm0 - movdqa xmm5, xmm4 - mov r9, QWORD PTR [rsp+320] - movdqa xmm4, xmm6 - add r9, rax - movdqu XMMWORD PTR [rcx+r11], xmm2 - movdqu XMMWORD PTR [r10+r11], xmm1 - mov r10, QWORD PTR [rsp+304] - movd r12d, xmm11 - mov QWORD PTR [r14], r8 - xor r8, rbx - mov rax, r8 - mov QWORD PTR [r14+8], r9 - and eax, 2097136 - xor r9, rbp - mov QWORD PTR [rsp+320], r9 - mov QWORD PTR [rsp+328], rax - sub r12d, 1 - jne FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop) - -FN_PREFIX(CryptonightWOW_soft_aes_template_part3): - movaps xmm6, XMMWORD PTR [rsp+16] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+48] - movaps xmm9, XMMWORD PTR [rsp+64] - movaps xmm10, XMMWORD PTR [rsp+80] - movaps xmm11, XMMWORD 
PTR [rsp+96] - movaps xmm12, XMMWORD PTR [rsp+112] - movaps xmm13, XMMWORD PTR [rsp+128] - - add rsp, 232 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - pop rbx - ret -FN_PREFIX(CryptonightWOW_soft_aes_template_end): diff --git a/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc b/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc deleted file mode 100644 index b3202b78..00000000 --- a/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc +++ /dev/null @@ -1,268 +0,0 @@ -PUBLIC CryptonightWOW_soft_aes_template_part1 -PUBLIC CryptonightWOW_soft_aes_template_mainloop -PUBLIC CryptonightWOW_soft_aes_template_part2 -PUBLIC CryptonightWOW_soft_aes_template_part3 -PUBLIC CryptonightWOW_soft_aes_template_end - -ALIGN(64) -CryptonightWOW_soft_aes_template_part1: - mov rcx, [rcx] - - mov QWORD PTR [rsp+8], rcx - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 232 - - mov eax, [rcx+96] - mov ebx, [rcx+100] - mov esi, [rcx+104] - mov edx, [rcx+108] - mov [rsp+144], eax - mov [rsp+148], ebx - mov [rsp+152], esi - mov [rsp+156], edx - - mov rax, QWORD PTR [rcx+48] - mov r10, rcx - xor rax, QWORD PTR [rcx+16] - mov r8, QWORD PTR [rcx+32] - xor r8, QWORD PTR [rcx] - mov r9, QWORD PTR [rcx+40] - xor r9, QWORD PTR [rcx+8] - movq xmm4, rax - mov rdx, QWORD PTR [rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov r11, QWORD PTR [rcx+224] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r10+72] - mov rax, QWORD PTR [r10+80] - movq xmm0, rdx - xor rax, QWORD PTR [r10+64] - - movaps XMMWORD PTR [rsp+16], xmm6 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+48], xmm8 - movaps XMMWORD PTR [rsp+64], xmm9 - movaps XMMWORD PTR [rsp+80], xmm10 - movaps XMMWORD PTR [rsp+96], xmm11 - movaps XMMWORD PTR [rsp+112], xmm12 - movaps XMMWORD PTR [rsp+128], xmm13 - - movq xmm5, rax - - mov rax, r8 - punpcklqdq xmm4, xmm0 - and eax, 2097136 - movq xmm10, QWORD PTR [r10+96] - movq xmm0, rcx - mov rcx, QWORD 
PTR [r10+104] - xorps xmm9, xmm9 - mov QWORD PTR [rsp+328], rax - movq xmm12, r11 - mov QWORD PTR [rsp+320], r9 - punpcklqdq xmm5, xmm0 - movq xmm13, rcx - mov r12d, 524288 - - ALIGN(64) -CryptonightWOW_soft_aes_template_mainloop: - movd xmm11, r12d - mov r12, QWORD PTR [r10+272] - lea r13, QWORD PTR [rax+r11] - mov esi, DWORD PTR [r13] - movq xmm0, r9 - mov r10d, DWORD PTR [r13+4] - movq xmm7, r8 - mov ebp, DWORD PTR [r13+12] - mov r14d, DWORD PTR [r13+8] - mov rdx, QWORD PTR [rsp+328] - movzx ecx, sil - shr esi, 8 - punpcklqdq xmm7, xmm0 - mov r15d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - mov edi, DWORD PTR [r12+rcx*4] - movzx ecx, r14b - shr r14d, 8 - mov ebx, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - shr ebp, 8 - mov r9d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - xor r15d, DWORD PTR [r12+rcx*4+1024] - movzx ecx, r14b - shr r14d, 8 - mov eax, r14d - shr eax, 8 - xor edi, DWORD PTR [r12+rcx*4+1024] - add eax, 256 - movzx ecx, bpl - shr ebp, 8 - xor ebx, DWORD PTR [r12+rcx*4+1024] - movzx ecx, sil - shr esi, 8 - xor r9d, DWORD PTR [r12+rcx*4+1024] - add r12, 2048 - movzx ecx, r10b - shr r10d, 8 - add r10d, 256 - mov r11d, DWORD PTR [r12+rax*4] - xor r11d, DWORD PTR [r12+rcx*4] - xor r11d, r9d - movzx ecx, sil - mov r10d, DWORD PTR [r12+r10*4] - shr esi, 8 - add esi, 256 - xor r10d, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - xor r10d, ebx - shr ebp, 8 - movd xmm1, r11d - add ebp, 256 - movq r11, xmm12 - mov r9d, DWORD PTR [r12+rcx*4] - xor r9d, DWORD PTR [r12+rsi*4] - mov eax, DWORD PTR [r12+rbp*4] - xor r9d, edi - movzx ecx, r14b - movd xmm0, r10d - movd xmm2, r9d - xor eax, DWORD PTR [r12+rcx*4] - mov rcx, rdx - xor eax, r15d - punpckldq xmm2, xmm1 - xor rcx, 16 - movd xmm6, eax - mov rax, rdx - punpckldq xmm6, xmm0 - xor rax, 32 - punpckldq xmm6, xmm2 - xor rdx, 48 - movdqu xmm2, XMMWORD PTR [rcx+r11] - pxor xmm6, xmm7 - paddq xmm2, xmm4 - movdqu xmm1, XMMWORD PTR [rax+r11] - movdqu xmm0, XMMWORD PTR [rdx+r11] - paddq xmm0, xmm5 - 
movdqu XMMWORD PTR [rcx+r11], xmm0 - movdqu XMMWORD PTR [rax+r11], xmm2 - movq rcx, xmm13 - paddq xmm1, xmm7 - movdqu XMMWORD PTR [rdx+r11], xmm1 - movq rdi, xmm6 - mov r10, rdi - and r10d, 2097136 - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [r13], xmm0 - - mov ebx, [rsp+144] - mov ebp, [rsp+152] - add ebx, [rsp+148] - add ebp, [rsp+156] - shl rbp, 32 - or rbx, rbp - - xor rbx, QWORD PTR [r10+r11] - lea r14, QWORD PTR [r10+r11] - mov rbp, QWORD PTR [r14+8] - - mov [rsp+160], rbx - mov [rsp+168], rdi - mov [rsp+176], rbp - mov [rsp+184], r10 - mov r10, rsp - - mov ebx, [rsp+144] - mov esi, [rsp+148] - mov edi, [rsp+152] - mov ebp, [rsp+156] - - movd esp, xmm7 - movaps xmm0, xmm7 - psrldq xmm0, 8 - movd r15d, xmm0 - movd eax, xmm4 - movd edx, xmm5 - -CryptonightWOW_soft_aes_template_part2: - mov rsp, r10 - mov [rsp+144], ebx - mov [rsp+148], esi - mov [rsp+152], edi - mov [rsp+156], ebp - - mov rbx, [rsp+160] - mov rdi, [rsp+168] - mov rbp, [rsp+176] - mov r10, [rsp+184] - - mov r9, r10 - xor r9, 16 - mov rcx, r10 - xor rcx, 32 - xor r10, 48 - mov rax, rbx - mul rdi - movdqu xmm2, XMMWORD PTR [r9+r11] - movdqu xmm1, XMMWORD PTR [rcx+r11] - paddq xmm1, xmm7 - movq xmm0, rax - movq xmm3, rdx - xor rax, QWORD PTR [r11+rcx+8] - xor rdx, QWORD PTR [rcx+r11] - punpcklqdq xmm3, xmm0 - add r8, rdx - movdqu xmm0, XMMWORD PTR [r10+r11] - pxor xmm2, xmm3 - paddq xmm0, xmm5 - paddq xmm2, xmm4 - movdqu XMMWORD PTR [r9+r11], xmm0 - movdqa xmm5, xmm4 - mov r9, QWORD PTR [rsp+320] - movdqa xmm4, xmm6 - add r9, rax - movdqu XMMWORD PTR [rcx+r11], xmm2 - movdqu XMMWORD PTR [r10+r11], xmm1 - mov r10, QWORD PTR [rsp+304] - movd r12d, xmm11 - mov QWORD PTR [r14], r8 - xor r8, rbx - mov rax, r8 - mov QWORD PTR [r14+8], r9 - and eax, 2097136 - xor r9, rbp - mov QWORD PTR [rsp+320], r9 - mov QWORD PTR [rsp+328], rax - sub r12d, 1 - jne CryptonightWOW_soft_aes_template_mainloop - -CryptonightWOW_soft_aes_template_part3: - movaps xmm6, XMMWORD PTR [rsp+16] - movaps xmm7, 
XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+48] - movaps xmm9, XMMWORD PTR [rsp+64] - movaps xmm10, XMMWORD PTR [rsp+80] - movaps xmm11, XMMWORD PTR [rsp+96] - movaps xmm12, XMMWORD PTR [rsp+112] - movaps xmm13, XMMWORD PTR [rsp+128] - - add rsp, 232 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - pop rbx - ret -CryptonightWOW_soft_aes_template_end: diff --git a/src/crypto/asm/CryptonightWOW_template.inc b/src/crypto/asm/CryptonightWOW_template.inc deleted file mode 100644 index 82d455f6..00000000 --- a/src/crypto/asm/CryptonightWOW_template.inc +++ /dev/null @@ -1,491 +0,0 @@ -PUBLIC FN_PREFIX(CryptonightWOW_template_part1) -PUBLIC FN_PREFIX(CryptonightWOW_template_mainloop) -PUBLIC FN_PREFIX(CryptonightWOW_template_part2) -PUBLIC FN_PREFIX(CryptonightWOW_template_part3) -PUBLIC FN_PREFIX(CryptonightWOW_template_end) -PUBLIC FN_PREFIX(CryptonightWOW_template_double_part1) -PUBLIC FN_PREFIX(CryptonightWOW_template_double_mainloop) -PUBLIC FN_PREFIX(CryptonightWOW_template_double_part2) -PUBLIC FN_PREFIX(CryptonightWOW_template_double_part3) -PUBLIC FN_PREFIX(CryptonightWOW_template_double_part4) -PUBLIC FN_PREFIX(CryptonightWOW_template_double_end) - -ALIGN(64) -FN_PREFIX(CryptonightWOW_template_part1): - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - push rdi - sub rsp, 64 - mov r12, rcx - mov r8, QWORD PTR [r12+32] - mov rdx, r12 - xor r8, QWORD PTR [r12] - mov r15, QWORD PTR [r12+40] - mov r9, r8 - xor r15, QWORD PTR [r12+8] - mov r11, QWORD PTR [r12+224] - mov r12, QWORD PTR [r12+56] - xor r12, QWORD PTR [rdx+24] - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - movaps XMMWORD PTR [rsp+48], xmm6 - movq xmm0, r12 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - movaps XMMWORD PTR [rsp], xmm9 - mov r12, QWORD PTR [rdx+88] - xor r12, QWORD PTR [rdx+72] - movq xmm6, rax 
- mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm6, xmm0 - and r9d, 2097136 - movq xmm0, r12 - movq xmm7, rax - punpcklqdq xmm7, xmm0 - mov r10d, r9d - movq xmm9, rsp - mov rsp, r8 - mov r8d, 524288 - - mov ebx, [rdx+96] - mov esi, [rdx+100] - mov edi, [rdx+104] - mov ebp, [rdx+108] - - ALIGN(64) -FN_PREFIX(CryptonightWOW_template_mainloop): - movdqa xmm5, XMMWORD PTR [r9+r11] - movq xmm0, r15 - movq xmm4, rsp - punpcklqdq xmm4, xmm0 - lea rdx, QWORD PTR [r9+r11] - - aesenc xmm5, xmm4 - movd r10d, xmm5 - and r10d, 2097136 - - mov r12d, r9d - mov eax, r9d - xor r9d, 48 - xor r12d, 16 - xor eax, 32 - movdqu xmm0, XMMWORD PTR [r9+r11] - movdqu xmm2, XMMWORD PTR [r12+r11] - movdqu xmm1, XMMWORD PTR [rax+r11] - paddq xmm0, xmm7 - paddq xmm2, xmm6 - paddq xmm1, xmm4 - movdqu XMMWORD PTR [r12+r11], xmm0 - movq r12, xmm5 - movdqu XMMWORD PTR [rax+r11], xmm2 - movdqu XMMWORD PTR [r9+r11], xmm1 - - movdqa xmm0, xmm5 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [rdx], xmm0 - - lea r13d, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or r13, rdx - - xor r13, QWORD PTR [r10+r11] - mov r14, QWORD PTR [r10+r11+8] - - movd eax, xmm6 - movd edx, xmm7 - pextrd r9d, xmm7, 2 - -FN_PREFIX(CryptonightWOW_template_part2): - mov rax, r13 - mul r12 - movq xmm0, rax - movq xmm3, rdx - punpcklqdq xmm3, xmm0 - - mov r9d, r10d - mov r12d, r10d - xor r9d, 16 - xor r12d, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [r12+r11] - xor rdx, QWORD PTR [r12+r11] - xor rax, QWORD PTR [r11+r12+8] - movdqa xmm2, XMMWORD PTR [r9+r11] - pxor xmm3, xmm2 - paddq xmm7, XMMWORD PTR [r10+r11] - paddq xmm1, xmm4 - paddq xmm3, xmm6 - movdqu XMMWORD PTR [r9+r11], xmm7 - movdqu XMMWORD PTR [r12+r11], xmm3 - movdqu XMMWORD PTR [r10+r11], xmm1 - - movdqa xmm7, xmm6 - add r15, rax - add rsp, rdx - xor r10, 48 - mov QWORD PTR [r10+r11], rsp - xor rsp, r13 - mov r9d, esp - mov QWORD PTR [r10+r11+8], r15 - and r9d, 2097136 - xor r15, r14 - movdqa xmm6, xmm5 - dec r8d - jnz 
FN_PREFIX(CryptonightWOW_template_mainloop) - -FN_PREFIX(CryptonightWOW_template_part3): - movq rsp, xmm9 - - mov rbx, QWORD PTR [rsp+136] - mov rbp, QWORD PTR [rsp+144] - mov rsi, QWORD PTR [rsp+152] - movaps xmm6, XMMWORD PTR [rsp+48] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+16] - movaps xmm9, XMMWORD PTR [rsp] - add rsp, 64 - pop rdi - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - ret 0 -FN_PREFIX(CryptonightWOW_template_end): - -ALIGN(64) -FN_PREFIX(CryptonightWOW_template_double_part1): - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 320 - mov r14, QWORD PTR [rcx+32] - mov r8, rcx - xor r14, QWORD PTR [rcx] - mov r12, QWORD PTR [rcx+40] - mov ebx, r14d - mov rsi, QWORD PTR [rcx+224] - and ebx, 2097136 - xor r12, QWORD PTR [rcx+8] - mov rcx, QWORD PTR [rcx+56] - xor rcx, QWORD PTR [r8+24] - mov rax, QWORD PTR [r8+48] - xor rax, QWORD PTR [r8+16] - mov r15, QWORD PTR [rdx+32] - xor r15, QWORD PTR [rdx] - movq xmm0, rcx - mov rcx, QWORD PTR [r8+88] - xor rcx, QWORD PTR [r8+72] - mov r13, QWORD PTR [rdx+40] - mov rdi, QWORD PTR [rdx+224] - xor r13, QWORD PTR [rdx+8] - movaps XMMWORD PTR [rsp+160], xmm6 - movaps XMMWORD PTR [rsp+176], xmm7 - movaps XMMWORD PTR [rsp+192], xmm8 - movaps XMMWORD PTR [rsp+208], xmm9 - movaps XMMWORD PTR [rsp+224], xmm10 - movaps XMMWORD PTR [rsp+240], xmm11 - movaps XMMWORD PTR [rsp+256], xmm12 - movaps XMMWORD PTR [rsp+272], xmm13 - movaps XMMWORD PTR [rsp+288], xmm14 - movaps XMMWORD PTR [rsp+304], xmm15 - movq xmm7, rax - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - - movaps xmm1, XMMWORD PTR [rdx+96] - movaps xmm2, XMMWORD PTR [r8+96] - movaps XMMWORD PTR [rsp], xmm1 - movaps XMMWORD PTR [rsp+16], xmm2 - - mov r8d, r15d - punpcklqdq xmm7, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [rdx+56] - xor rcx, QWORD PTR [rdx+24] - movq xmm9, rax - mov QWORD PTR [rsp+128], rsi - mov 
rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - punpcklqdq xmm9, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [rdx+88] - xor rcx, QWORD PTR [rdx+72] - movq xmm8, rax - mov QWORD PTR [rsp+136], rdi - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm8, xmm0 - and r8d, 2097136 - movq xmm0, rcx - mov r11d, 524288 - movq xmm10, rax - punpcklqdq xmm10, xmm0 - - movq xmm14, QWORD PTR [rsp+128] - movq xmm15, QWORD PTR [rsp+136] - - ALIGN(64) -FN_PREFIX(CryptonightWOW_template_double_mainloop): - movdqu xmm6, XMMWORD PTR [rbx+rsi] - movq xmm0, r12 - mov ecx, ebx - movq xmm3, r14 - punpcklqdq xmm3, xmm0 - xor ebx, 16 - aesenc xmm6, xmm3 - movq rdx, xmm6 - movq xmm4, r15 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - xor ebx, 48 - paddq xmm0, xmm7 - movdqu xmm1, XMMWORD PTR [rbx+rsi] - movdqu XMMWORD PTR [rbx+rsi], xmm0 - paddq xmm1, xmm3 - xor ebx, 16 - mov eax, ebx - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - movdqu XMMWORD PTR [rbx+rsi], xmm1 - paddq xmm0, xmm9 - movdqu XMMWORD PTR [rax+rsi], xmm0 - movdqa xmm0, xmm6 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [rcx+rsi], xmm0 - mov esi, edx - movdqu xmm5, XMMWORD PTR [r8+rdi] - and esi, 2097136 - mov ecx, r8d - movq xmm0, r13 - punpcklqdq xmm4, xmm0 - xor r8d, 16 - aesenc xmm5, xmm4 - movdqu xmm0, XMMWORD PTR [r8+rdi] - xor r8d, 48 - paddq xmm0, xmm8 - movdqu xmm1, XMMWORD PTR [r8+rdi] - movdqu XMMWORD PTR [r8+rdi], xmm0 - paddq xmm1, xmm4 - xor r8d, 16 - mov eax, r8d - xor rax, 32 - movdqu xmm0, XMMWORD PTR [r8+rdi] - movdqu XMMWORD PTR [r8+rdi], xmm1 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rdi], xmm0 - movdqa xmm0, xmm5 - pxor xmm0, xmm8 - movdqu XMMWORD PTR [rcx+rdi], xmm0 - movq rdi, xmm5 - movq rcx, xmm14 - mov ebp, edi - mov r8, QWORD PTR [rcx+rsi] - mov r10, QWORD PTR [rcx+rsi+8] - lea r9, QWORD PTR [rcx+rsi] - xor esi, 16 - - movq xmm0, rsp - movq xmm1, rsi - movq xmm2, rdi - movq xmm11, rbp - movq xmm12, r15 - movq xmm13, rdx - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, 
DWORD PTR [rsp+16] - mov esi, DWORD PTR [rsp+20] - mov edi, DWORD PTR [rsp+24] - mov ebp, DWORD PTR [rsp+28] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - xor r8, rax - - movd esp, xmm3 - pextrd r15d, xmm3, 2 - movd eax, xmm7 - movd edx, xmm9 - pextrd r9d, xmm9, 2 - -FN_PREFIX(CryptonightWOW_template_double_part2): - - movq rsp, xmm0 - mov DWORD PTR [rsp+16], ebx - mov DWORD PTR [rsp+20], esi - mov DWORD PTR [rsp+24], edi - mov DWORD PTR [rsp+28], ebp - - movq rsi, xmm1 - movq rdi, xmm2 - movq rbp, xmm11 - movq r15, xmm12 - movq rdx, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rbx, r8 - mov rax, r8 - mul rdx - and ebp, 2097136 - mov r8, rax - movq xmm1, rdx - movq xmm0, r8 - punpcklqdq xmm1, xmm0 - pxor xmm1, XMMWORD PTR [rcx+rsi] - xor esi, 48 - paddq xmm1, xmm7 - movdqu xmm2, XMMWORD PTR [rsi+rcx] - xor rdx, QWORD PTR [rsi+rcx] - paddq xmm2, xmm3 - xor r8, QWORD PTR [rsi+rcx+8] - movdqu XMMWORD PTR [rsi+rcx], xmm1 - xor esi, 16 - mov eax, esi - mov rsi, rcx - movdqu xmm0, XMMWORD PTR [rax+rcx] - movdqu XMMWORD PTR [rax+rcx], xmm2 - paddq xmm0, xmm9 - add r12, r8 - xor rax, 32 - add r14, rdx - movdqa xmm9, xmm7 - movdqa xmm7, xmm6 - movdqu XMMWORD PTR [rax+rcx], xmm0 - mov QWORD PTR [r9+8], r12 - xor r12, r10 - mov QWORD PTR [r9], r14 - movq rcx, xmm15 - xor r14, rbx - mov r10d, ebp - mov ebx, r14d - xor ebp, 16 - and ebx, 2097136 - mov r8, QWORD PTR [r10+rcx] - mov r9, QWORD PTR [r10+rcx+8] - - movq xmm0, rsp - movq xmm1, rbx - movq xmm2, rsi - movq xmm11, rdi - movq xmm12, rbp - movq xmm13, r15 - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp] - mov esi, DWORD PTR [rsp+4] - mov edi, DWORD PTR [rsp+8] - mov ebp, DWORD PTR [rsp+12] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - - xor r8, rax - movq xmm3, r8 - - movd esp, xmm4 - pextrd r15d, xmm4, 2 - movd eax, xmm8 - movd edx, xmm10 - pextrd r9d, xmm10, 2 - -FN_PREFIX(CryptonightWOW_template_double_part3): - - movq rsp, xmm0 - 
mov DWORD PTR [rsp], ebx - mov DWORD PTR [rsp+4], esi - mov DWORD PTR [rsp+8], edi - mov DWORD PTR [rsp+12], ebp - - movq rbx, xmm1 - movq rsi, xmm2 - movq rdi, xmm11 - movq rbp, xmm12 - movq r15, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rax, r8 - mul rdi - movq xmm1, rdx - movq xmm0, rax - punpcklqdq xmm1, xmm0 - mov rdi, rcx - mov r8, rax - pxor xmm1, XMMWORD PTR [rbp+rcx] - xor ebp, 48 - paddq xmm1, xmm8 - xor r8, QWORD PTR [rbp+rcx+8] - xor rdx, QWORD PTR [rbp+rcx] - add r13, r8 - movdqu xmm2, XMMWORD PTR [rbp+rcx] - add r15, rdx - movdqu XMMWORD PTR [rbp+rcx], xmm1 - paddq xmm2, xmm4 - xor ebp, 16 - mov eax, ebp - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbp+rcx] - movdqu XMMWORD PTR [rbp+rcx], xmm2 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rcx], xmm0 - movq rax, xmm3 - movdqa xmm10, xmm8 - mov QWORD PTR [r10+rcx], r15 - movdqa xmm8, xmm5 - xor r15, rax - mov QWORD PTR [r10+rcx+8], r13 - mov r8d, r15d - xor r13, r9 - and r8d, 2097136 - dec r11d - jnz FN_PREFIX(CryptonightWOW_template_double_mainloop) - -FN_PREFIX(CryptonightWOW_template_double_part4): - - mov rbx, QWORD PTR [rsp+400] - movaps xmm6, XMMWORD PTR [rsp+160] - movaps xmm7, XMMWORD PTR [rsp+176] - movaps xmm8, XMMWORD PTR [rsp+192] - movaps xmm9, XMMWORD PTR [rsp+208] - movaps xmm10, XMMWORD PTR [rsp+224] - movaps xmm11, XMMWORD PTR [rsp+240] - movaps xmm12, XMMWORD PTR [rsp+256] - movaps xmm13, XMMWORD PTR [rsp+272] - movaps xmm14, XMMWORD PTR [rsp+288] - movaps xmm15, XMMWORD PTR [rsp+304] - add rsp, 320 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - ret 0 -FN_PREFIX(CryptonightWOW_template_double_end): diff --git a/src/crypto/asm/CryptonightWOW_template_win.inc b/src/crypto/asm/CryptonightWOW_template_win.inc deleted file mode 100644 index 644c01f1..00000000 --- a/src/crypto/asm/CryptonightWOW_template_win.inc +++ /dev/null @@ -1,491 +0,0 @@ -PUBLIC CryptonightWOW_template_part1 -PUBLIC CryptonightWOW_template_mainloop -PUBLIC 
CryptonightWOW_template_part2 -PUBLIC CryptonightWOW_template_part3 -PUBLIC CryptonightWOW_template_end -PUBLIC CryptonightWOW_template_double_part1 -PUBLIC CryptonightWOW_template_double_mainloop -PUBLIC CryptonightWOW_template_double_part2 -PUBLIC CryptonightWOW_template_double_part3 -PUBLIC CryptonightWOW_template_double_part4 -PUBLIC CryptonightWOW_template_double_end - -ALIGN(64) -CryptonightWOW_template_part1: - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - push rdi - sub rsp, 64 - mov r12, rcx - mov r8, QWORD PTR [r12+32] - mov rdx, r12 - xor r8, QWORD PTR [r12] - mov r15, QWORD PTR [r12+40] - mov r9, r8 - xor r15, QWORD PTR [r12+8] - mov r11, QWORD PTR [r12+224] - mov r12, QWORD PTR [r12+56] - xor r12, QWORD PTR [rdx+24] - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - movaps XMMWORD PTR [rsp+48], xmm6 - movq xmm0, r12 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - movaps XMMWORD PTR [rsp], xmm9 - mov r12, QWORD PTR [rdx+88] - xor r12, QWORD PTR [rdx+72] - movq xmm6, rax - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm6, xmm0 - and r9d, 2097136 - movq xmm0, r12 - movq xmm7, rax - punpcklqdq xmm7, xmm0 - mov r10d, r9d - movq xmm9, rsp - mov rsp, r8 - mov r8d, 524288 - - mov ebx, [rdx+96] - mov esi, [rdx+100] - mov edi, [rdx+104] - mov ebp, [rdx+108] - - ALIGN(64) -CryptonightWOW_template_mainloop: - movdqa xmm5, XMMWORD PTR [r9+r11] - movq xmm0, r15 - movq xmm4, rsp - punpcklqdq xmm4, xmm0 - lea rdx, QWORD PTR [r9+r11] - - aesenc xmm5, xmm4 - movd r10d, xmm5 - and r10d, 2097136 - - mov r12d, r9d - mov eax, r9d - xor r9d, 48 - xor r12d, 16 - xor eax, 32 - movdqu xmm0, XMMWORD PTR [r9+r11] - movdqu xmm2, XMMWORD PTR [r12+r11] - movdqu xmm1, XMMWORD PTR [rax+r11] - paddq xmm0, xmm7 - paddq xmm2, xmm6 - paddq xmm1, xmm4 - movdqu XMMWORD PTR [r12+r11], xmm0 - movq r12, 
xmm5 - movdqu XMMWORD PTR [rax+r11], xmm2 - movdqu XMMWORD PTR [r9+r11], xmm1 - - movdqa xmm0, xmm5 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [rdx], xmm0 - - lea r13d, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or r13, rdx - - xor r13, QWORD PTR [r10+r11] - mov r14, QWORD PTR [r10+r11+8] - - movd eax, xmm6 - movd edx, xmm7 - pextrd r9d, xmm7, 2 - -CryptonightWOW_template_part2: - mov rax, r13 - mul r12 - movq xmm0, rax - movq xmm3, rdx - punpcklqdq xmm3, xmm0 - - mov r9d, r10d - mov r12d, r10d - xor r9d, 16 - xor r12d, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [r12+r11] - xor rdx, QWORD PTR [r12+r11] - xor rax, QWORD PTR [r11+r12+8] - movdqa xmm2, XMMWORD PTR [r9+r11] - pxor xmm3, xmm2 - paddq xmm7, XMMWORD PTR [r10+r11] - paddq xmm1, xmm4 - paddq xmm3, xmm6 - movdqu XMMWORD PTR [r9+r11], xmm7 - movdqu XMMWORD PTR [r12+r11], xmm3 - movdqu XMMWORD PTR [r10+r11], xmm1 - - movdqa xmm7, xmm6 - add r15, rax - add rsp, rdx - xor r10, 48 - mov QWORD PTR [r10+r11], rsp - xor rsp, r13 - mov r9d, esp - mov QWORD PTR [r10+r11+8], r15 - and r9d, 2097136 - xor r15, r14 - movdqa xmm6, xmm5 - dec r8d - jnz CryptonightWOW_template_mainloop - -CryptonightWOW_template_part3: - movq rsp, xmm9 - - mov rbx, QWORD PTR [rsp+136] - mov rbp, QWORD PTR [rsp+144] - mov rsi, QWORD PTR [rsp+152] - movaps xmm6, XMMWORD PTR [rsp+48] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+16] - movaps xmm9, XMMWORD PTR [rsp] - add rsp, 64 - pop rdi - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - ret 0 -CryptonightWOW_template_end: - -ALIGN(64) -CryptonightWOW_template_double_part1: - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 320 - mov r14, QWORD PTR [rcx+32] - mov r8, rcx - xor r14, QWORD PTR [rcx] - mov r12, QWORD PTR [rcx+40] - mov ebx, r14d - mov rsi, QWORD PTR [rcx+224] - and ebx, 2097136 - xor r12, QWORD PTR [rcx+8] - mov rcx, QWORD PTR [rcx+56] - xor 
rcx, QWORD PTR [r8+24] - mov rax, QWORD PTR [r8+48] - xor rax, QWORD PTR [r8+16] - mov r15, QWORD PTR [rdx+32] - xor r15, QWORD PTR [rdx] - movq xmm0, rcx - mov rcx, QWORD PTR [r8+88] - xor rcx, QWORD PTR [r8+72] - mov r13, QWORD PTR [rdx+40] - mov rdi, QWORD PTR [rdx+224] - xor r13, QWORD PTR [rdx+8] - movaps XMMWORD PTR [rsp+160], xmm6 - movaps XMMWORD PTR [rsp+176], xmm7 - movaps XMMWORD PTR [rsp+192], xmm8 - movaps XMMWORD PTR [rsp+208], xmm9 - movaps XMMWORD PTR [rsp+224], xmm10 - movaps XMMWORD PTR [rsp+240], xmm11 - movaps XMMWORD PTR [rsp+256], xmm12 - movaps XMMWORD PTR [rsp+272], xmm13 - movaps XMMWORD PTR [rsp+288], xmm14 - movaps XMMWORD PTR [rsp+304], xmm15 - movq xmm7, rax - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - - movaps xmm1, XMMWORD PTR [rdx+96] - movaps xmm2, XMMWORD PTR [r8+96] - movaps XMMWORD PTR [rsp], xmm1 - movaps XMMWORD PTR [rsp+16], xmm2 - - mov r8d, r15d - punpcklqdq xmm7, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [rdx+56] - xor rcx, QWORD PTR [rdx+24] - movq xmm9, rax - mov QWORD PTR [rsp+128], rsi - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - punpcklqdq xmm9, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [rdx+88] - xor rcx, QWORD PTR [rdx+72] - movq xmm8, rax - mov QWORD PTR [rsp+136], rdi - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm8, xmm0 - and r8d, 2097136 - movq xmm0, rcx - mov r11d, 524288 - movq xmm10, rax - punpcklqdq xmm10, xmm0 - - movq xmm14, QWORD PTR [rsp+128] - movq xmm15, QWORD PTR [rsp+136] - - ALIGN(64) -CryptonightWOW_template_double_mainloop: - movdqu xmm6, XMMWORD PTR [rbx+rsi] - movq xmm0, r12 - mov ecx, ebx - movq xmm3, r14 - punpcklqdq xmm3, xmm0 - xor ebx, 16 - aesenc xmm6, xmm3 - movq rdx, xmm6 - movq xmm4, r15 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - xor ebx, 48 - paddq xmm0, xmm7 - movdqu xmm1, XMMWORD PTR [rbx+rsi] - movdqu XMMWORD PTR [rbx+rsi], xmm0 - paddq xmm1, xmm3 - xor ebx, 16 - mov eax, ebx - xor rax, 32 - movdqu xmm0, XMMWORD PTR 
[rbx+rsi] - movdqu XMMWORD PTR [rbx+rsi], xmm1 - paddq xmm0, xmm9 - movdqu XMMWORD PTR [rax+rsi], xmm0 - movdqa xmm0, xmm6 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [rcx+rsi], xmm0 - mov esi, edx - movdqu xmm5, XMMWORD PTR [r8+rdi] - and esi, 2097136 - mov ecx, r8d - movq xmm0, r13 - punpcklqdq xmm4, xmm0 - xor r8d, 16 - aesenc xmm5, xmm4 - movdqu xmm0, XMMWORD PTR [r8+rdi] - xor r8d, 48 - paddq xmm0, xmm8 - movdqu xmm1, XMMWORD PTR [r8+rdi] - movdqu XMMWORD PTR [r8+rdi], xmm0 - paddq xmm1, xmm4 - xor r8d, 16 - mov eax, r8d - xor rax, 32 - movdqu xmm0, XMMWORD PTR [r8+rdi] - movdqu XMMWORD PTR [r8+rdi], xmm1 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rdi], xmm0 - movdqa xmm0, xmm5 - pxor xmm0, xmm8 - movdqu XMMWORD PTR [rcx+rdi], xmm0 - movq rdi, xmm5 - movq rcx, xmm14 - mov ebp, edi - mov r8, QWORD PTR [rcx+rsi] - mov r10, QWORD PTR [rcx+rsi+8] - lea r9, QWORD PTR [rcx+rsi] - xor esi, 16 - - movq xmm0, rsp - movq xmm1, rsi - movq xmm2, rdi - movq xmm11, rbp - movq xmm12, r15 - movq xmm13, rdx - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp+16] - mov esi, DWORD PTR [rsp+20] - mov edi, DWORD PTR [rsp+24] - mov ebp, DWORD PTR [rsp+28] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - xor r8, rax - - movd esp, xmm3 - pextrd r15d, xmm3, 2 - movd eax, xmm7 - movd edx, xmm9 - pextrd r9d, xmm9, 2 - -CryptonightWOW_template_double_part2: - - movq rsp, xmm0 - mov DWORD PTR [rsp+16], ebx - mov DWORD PTR [rsp+20], esi - mov DWORD PTR [rsp+24], edi - mov DWORD PTR [rsp+28], ebp - - movq rsi, xmm1 - movq rdi, xmm2 - movq rbp, xmm11 - movq r15, xmm12 - movq rdx, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rbx, r8 - mov rax, r8 - mul rdx - and ebp, 2097136 - mov r8, rax - movq xmm1, rdx - movq xmm0, r8 - punpcklqdq xmm1, xmm0 - pxor xmm1, XMMWORD PTR [rcx+rsi] - xor esi, 48 - paddq xmm1, xmm7 - movdqu xmm2, XMMWORD PTR [rsi+rcx] - xor rdx, QWORD PTR [rsi+rcx] - paddq xmm2, xmm3 - xor r8, QWORD PTR [rsi+rcx+8] - movdqu 
XMMWORD PTR [rsi+rcx], xmm1 - xor esi, 16 - mov eax, esi - mov rsi, rcx - movdqu xmm0, XMMWORD PTR [rax+rcx] - movdqu XMMWORD PTR [rax+rcx], xmm2 - paddq xmm0, xmm9 - add r12, r8 - xor rax, 32 - add r14, rdx - movdqa xmm9, xmm7 - movdqa xmm7, xmm6 - movdqu XMMWORD PTR [rax+rcx], xmm0 - mov QWORD PTR [r9+8], r12 - xor r12, r10 - mov QWORD PTR [r9], r14 - movq rcx, xmm15 - xor r14, rbx - mov r10d, ebp - mov ebx, r14d - xor ebp, 16 - and ebx, 2097136 - mov r8, QWORD PTR [r10+rcx] - mov r9, QWORD PTR [r10+rcx+8] - - movq xmm0, rsp - movq xmm1, rbx - movq xmm2, rsi - movq xmm11, rdi - movq xmm12, rbp - movq xmm13, r15 - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp] - mov esi, DWORD PTR [rsp+4] - mov edi, DWORD PTR [rsp+8] - mov ebp, DWORD PTR [rsp+12] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - - xor r8, rax - movq xmm3, r8 - - movd esp, xmm4 - pextrd r15d, xmm4, 2 - movd eax, xmm8 - movd edx, xmm10 - pextrd r9d, xmm10, 2 - -CryptonightWOW_template_double_part3: - - movq rsp, xmm0 - mov DWORD PTR [rsp], ebx - mov DWORD PTR [rsp+4], esi - mov DWORD PTR [rsp+8], edi - mov DWORD PTR [rsp+12], ebp - - movq rbx, xmm1 - movq rsi, xmm2 - movq rdi, xmm11 - movq rbp, xmm12 - movq r15, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rax, r8 - mul rdi - movq xmm1, rdx - movq xmm0, rax - punpcklqdq xmm1, xmm0 - mov rdi, rcx - mov r8, rax - pxor xmm1, XMMWORD PTR [rbp+rcx] - xor ebp, 48 - paddq xmm1, xmm8 - xor r8, QWORD PTR [rbp+rcx+8] - xor rdx, QWORD PTR [rbp+rcx] - add r13, r8 - movdqu xmm2, XMMWORD PTR [rbp+rcx] - add r15, rdx - movdqu XMMWORD PTR [rbp+rcx], xmm1 - paddq xmm2, xmm4 - xor ebp, 16 - mov eax, ebp - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbp+rcx] - movdqu XMMWORD PTR [rbp+rcx], xmm2 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rcx], xmm0 - movq rax, xmm3 - movdqa xmm10, xmm8 - mov QWORD PTR [r10+rcx], r15 - movdqa xmm8, xmm5 - xor r15, rax - mov QWORD PTR [r10+rcx+8], r13 - mov r8d, r15d - xor r13, r9 - 
and r8d, 2097136 - dec r11d - jnz CryptonightWOW_template_double_mainloop - -CryptonightWOW_template_double_part4: - - mov rbx, QWORD PTR [rsp+400] - movaps xmm6, XMMWORD PTR [rsp+160] - movaps xmm7, XMMWORD PTR [rsp+176] - movaps xmm8, XMMWORD PTR [rsp+192] - movaps xmm9, XMMWORD PTR [rsp+208] - movaps xmm10, XMMWORD PTR [rsp+224] - movaps xmm11, XMMWORD PTR [rsp+240] - movaps xmm12, XMMWORD PTR [rsp+256] - movaps xmm13, XMMWORD PTR [rsp+272] - movaps xmm14, XMMWORD PTR [rsp+288] - movaps xmm15, XMMWORD PTR [rsp+304] - add rsp, 320 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - ret 0 -CryptonightWOW_template_double_end: diff --git a/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc b/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc deleted file mode 100644 index 1710cac7..00000000 --- a/src/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc +++ /dev/null @@ -1,413 +0,0 @@ - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov rax, rsp - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 184 - - stmxcsr DWORD PTR [rsp+272] - mov DWORD PTR [rsp+276], 24448 - ldmxcsr DWORD PTR [rsp+276] - - mov r13, QWORD PTR [rcx+224] - mov r9, rdx - mov r10, QWORD PTR [rcx+32] - mov r8, rcx - xor r10, QWORD PTR [rcx] - mov r14d, 524288 - mov r11, QWORD PTR [rcx+40] - xor r11, QWORD PTR [rcx+8] - mov rsi, QWORD PTR [rdx+224] - mov rdx, QWORD PTR [rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov rdi, QWORD PTR [r9+32] - xor rdi, QWORD PTR [r9] - mov rbp, QWORD PTR [r9+40] - xor rbp, QWORD PTR [r9+8] - movq xmm0, rdx - movaps XMMWORD PTR [rax-88], xmm6 - movaps XMMWORD PTR [rax-104], xmm7 - movaps XMMWORD PTR [rax-120], xmm8 - movaps XMMWORD PTR [rsp+112], xmm9 - movaps XMMWORD PTR [rsp+96], xmm10 - movaps XMMWORD PTR [rsp+80], xmm11 - movaps XMMWORD PTR [rsp+64], xmm12 - movaps XMMWORD PTR [rsp+48], xmm13 - movaps XMMWORD PTR [rsp+32], xmm14 - movaps XMMWORD PTR [rsp+16], xmm15 - mov rdx, r10 - movq xmm4, QWORD 
PTR [r8+96] - and edx, 2097136 - mov rax, QWORD PTR [rcx+48] - xorps xmm13, xmm13 - xor rax, QWORD PTR [rcx+16] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r8+72] - movq xmm5, QWORD PTR [r8+104] - movq xmm7, rax - - mov eax, 1 - shl rax, 52 - movq xmm14, rax - punpcklqdq xmm14, xmm14 - - mov eax, 1023 - shl rax, 52 - movq xmm12, rax - punpcklqdq xmm12, xmm12 - - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - punpcklqdq xmm7, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [r9+56] - xor rcx, QWORD PTR [r9+24] - movq xmm3, rax - mov rax, QWORD PTR [r9+48] - xor rax, QWORD PTR [r9+16] - punpcklqdq xmm3, xmm0 - movq xmm0, rcx - mov QWORD PTR [rsp], r13 - mov rcx, QWORD PTR [r9+88] - xor rcx, QWORD PTR [r9+72] - movq xmm6, rax - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - punpcklqdq xmm6, xmm0 - movq xmm0, rcx - mov QWORD PTR [rsp+256], r10 - mov rcx, rdi - mov QWORD PTR [rsp+264], r11 - movq xmm8, rax - and ecx, 2097136 - punpcklqdq xmm8, xmm0 - movq xmm0, QWORD PTR [r9+96] - punpcklqdq xmm4, xmm0 - movq xmm0, QWORD PTR [r9+104] - lea r8, QWORD PTR [rcx+rsi] - movdqu xmm11, XMMWORD PTR [r8] - punpcklqdq xmm5, xmm0 - lea r9, QWORD PTR [rdx+r13] - movdqu xmm15, XMMWORD PTR [r9] - - ALIGN(64) -main_loop_double_sandybridge: - movdqu xmm9, xmm15 - mov eax, edx - mov ebx, edx - xor eax, 16 - xor ebx, 32 - xor edx, 48 - - movq xmm0, r11 - movq xmm2, r10 - punpcklqdq xmm2, xmm0 - aesenc xmm9, xmm2 - - movdqu xmm0, XMMWORD PTR [rax+r13] - movdqu xmm1, XMMWORD PTR [rbx+r13] - paddq xmm0, xmm7 - paddq xmm1, xmm2 - movdqu XMMWORD PTR [rbx+r13], xmm0 - movdqu xmm0, XMMWORD PTR [rdx+r13] - movdqu XMMWORD PTR [rdx+r13], xmm1 - paddq xmm0, xmm3 - movdqu XMMWORD PTR [rax+r13], xmm0 - - movq r11, xmm9 - mov edx, r11d - and edx, 2097136 - movdqa xmm0, xmm9 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [r9], xmm0 - - lea rbx, QWORD PTR [rdx+r13] - mov r10, QWORD PTR [rdx+r13] - - movdqu xmm10, xmm11 - movq xmm0, rbp - movq xmm11, rdi - punpcklqdq xmm11, xmm0 - aesenc 
xmm10, xmm11 - - mov eax, ecx - mov r12d, ecx - xor eax, 16 - xor r12d, 32 - xor ecx, 48 - - movdqu xmm0, XMMWORD PTR [rax+rsi] - paddq xmm0, xmm6 - movdqu xmm1, XMMWORD PTR [r12+rsi] - movdqu XMMWORD PTR [r12+rsi], xmm0 - paddq xmm1, xmm11 - movdqu xmm0, XMMWORD PTR [rcx+rsi] - movdqu XMMWORD PTR [rcx+rsi], xmm1 - paddq xmm0, xmm8 - movdqu XMMWORD PTR [rax+rsi], xmm0 - - movq rcx, xmm10 - and ecx, 2097136 - - movdqa xmm0, xmm10 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [r8], xmm0 - mov r12, QWORD PTR [rcx+rsi] - - mov r9, QWORD PTR [rbx+8] - - xor edx, 16 - mov r8d, edx - mov r15d, edx - - movq rdx, xmm5 - shl rdx, 32 - movq rax, xmm4 - xor rdx, rax - xor r10, rdx - mov rax, r10 - mul r11 - mov r11d, r8d - xor r11d, 48 - movq xmm0, rdx - xor rdx, [r11+r13] - movq xmm1, rax - xor rax, [r11+r13+8] - punpcklqdq xmm0, xmm1 - - pxor xmm0, XMMWORD PTR [r8+r13] - xor r8d, 32 - movdqu xmm1, XMMWORD PTR [r11+r13] - paddq xmm0, xmm7 - paddq xmm1, xmm2 - movdqu XMMWORD PTR [r11+r13], xmm0 - movdqu xmm0, XMMWORD PTR [r8+r13] - movdqu XMMWORD PTR [r8+r13], xmm1 - paddq xmm0, xmm3 - movdqu XMMWORD PTR [r15+r13], xmm0 - - mov r11, QWORD PTR [rsp+256] - add r11, rdx - mov rdx, QWORD PTR [rsp+264] - add rdx, rax - mov QWORD PTR [rbx], r11 - xor r11, r10 - mov QWORD PTR [rbx+8], rdx - xor rdx, r9 - mov QWORD PTR [rsp+256], r11 - and r11d, 2097136 - mov QWORD PTR [rsp+264], rdx - mov QWORD PTR [rsp+8], r11 - lea r15, QWORD PTR [r11+r13] - movdqu xmm15, XMMWORD PTR [r11+r13] - lea r13, QWORD PTR [rsi+rcx] - movdqa xmm0, xmm5 - psrldq xmm0, 8 - movaps xmm2, xmm13 - movq r10, xmm0 - psllq xmm5, 1 - shl r10, 32 - movdqa xmm0, xmm9 - psrldq xmm0, 8 - movdqa xmm1, xmm10 - movq r11, xmm0 - psrldq xmm1, 8 - movq r8, xmm1 - psrldq xmm4, 8 - movaps xmm0, xmm13 - movq rax, xmm4 - xor r10, rax - movaps xmm1, xmm13 - xor r10, r12 - lea rax, QWORD PTR [r11+1] - shr rax, 1 - movdqa xmm3, xmm9 - punpcklqdq xmm3, xmm10 - paddq xmm5, xmm3 - movq rdx, xmm5 - psrldq xmm5, 8 - cvtsi2sd xmm2, rax - or edx, 
-2147483647 - lea rax, QWORD PTR [r8+1] - shr rax, 1 - movq r9, xmm5 - cvtsi2sd xmm0, rax - or r9d, -2147483647 - cvtsi2sd xmm1, rdx - unpcklpd xmm2, xmm0 - movaps xmm0, xmm13 - cvtsi2sd xmm0, r9 - unpcklpd xmm1, xmm0 - divpd xmm2, xmm1 - paddq xmm2, xmm14 - cvttsd2si rax, xmm2 - psrldq xmm2, 8 - mov rbx, rax - imul rax, rdx - sub r11, rax - js div_fix_1_sandybridge -div_fix_1_ret_sandybridge: - - cvttsd2si rdx, xmm2 - mov rax, rdx - imul rax, r9 - movd xmm2, r11d - movd xmm4, ebx - sub r8, rax - js div_fix_2_sandybridge -div_fix_2_ret_sandybridge: - - movd xmm1, r8d - movd xmm0, edx - punpckldq xmm2, xmm1 - punpckldq xmm4, xmm0 - punpckldq xmm4, xmm2 - paddq xmm3, xmm4 - movdqa xmm0, xmm3 - psrlq xmm0, 12 - paddq xmm0, xmm12 - sqrtpd xmm1, xmm0 - movq r9, xmm1 - movdqa xmm5, xmm1 - psrlq xmm5, 19 - test r9, 524287 - je sqrt_fix_1_sandybridge -sqrt_fix_1_ret_sandybridge: - - movq r9, xmm10 - psrldq xmm1, 8 - movq r8, xmm1 - test r8, 524287 - je sqrt_fix_2_sandybridge -sqrt_fix_2_ret_sandybridge: - - mov r12d, ecx - mov r8d, ecx - xor r12d, 16 - xor r8d, 32 - xor ecx, 48 - mov rax, r10 - mul r9 - movq xmm0, rax - movq xmm3, rdx - punpcklqdq xmm3, xmm0 - - movdqu xmm0, XMMWORD PTR [r12+rsi] - pxor xmm0, xmm3 - movdqu xmm1, XMMWORD PTR [r8+rsi] - xor rdx, [r8+rsi] - xor rax, [r8+rsi+8] - movdqu xmm3, XMMWORD PTR [rcx+rsi] - paddq xmm0, xmm6 - paddq xmm1, xmm11 - paddq xmm3, xmm8 - movdqu XMMWORD PTR [r8+rsi], xmm0 - movdqu XMMWORD PTR [rcx+rsi], xmm1 - movdqu XMMWORD PTR [r12+rsi], xmm3 - - add rdi, rdx - mov QWORD PTR [r13], rdi - xor rdi, r10 - mov ecx, edi - and ecx, 2097136 - lea r8, QWORD PTR [rcx+rsi] - - mov rdx, QWORD PTR [r13+8] - add rbp, rax - mov QWORD PTR [r13+8], rbp - movdqu xmm11, XMMWORD PTR [rcx+rsi] - xor rbp, rdx - mov r13, QWORD PTR [rsp] - movdqa xmm3, xmm7 - mov rdx, QWORD PTR [rsp+8] - movdqa xmm8, xmm6 - mov r10, QWORD PTR [rsp+256] - movdqa xmm7, xmm9 - mov r11, QWORD PTR [rsp+264] - movdqa xmm6, xmm10 - mov r9, r15 - dec r14d - jne 
main_loop_double_sandybridge - - ldmxcsr DWORD PTR [rsp+272] - movaps xmm13, XMMWORD PTR [rsp+48] - lea r11, QWORD PTR [rsp+184] - movaps xmm6, XMMWORD PTR [r11-24] - movaps xmm7, XMMWORD PTR [r11-40] - movaps xmm8, XMMWORD PTR [r11-56] - movaps xmm9, XMMWORD PTR [r11-72] - movaps xmm10, XMMWORD PTR [r11-88] - movaps xmm11, XMMWORD PTR [r11-104] - movaps xmm12, XMMWORD PTR [r11-120] - movaps xmm14, XMMWORD PTR [rsp+32] - movaps xmm15, XMMWORD PTR [rsp+16] - mov rsp, r11 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - pop rbx - jmp cnv2_double_mainloop_asm_sandybridge_endp - -div_fix_1_sandybridge: - dec rbx - add r11, rdx - jmp div_fix_1_ret_sandybridge - -div_fix_2_sandybridge: - dec rdx - add r8, r9 - jmp div_fix_2_ret_sandybridge - -sqrt_fix_1_sandybridge: - movq r8, xmm3 - movdqa xmm0, xmm5 - psrldq xmm0, 8 - dec r9 - mov r11d, -1022 - shl r11, 32 - mov rax, r9 - shr r9, 19 - shr rax, 20 - mov rdx, r9 - sub rdx, rax - lea rdx, [rdx+r11+1] - add rax, r11 - imul rdx, rax - sub rdx, r8 - adc r9, 0 - movq xmm5, r9 - punpcklqdq xmm5, xmm0 - jmp sqrt_fix_1_ret_sandybridge - -sqrt_fix_2_sandybridge: - psrldq xmm3, 8 - movq r11, xmm3 - dec r8 - mov ebx, -1022 - shl rbx, 32 - mov rax, r8 - shr r8, 19 - shr rax, 20 - mov rdx, r8 - sub rdx, rax - lea rdx, [rdx+rbx+1] - add rax, rbx - imul rdx, rax - sub rdx, r11 - adc r8, 0 - movq xmm0, r8 - punpcklqdq xmm5, xmm0 - jmp sqrt_fix_2_ret_sandybridge - -cnv2_double_mainloop_asm_sandybridge_endp: diff --git a/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc b/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc deleted file mode 100644 index b881b669..00000000 --- a/src/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc +++ /dev/null @@ -1,182 +0,0 @@ - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 64 - - stmxcsr DWORD PTR [rsp] - mov DWORD PTR [rsp+4], 24448 - ldmxcsr DWORD PTR [rsp+4] - - 
mov rax, QWORD PTR [rcx+48] - mov r9, rcx - xor rax, QWORD PTR [rcx+16] - mov ebp, 524288 - mov r8, QWORD PTR [rcx+32] - xor r8, QWORD PTR [rcx] - mov r11, QWORD PTR [rcx+40] - mov r10, r8 - mov rdx, QWORD PTR [rcx+56] - movq xmm3, rax - xor rdx, QWORD PTR [rcx+24] - xor r11, QWORD PTR [rcx+8] - mov rbx, QWORD PTR [rcx+224] - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - movq xmm0, rdx - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r9+72] - mov rdi, QWORD PTR [r9+104] - and r10d, 2097136 - movaps XMMWORD PTR [rsp+48], xmm6 - movq xmm4, rax - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - xorps xmm8, xmm8 - mov ax, 1023 - shl rax, 52 - movq xmm7, rax - mov r15, QWORD PTR [r9+96] - punpcklqdq xmm3, xmm0 - movq xmm0, rcx - punpcklqdq xmm4, xmm0 - - ALIGN(64) -cnv2_main_loop_bulldozer: - movdqa xmm5, XMMWORD PTR [r10+rbx] - movq xmm6, r8 - pinsrq xmm6, r11, 1 - lea rdx, QWORD PTR [r10+rbx] - lea r9, QWORD PTR [rdi+rdi] - shl rdi, 32 - - mov ecx, r10d - mov eax, r10d - xor ecx, 16 - xor eax, 32 - xor r10d, 48 - aesenc xmm5, xmm6 - movdqa xmm2, XMMWORD PTR [rcx+rbx] - movdqa xmm1, XMMWORD PTR [rax+rbx] - movdqa xmm0, XMMWORD PTR [r10+rbx] - paddq xmm2, xmm3 - paddq xmm1, xmm6 - paddq xmm0, xmm4 - movdqa XMMWORD PTR [rcx+rbx], xmm0 - movdqa XMMWORD PTR [rax+rbx], xmm2 - movdqa XMMWORD PTR [r10+rbx], xmm1 - - movaps xmm1, xmm8 - mov rsi, r15 - xor rsi, rdi - - mov edi, 1023 - shl rdi, 52 - - movq r14, xmm5 - pextrq rax, xmm5, 1 - - movdqa xmm0, xmm5 - pxor xmm0, xmm3 - mov r10, r14 - and r10d, 2097136 - movdqa XMMWORD PTR [rdx], xmm0 - xor rsi, QWORD PTR [r10+rbx] - lea r12, QWORD PTR [r10+rbx] - mov r13, QWORD PTR [r10+rbx+8] - - add r9d, r14d - or r9d, -2147483647 - xor edx, edx - div r9 - mov eax, eax - shl rdx, 32 - lea r15, [rax+rdx] - lea rax, [r14+r15] - shr rax, 12 - add rax, rdi - movq xmm0, rax - sqrtsd xmm1, xmm0 - movq rdi, xmm1 - test rdi, 524287 - je sqrt_fixup_bulldozer - shr rdi, 19 - -sqrt_fixup_bulldozer_ret: - 
mov rax, rsi - mul r14 - movq xmm1, rax - movq xmm0, rdx - punpcklqdq xmm0, xmm1 - - mov r9d, r10d - mov ecx, r10d - xor r9d, 16 - xor ecx, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [rcx+rbx] - xor rdx, [rcx+rbx] - xor rax, [rcx+rbx+8] - movdqa xmm2, XMMWORD PTR [r9+rbx] - pxor xmm2, xmm0 - paddq xmm4, XMMWORD PTR [r10+rbx] - paddq xmm2, xmm3 - paddq xmm1, xmm6 - movdqa XMMWORD PTR [r9+rbx], xmm4 - movdqa XMMWORD PTR [rcx+rbx], xmm2 - movdqa XMMWORD PTR [r10+rbx], xmm1 - - movdqa xmm4, xmm3 - add r8, rdx - add r11, rax - mov QWORD PTR [r12], r8 - xor r8, rsi - mov QWORD PTR [r12+8], r11 - mov r10, r8 - xor r11, r13 - and r10d, 2097136 - movdqa xmm3, xmm5 - dec ebp - jne cnv2_main_loop_bulldozer - - ldmxcsr DWORD PTR [rsp] - movaps xmm6, XMMWORD PTR [rsp+48] - lea r11, QWORD PTR [rsp+64] - mov rbx, QWORD PTR [r11+56] - mov rbp, QWORD PTR [r11+64] - mov rsi, QWORD PTR [r11+72] - movaps xmm8, XMMWORD PTR [r11-48] - movaps xmm7, XMMWORD PTR [rsp+32] - mov rsp, r11 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - jmp cnv2_main_loop_bulldozer_endp - -sqrt_fixup_bulldozer: - movq r9, xmm5 - add r9, r15 - dec rdi - mov edx, -1022 - shl rdx, 32 - mov rax, rdi - shr rdi, 19 - shr rax, 20 - mov rcx, rdi - sub rcx, rax - lea rcx, [rcx+rdx+1] - add rax, rdx - imul rcx, rax - sub rcx, r9 - adc rdi, 0 - jmp sqrt_fixup_bulldozer_ret - -cnv2_main_loop_bulldozer_endp: diff --git a/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc deleted file mode 100644 index 863673de..00000000 --- a/src/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc +++ /dev/null @@ -1,188 +0,0 @@ - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 80 - - stmxcsr DWORD PTR [rsp] - mov DWORD PTR [rsp+4], 24448 - ldmxcsr DWORD PTR [rsp+4] - - mov rax, QWORD PTR [rcx+48] - mov r9, rcx - xor rax, QWORD PTR [rcx+16] - mov esi, 524288 - mov r8, QWORD PTR [rcx+32] - mov r13d, -2147483647 
- xor r8, QWORD PTR [rcx] - mov r11, QWORD PTR [rcx+40] - mov r10, r8 - mov rdx, QWORD PTR [rcx+56] - movq xmm4, rax - xor rdx, QWORD PTR [rcx+24] - xor r11, QWORD PTR [rcx+8] - mov rbx, QWORD PTR [rcx+224] - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - movq xmm0, rdx - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r9+72] - movq xmm3, QWORD PTR [r9+104] - movaps XMMWORD PTR [rsp+64], xmm6 - movaps XMMWORD PTR [rsp+48], xmm7 - movaps XMMWORD PTR [rsp+32], xmm8 - and r10d, 2097136 - movq xmm5, rax - - xor eax, eax - mov QWORD PTR [rsp+16], rax - - mov ax, 1023 - shl rax, 52 - movq xmm8, rax - mov r15, QWORD PTR [r9+96] - punpcklqdq xmm4, xmm0 - movq xmm0, rcx - punpcklqdq xmm5, xmm0 - movdqu xmm6, XMMWORD PTR [r10+rbx] - - ALIGN(64) -main_loop_ivybridge: - lea rdx, QWORD PTR [r10+rbx] - mov ecx, r10d - mov eax, r10d - mov rdi, r15 - xor ecx, 16 - xor eax, 32 - xor r10d, 48 - movq xmm0, r11 - movq xmm7, r8 - punpcklqdq xmm7, xmm0 - aesenc xmm6, xmm7 - movq rbp, xmm6 - mov r9, rbp - and r9d, 2097136 - movdqu xmm2, XMMWORD PTR [rcx+rbx] - movdqu xmm1, XMMWORD PTR [rax+rbx] - movdqu xmm0, XMMWORD PTR [r10+rbx] - paddq xmm1, xmm7 - paddq xmm0, xmm5 - paddq xmm2, xmm4 - movdqu XMMWORD PTR [rcx+rbx], xmm0 - movdqu XMMWORD PTR [rax+rbx], xmm2 - movdqu XMMWORD PTR [r10+rbx], xmm1 - mov r10, r9 - xor r10d, 32 - movq rcx, xmm3 - mov rax, rcx - shl rax, 32 - xor rdi, rax - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [rdx], xmm0 - xor rdi, QWORD PTR [r9+rbx] - lea r14, QWORD PTR [r9+rbx] - mov r12, QWORD PTR [r14+8] - xor edx, edx - lea r9d, DWORD PTR [ecx+ecx] - add r9d, ebp - movdqa xmm0, xmm6 - psrldq xmm0, 8 - or r9d, r13d - movq rax, xmm0 - div r9 - xorps xmm3, xmm3 - mov eax, eax - shl rdx, 32 - add rdx, rax - lea r9, QWORD PTR [rdx+rbp] - mov r15, rdx - mov rax, r9 - shr rax, 12 - movq xmm0, rax - paddq xmm0, xmm8 - sqrtsd xmm3, xmm0 - psubq xmm3, XMMWORD PTR [rsp+16] - movq rdx, xmm3 - test edx, 524287 - je sqrt_fixup_ivybridge - psrlq xmm3, 
19 -sqrt_fixup_ivybridge_ret: - - mov ecx, r10d - mov rax, rdi - mul rbp - movq xmm2, rdx - xor rdx, [rcx+rbx] - add r8, rdx - mov QWORD PTR [r14], r8 - xor r8, rdi - mov edi, r8d - and edi, 2097136 - movq xmm0, rax - xor rax, [rcx+rbx+8] - add r11, rax - mov QWORD PTR [r14+8], r11 - punpcklqdq xmm2, xmm0 - - mov r9d, r10d - xor r9d, 48 - xor r10d, 16 - pxor xmm2, XMMWORD PTR [r9+rbx] - movdqu xmm0, XMMWORD PTR [r10+rbx] - paddq xmm0, xmm5 - movdqu xmm1, XMMWORD PTR [rcx+rbx] - paddq xmm2, xmm4 - paddq xmm1, xmm7 - movdqa xmm5, xmm4 - movdqu XMMWORD PTR [r9+rbx], xmm0 - movdqa xmm4, xmm6 - movdqu XMMWORD PTR [rcx+rbx], xmm2 - movdqu XMMWORD PTR [r10+rbx], xmm1 - movdqu xmm6, [rdi+rbx] - mov r10d, edi - xor r11, r12 - dec rsi - jne main_loop_ivybridge - - ldmxcsr DWORD PTR [rsp] - mov rbx, QWORD PTR [rsp+160] - movaps xmm6, XMMWORD PTR [rsp+64] - movaps xmm7, XMMWORD PTR [rsp+48] - movaps xmm8, XMMWORD PTR [rsp+32] - add rsp, 80 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - jmp cnv2_main_loop_ivybridge_endp - -sqrt_fixup_ivybridge: - dec rdx - mov r13d, -1022 - shl r13, 32 - mov rax, rdx - shr rdx, 19 - shr rax, 20 - mov rcx, rdx - sub rcx, rax - add rax, r13 - not r13 - sub rcx, r13 - mov r13d, -2147483647 - imul rcx, rax - sub rcx, r9 - adc rdx, 0 - movq xmm3, rdx - jmp sqrt_fixup_ivybridge_ret - -cnv2_main_loop_ivybridge_endp: diff --git a/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc b/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc deleted file mode 100644 index 8ccc5e17..00000000 --- a/src/crypto/asm/cn2/cnv2_main_loop_ryzen.inc +++ /dev/null @@ -1,181 +0,0 @@ - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 64 - - stmxcsr DWORD PTR [rsp] - mov DWORD PTR [rsp+4], 24448 - ldmxcsr DWORD PTR [rsp+4] - - mov rax, QWORD PTR [rcx+48] - mov r9, rcx - xor rax, QWORD PTR [rcx+16] - mov ebp, 524288 - mov r8, QWORD PTR [rcx+32] 
- xor r8, QWORD PTR [rcx] - mov r11, QWORD PTR [rcx+40] - mov r10, r8 - mov rdx, QWORD PTR [rcx+56] - movq xmm3, rax - xor rdx, QWORD PTR [rcx+24] - xor r11, QWORD PTR [rcx+8] - mov rbx, QWORD PTR [rcx+224] - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - movq xmm0, rdx - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r9+72] - mov rdi, QWORD PTR [r9+104] - and r10d, 2097136 - movaps XMMWORD PTR [rsp+48], xmm6 - movq xmm4, rax - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - xorps xmm8, xmm8 - mov ax, 1023 - shl rax, 52 - movq xmm7, rax - mov r15, QWORD PTR [r9+96] - punpcklqdq xmm3, xmm0 - movq xmm0, rcx - punpcklqdq xmm4, xmm0 - - ALIGN(64) -main_loop_ryzen: - movdqa xmm5, XMMWORD PTR [r10+rbx] - movq xmm0, r11 - movq xmm6, r8 - punpcklqdq xmm6, xmm0 - lea rdx, QWORD PTR [r10+rbx] - lea r9, QWORD PTR [rdi+rdi] - shl rdi, 32 - - mov ecx, r10d - mov eax, r10d - xor ecx, 16 - xor eax, 32 - xor r10d, 48 - aesenc xmm5, xmm6 - movdqa xmm2, XMMWORD PTR [rcx+rbx] - movdqa xmm1, XMMWORD PTR [rax+rbx] - movdqa xmm0, XMMWORD PTR [r10+rbx] - paddq xmm2, xmm3 - paddq xmm1, xmm6 - paddq xmm0, xmm4 - movdqa XMMWORD PTR [rcx+rbx], xmm0 - movdqa XMMWORD PTR [rax+rbx], xmm2 - movdqa XMMWORD PTR [r10+rbx], xmm1 - - movaps xmm1, xmm8 - mov rsi, r15 - xor rsi, rdi - movq r14, xmm5 - movdqa xmm0, xmm5 - pxor xmm0, xmm3 - mov r10, r14 - and r10d, 2097136 - movdqa XMMWORD PTR [rdx], xmm0 - xor rsi, QWORD PTR [r10+rbx] - lea r12, QWORD PTR [r10+rbx] - mov r13, QWORD PTR [r10+rbx+8] - - add r9d, r14d - or r9d, -2147483647 - xor edx, edx - movdqa xmm0, xmm5 - psrldq xmm0, 8 - movq rax, xmm0 - - div r9 - movq xmm0, rax - movq xmm1, rdx - punpckldq xmm0, xmm1 - movq r15, xmm0 - paddq xmm0, xmm5 - movdqa xmm2, xmm0 - psrlq xmm0, 12 - paddq xmm0, xmm7 - sqrtsd xmm1, xmm0 - movq rdi, xmm1 - test rdi, 524287 - je sqrt_fixup_ryzen - shr rdi, 19 - -sqrt_fixup_ryzen_ret: - mov rax, rsi - mul r14 - movq xmm1, rax - movq xmm0, rdx - punpcklqdq xmm0, xmm1 - - mov 
r9d, r10d - mov ecx, r10d - xor r9d, 16 - xor ecx, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [rcx+rbx] - xor rdx, [rcx+rbx] - xor rax, [rcx+rbx+8] - movdqa xmm2, XMMWORD PTR [r9+rbx] - pxor xmm2, xmm0 - paddq xmm4, XMMWORD PTR [r10+rbx] - paddq xmm2, xmm3 - paddq xmm1, xmm6 - movdqa XMMWORD PTR [r9+rbx], xmm4 - movdqa XMMWORD PTR [rcx+rbx], xmm2 - movdqa XMMWORD PTR [r10+rbx], xmm1 - - movdqa xmm4, xmm3 - add r8, rdx - add r11, rax - mov QWORD PTR [r12], r8 - xor r8, rsi - mov QWORD PTR [r12+8], r11 - mov r10, r8 - xor r11, r13 - and r10d, 2097136 - movdqa xmm3, xmm5 - dec ebp - jne main_loop_ryzen - - ldmxcsr DWORD PTR [rsp] - movaps xmm6, XMMWORD PTR [rsp+48] - lea r11, QWORD PTR [rsp+64] - mov rbx, QWORD PTR [r11+56] - mov rbp, QWORD PTR [r11+64] - mov rsi, QWORD PTR [r11+72] - movaps xmm8, XMMWORD PTR [r11-48] - movaps xmm7, XMMWORD PTR [rsp+32] - mov rsp, r11 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - jmp cnv2_main_loop_ryzen_endp - -sqrt_fixup_ryzen: - movq r9, xmm2 - dec rdi - mov edx, -1022 - shl rdx, 32 - mov rax, rdi - shr rdi, 19 - shr rax, 20 - mov rcx, rdi - sub rcx, rax - lea rcx, [rcx+rdx+1] - add rax, rdx - imul rcx, rax - sub rcx, r9 - adc rdi, 0 - jmp sqrt_fixup_ryzen_ret - -cnv2_main_loop_ryzen_endp: diff --git a/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc b/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc deleted file mode 100644 index d9bfc9c1..00000000 --- a/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc +++ /dev/null @@ -1,413 +0,0 @@ - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov rax, rsp - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 184 - - stmxcsr DWORD PTR [rsp+272] - mov DWORD PTR [rsp+276], 24448 - ldmxcsr DWORD PTR [rsp+276] - - mov r13, QWORD PTR [rcx+224] - mov r9, rdx - mov r10, QWORD PTR [rcx+32] - mov r8, rcx - xor r10, QWORD PTR [rcx] - mov r14d, 393216 - mov r11, QWORD PTR [rcx+40] - xor r11, QWORD PTR [rcx+8] - mov rsi, QWORD PTR [rdx+224] - mov rdx, QWORD PTR 
[rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov rdi, QWORD PTR [r9+32] - xor rdi, QWORD PTR [r9] - mov rbp, QWORD PTR [r9+40] - xor rbp, QWORD PTR [r9+8] - movq xmm0, rdx - movaps XMMWORD PTR [rax-88], xmm6 - movaps XMMWORD PTR [rax-104], xmm7 - movaps XMMWORD PTR [rax-120], xmm8 - movaps XMMWORD PTR [rsp+112], xmm9 - movaps XMMWORD PTR [rsp+96], xmm10 - movaps XMMWORD PTR [rsp+80], xmm11 - movaps XMMWORD PTR [rsp+64], xmm12 - movaps XMMWORD PTR [rsp+48], xmm13 - movaps XMMWORD PTR [rsp+32], xmm14 - movaps XMMWORD PTR [rsp+16], xmm15 - mov rdx, r10 - movq xmm4, QWORD PTR [r8+96] - and edx, 2097136 - mov rax, QWORD PTR [rcx+48] - xorps xmm13, xmm13 - xor rax, QWORD PTR [rcx+16] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r8+72] - movq xmm5, QWORD PTR [r8+104] - movq xmm7, rax - - mov eax, 1 - shl rax, 52 - movq xmm14, rax - punpcklqdq xmm14, xmm14 - - mov eax, 1023 - shl rax, 52 - movq xmm12, rax - punpcklqdq xmm12, xmm12 - - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - punpcklqdq xmm7, xmm0 - movq xmm0, rcx - mov rcx, QWORD PTR [r9+56] - xor rcx, QWORD PTR [r9+24] - movq xmm3, rax - mov rax, QWORD PTR [r9+48] - xor rax, QWORD PTR [r9+16] - punpcklqdq xmm3, xmm0 - movq xmm0, rcx - mov QWORD PTR [rsp], r13 - mov rcx, QWORD PTR [r9+88] - xor rcx, QWORD PTR [r9+72] - movq xmm6, rax - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - punpcklqdq xmm6, xmm0 - movq xmm0, rcx - mov QWORD PTR [rsp+256], r10 - mov rcx, rdi - mov QWORD PTR [rsp+264], r11 - movq xmm8, rax - and ecx, 2097136 - punpcklqdq xmm8, xmm0 - movq xmm0, QWORD PTR [r9+96] - punpcklqdq xmm4, xmm0 - movq xmm0, QWORD PTR [r9+104] - lea r8, QWORD PTR [rcx+rsi] - movdqu xmm11, XMMWORD PTR [r8] - punpcklqdq xmm5, xmm0 - lea r9, QWORD PTR [rdx+r13] - movdqu xmm15, XMMWORD PTR [r9] - - ALIGN(64) -rwz_main_loop_double: - movdqu xmm9, xmm15 - mov eax, edx - mov ebx, edx - xor eax, 16 - xor ebx, 32 - xor edx, 48 - - movq xmm0, r11 - movq xmm2, r10 - punpcklqdq xmm2, xmm0 - aesenc xmm9, xmm2 
- - movdqu xmm0, XMMWORD PTR [rdx+r13] - movdqu xmm1, XMMWORD PTR [rbx+r13] - paddq xmm0, xmm7 - paddq xmm1, xmm2 - movdqu XMMWORD PTR [rbx+r13], xmm0 - movdqu xmm0, XMMWORD PTR [rax+r13] - movdqu XMMWORD PTR [rdx+r13], xmm1 - paddq xmm0, xmm3 - movdqu XMMWORD PTR [rax+r13], xmm0 - - movq r11, xmm9 - mov edx, r11d - and edx, 2097136 - movdqa xmm0, xmm9 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [r9], xmm0 - - lea rbx, QWORD PTR [rdx+r13] - mov r10, QWORD PTR [rdx+r13] - - movdqu xmm10, xmm11 - movq xmm0, rbp - movq xmm11, rdi - punpcklqdq xmm11, xmm0 - aesenc xmm10, xmm11 - - mov eax, ecx - mov r12d, ecx - xor eax, 16 - xor r12d, 32 - xor ecx, 48 - - movdqu xmm0, XMMWORD PTR [rcx+rsi] - paddq xmm0, xmm6 - movdqu xmm1, XMMWORD PTR [r12+rsi] - movdqu XMMWORD PTR [r12+rsi], xmm0 - paddq xmm1, xmm11 - movdqu xmm0, XMMWORD PTR [rax+rsi] - movdqu XMMWORD PTR [rcx+rsi], xmm1 - paddq xmm0, xmm8 - movdqu XMMWORD PTR [rax+rsi], xmm0 - - movq rcx, xmm10 - and ecx, 2097136 - - movdqa xmm0, xmm10 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [r8], xmm0 - mov r12, QWORD PTR [rcx+rsi] - - mov r9, QWORD PTR [rbx+8] - - xor edx, 16 - mov r8d, edx - mov r15d, edx - - movq rdx, xmm5 - shl rdx, 32 - movq rax, xmm4 - xor rdx, rax - xor r10, rdx - mov rax, r10 - mul r11 - mov r11d, r8d - xor r11d, 48 - movq xmm0, rdx - xor rdx, [r11+r13] - movq xmm1, rax - xor rax, [r11+r13+8] - punpcklqdq xmm0, xmm1 - - pxor xmm0, XMMWORD PTR [r8+r13] - movdqu xmm1, XMMWORD PTR [r11+r13] - paddq xmm0, xmm3 - paddq xmm1, xmm2 - movdqu XMMWORD PTR [r8+r13], xmm0 - xor r8d, 32 - movdqu xmm0, XMMWORD PTR [r8+r13] - movdqu XMMWORD PTR [r8+r13], xmm1 - paddq xmm0, xmm7 - movdqu XMMWORD PTR [r11+r13], xmm0 - - mov r11, QWORD PTR [rsp+256] - add r11, rdx - mov rdx, QWORD PTR [rsp+264] - add rdx, rax - mov QWORD PTR [rbx], r11 - xor r11, r10 - mov QWORD PTR [rbx+8], rdx - xor rdx, r9 - mov QWORD PTR [rsp+256], r11 - and r11d, 2097136 - mov QWORD PTR [rsp+264], rdx - mov QWORD PTR [rsp+8], r11 - lea r15, QWORD PTR 
[r11+r13] - movdqu xmm15, XMMWORD PTR [r11+r13] - lea r13, QWORD PTR [rsi+rcx] - movdqa xmm0, xmm5 - psrldq xmm0, 8 - movaps xmm2, xmm13 - movq r10, xmm0 - psllq xmm5, 1 - shl r10, 32 - movdqa xmm0, xmm9 - psrldq xmm0, 8 - movdqa xmm1, xmm10 - movq r11, xmm0 - psrldq xmm1, 8 - movq r8, xmm1 - psrldq xmm4, 8 - movaps xmm0, xmm13 - movq rax, xmm4 - xor r10, rax - movaps xmm1, xmm13 - xor r10, r12 - lea rax, QWORD PTR [r11+1] - shr rax, 1 - movdqa xmm3, xmm9 - punpcklqdq xmm3, xmm10 - paddq xmm5, xmm3 - movq rdx, xmm5 - psrldq xmm5, 8 - cvtsi2sd xmm2, rax - or edx, -2147483647 - lea rax, QWORD PTR [r8+1] - shr rax, 1 - movq r9, xmm5 - cvtsi2sd xmm0, rax - or r9d, -2147483647 - cvtsi2sd xmm1, rdx - unpcklpd xmm2, xmm0 - movaps xmm0, xmm13 - cvtsi2sd xmm0, r9 - unpcklpd xmm1, xmm0 - divpd xmm2, xmm1 - paddq xmm2, xmm14 - cvttsd2si rax, xmm2 - psrldq xmm2, 8 - mov rbx, rax - imul rax, rdx - sub r11, rax - js rwz_div_fix_1 -rwz_div_fix_1_ret: - - cvttsd2si rdx, xmm2 - mov rax, rdx - imul rax, r9 - movd xmm2, r11d - movd xmm4, ebx - sub r8, rax - js rwz_div_fix_2 -rwz_div_fix_2_ret: - - movd xmm1, r8d - movd xmm0, edx - punpckldq xmm2, xmm1 - punpckldq xmm4, xmm0 - punpckldq xmm4, xmm2 - paddq xmm3, xmm4 - movdqa xmm0, xmm3 - psrlq xmm0, 12 - paddq xmm0, xmm12 - sqrtpd xmm1, xmm0 - movq r9, xmm1 - movdqa xmm5, xmm1 - psrlq xmm5, 19 - test r9, 524287 - je rwz_sqrt_fix_1 -rwz_sqrt_fix_1_ret: - - movq r9, xmm10 - psrldq xmm1, 8 - movq r8, xmm1 - test r8, 524287 - je rwz_sqrt_fix_2 -rwz_sqrt_fix_2_ret: - - mov r12d, ecx - mov r8d, ecx - xor r12d, 16 - xor r8d, 32 - xor ecx, 48 - mov rax, r10 - mul r9 - movq xmm0, rax - movq xmm3, rdx - punpcklqdq xmm3, xmm0 - - movdqu xmm0, XMMWORD PTR [r12+rsi] - pxor xmm0, xmm3 - movdqu xmm1, XMMWORD PTR [r8+rsi] - xor rdx, [r8+rsi] - xor rax, [r8+rsi+8] - movdqu xmm3, XMMWORD PTR [rcx+rsi] - paddq xmm3, xmm6 - paddq xmm1, xmm11 - paddq xmm0, xmm8 - movdqu XMMWORD PTR [r8+rsi], xmm3 - movdqu XMMWORD PTR [rcx+rsi], xmm1 - movdqu XMMWORD PTR 
[r12+rsi], xmm0 - - add rdi, rdx - mov QWORD PTR [r13], rdi - xor rdi, r10 - mov ecx, edi - and ecx, 2097136 - lea r8, QWORD PTR [rcx+rsi] - - mov rdx, QWORD PTR [r13+8] - add rbp, rax - mov QWORD PTR [r13+8], rbp - movdqu xmm11, XMMWORD PTR [rcx+rsi] - xor rbp, rdx - mov r13, QWORD PTR [rsp] - movdqa xmm3, xmm7 - mov rdx, QWORD PTR [rsp+8] - movdqa xmm8, xmm6 - mov r10, QWORD PTR [rsp+256] - movdqa xmm7, xmm9 - mov r11, QWORD PTR [rsp+264] - movdqa xmm6, xmm10 - mov r9, r15 - dec r14d - jne rwz_main_loop_double - - ldmxcsr DWORD PTR [rsp+272] - movaps xmm13, XMMWORD PTR [rsp+48] - lea r11, QWORD PTR [rsp+184] - movaps xmm6, XMMWORD PTR [r11-24] - movaps xmm7, XMMWORD PTR [r11-40] - movaps xmm8, XMMWORD PTR [r11-56] - movaps xmm9, XMMWORD PTR [r11-72] - movaps xmm10, XMMWORD PTR [r11-88] - movaps xmm11, XMMWORD PTR [r11-104] - movaps xmm12, XMMWORD PTR [r11-120] - movaps xmm14, XMMWORD PTR [rsp+32] - movaps xmm15, XMMWORD PTR [rsp+16] - mov rsp, r11 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - pop rbx - jmp rwz_cnv2_double_mainloop_asm_endp - -rwz_div_fix_1: - dec rbx - add r11, rdx - jmp rwz_div_fix_1_ret - -rwz_div_fix_2: - dec rdx - add r8, r9 - jmp rwz_div_fix_2_ret - -rwz_sqrt_fix_1: - movq r8, xmm3 - movdqa xmm0, xmm5 - psrldq xmm0, 8 - dec r9 - mov r11d, -1022 - shl r11, 32 - mov rax, r9 - shr r9, 19 - shr rax, 20 - mov rdx, r9 - sub rdx, rax - lea rdx, [rdx+r11+1] - add rax, r11 - imul rdx, rax - sub rdx, r8 - adc r9, 0 - movq xmm5, r9 - punpcklqdq xmm5, xmm0 - jmp rwz_sqrt_fix_1_ret - -rwz_sqrt_fix_2: - psrldq xmm3, 8 - movq r11, xmm3 - dec r8 - mov ebx, -1022 - shl rbx, 32 - mov rax, r8 - shr r8, 19 - shr rax, 20 - mov rdx, r8 - sub rdx, rax - lea rdx, [rdx+rbx+1] - add rax, rbx - imul rdx, rax - sub rdx, r11 - adc r8, 0 - movq xmm0, r8 - punpcklqdq xmm5, xmm0 - jmp rwz_sqrt_fix_2_ret - -rwz_cnv2_double_mainloop_asm_endp: diff --git a/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc b/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc deleted 
file mode 100644 index b59c02d6..00000000 --- a/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc +++ /dev/null @@ -1,188 +0,0 @@ - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 80 - - stmxcsr DWORD PTR [rsp] - mov DWORD PTR [rsp+4], 24448 - ldmxcsr DWORD PTR [rsp+4] - - mov rax, QWORD PTR [rcx+48] - mov r9, rcx - xor rax, QWORD PTR [rcx+16] - mov esi, 393216 - mov r8, QWORD PTR [rcx+32] - mov r13d, -2147483647 - xor r8, QWORD PTR [rcx] - mov r11, QWORD PTR [rcx+40] - mov r10, r8 - mov rdx, QWORD PTR [rcx+56] - movq xmm4, rax - xor rdx, QWORD PTR [rcx+24] - xor r11, QWORD PTR [rcx+8] - mov rbx, QWORD PTR [rcx+224] - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - movq xmm0, rdx - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r9+72] - movq xmm3, QWORD PTR [r9+104] - movaps XMMWORD PTR [rsp+64], xmm6 - movaps XMMWORD PTR [rsp+48], xmm7 - movaps XMMWORD PTR [rsp+32], xmm8 - and r10d, 2097136 - movq xmm5, rax - - xor eax, eax - mov QWORD PTR [rsp+16], rax - - mov ax, 1023 - shl rax, 52 - movq xmm8, rax - mov r15, QWORD PTR [r9+96] - punpcklqdq xmm4, xmm0 - movq xmm0, rcx - punpcklqdq xmm5, xmm0 - movdqu xmm6, XMMWORD PTR [r10+rbx] - - ALIGN(64) -rwz_main_loop: - lea rdx, QWORD PTR [r10+rbx] - mov ecx, r10d - mov eax, r10d - mov rdi, r15 - xor ecx, 16 - xor eax, 32 - xor r10d, 48 - movq xmm0, r11 - movq xmm7, r8 - punpcklqdq xmm7, xmm0 - aesenc xmm6, xmm7 - movq rbp, xmm6 - mov r9, rbp - and r9d, 2097136 - movdqu xmm0, XMMWORD PTR [rcx+rbx] - movdqu xmm1, XMMWORD PTR [rax+rbx] - movdqu xmm2, XMMWORD PTR [r10+rbx] - paddq xmm0, xmm5 - paddq xmm1, xmm7 - paddq xmm2, xmm4 - movdqu XMMWORD PTR [rcx+rbx], xmm0 - movdqu XMMWORD PTR [rax+rbx], xmm2 - movdqu XMMWORD PTR [r10+rbx], xmm1 - mov r10, r9 - xor r10d, 32 - movq rcx, xmm3 - mov rax, rcx - shl rax, 32 - xor rdi, rax - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [rdx], xmm0 - xor rdi, QWORD PTR [r9+rbx] - lea r14, 
QWORD PTR [r9+rbx] - mov r12, QWORD PTR [r14+8] - xor edx, edx - lea r9d, DWORD PTR [ecx+ecx] - add r9d, ebp - movdqa xmm0, xmm6 - psrldq xmm0, 8 - or r9d, r13d - movq rax, xmm0 - div r9 - xorps xmm3, xmm3 - mov eax, eax - shl rdx, 32 - add rdx, rax - lea r9, QWORD PTR [rdx+rbp] - mov r15, rdx - mov rax, r9 - shr rax, 12 - movq xmm0, rax - paddq xmm0, xmm8 - sqrtsd xmm3, xmm0 - psubq xmm3, XMMWORD PTR [rsp+16] - movq rdx, xmm3 - test edx, 524287 - je rwz_sqrt_fixup - psrlq xmm3, 19 -rwz_sqrt_fixup_ret: - - mov ecx, r10d - mov rax, rdi - mul rbp - movq xmm2, rdx - xor rdx, [rcx+rbx] - add r8, rdx - mov QWORD PTR [r14], r8 - xor r8, rdi - mov edi, r8d - and edi, 2097136 - movq xmm0, rax - xor rax, [rcx+rbx+8] - add r11, rax - mov QWORD PTR [r14+8], r11 - punpcklqdq xmm2, xmm0 - - mov r9d, r10d - xor r9d, 48 - xor r10d, 16 - pxor xmm2, XMMWORD PTR [r9+rbx] - movdqu xmm0, XMMWORD PTR [r10+rbx] - paddq xmm0, xmm4 - movdqu xmm1, XMMWORD PTR [rcx+rbx] - paddq xmm2, xmm5 - paddq xmm1, xmm7 - movdqa xmm5, xmm4 - movdqu XMMWORD PTR [r9+rbx], xmm2 - movdqa xmm4, xmm6 - movdqu XMMWORD PTR [rcx+rbx], xmm0 - movdqu XMMWORD PTR [r10+rbx], xmm1 - movdqu xmm6, [rdi+rbx] - mov r10d, edi - xor r11, r12 - dec rsi - jne rwz_main_loop - - ldmxcsr DWORD PTR [rsp] - mov rbx, QWORD PTR [rsp+160] - movaps xmm6, XMMWORD PTR [rsp+64] - movaps xmm7, XMMWORD PTR [rsp+48] - movaps xmm8, XMMWORD PTR [rsp+32] - add rsp, 80 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - jmp cnv2_rwz_main_loop_endp - -rwz_sqrt_fixup: - dec rdx - mov r13d, -1022 - shl r13, 32 - mov rax, rdx - shr rdx, 19 - shr rax, 20 - mov rcx, rdx - sub rcx, rax - add rax, r13 - not r13 - sub rcx, r13 - mov r13d, -2147483647 - imul rcx, rax - sub rcx, r9 - adc rdx, 0 - movq xmm3, rdx - jmp rwz_sqrt_fixup_ret - -cnv2_rwz_main_loop_endp: diff --git a/src/crypto/asm/cn_main_loop.S b/src/crypto/asm/cn_main_loop.S deleted file mode 100644 index 7aed6c20..00000000 --- a/src/crypto/asm/cn_main_loop.S +++ /dev/null 
@@ -1,73 +0,0 @@ -#ifdef __APPLE__ -# define ALIGN(x) .align 6 -#else -# define ALIGN(x) .align 64 -#endif -.intel_syntax noprefix -#ifdef __APPLE__ -# define FN_PREFIX(fn) _ ## fn -.text -#else -# define FN_PREFIX(fn) fn -.section .text -#endif -.global FN_PREFIX(cnv2_mainloop_ivybridge_asm) -.global FN_PREFIX(cnv2_mainloop_ryzen_asm) -.global FN_PREFIX(cnv2_mainloop_bulldozer_asm) -.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm) -.global FN_PREFIX(cnv2_rwz_mainloop_asm) -.global FN_PREFIX(cnv2_rwz_double_mainloop_asm) - -ALIGN(64) -FN_PREFIX(cnv2_mainloop_ivybridge_asm): - sub rsp, 48 - mov rcx, rdi - #include "cn2/cnv2_main_loop_ivybridge.inc" - add rsp, 48 - ret 0 - mov eax, 3735929054 - -ALIGN(64) -FN_PREFIX(cnv2_mainloop_ryzen_asm): - sub rsp, 48 - mov rcx, rdi - #include "cn2/cnv2_main_loop_ryzen.inc" - add rsp, 48 - ret 0 - mov eax, 3735929054 - -ALIGN(64) -FN_PREFIX(cnv2_mainloop_bulldozer_asm): - sub rsp, 48 - mov rcx, rdi - #include "cn2/cnv2_main_loop_bulldozer.inc" - add rsp, 48 - ret 0 - mov eax, 3735929054 - -ALIGN(64) -FN_PREFIX(cnv2_double_mainloop_sandybridge_asm): - sub rsp, 48 - mov rcx, rdi - #include "cn2/cnv2_double_main_loop_sandybridge.inc" - add rsp, 48 - ret 0 - mov eax, 3735929054 - -ALIGN(64) -FN_PREFIX(cnv2_rwz_mainloop_asm): - sub rsp, 48 - mov rcx, rdi - #include "cn2/cnv2_rwz_main_loop.inc" - add rsp, 48 - ret 0 - mov eax, 3735929054 - -ALIGN(64) -FN_PREFIX(cnv2_rwz_double_mainloop_asm): - sub rsp, 48 - mov rcx, rdi - #include "cn2/cnv2_rwz_double_main_loop.inc" - add rsp, 48 - ret 0 - mov eax, 3735929054 diff --git a/src/crypto/asm/cn_main_loop.asm b/src/crypto/asm/cn_main_loop.asm deleted file mode 100644 index f0766a7c..00000000 --- a/src/crypto/asm/cn_main_loop.asm +++ /dev/null @@ -1,52 +0,0 @@ -_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE -PUBLIC cnv2_mainloop_ivybridge_asm -PUBLIC cnv2_mainloop_ryzen_asm -PUBLIC cnv2_mainloop_bulldozer_asm -PUBLIC cnv2_double_mainloop_sandybridge_asm -PUBLIC cnv2_rwz_mainloop_asm 
-PUBLIC cnv2_rwz_double_mainloop_asm - -ALIGN(64) -cnv2_mainloop_ivybridge_asm PROC - INCLUDE cn2/cnv2_main_loop_ivybridge.inc - ret 0 - mov eax, 3735929054 -cnv2_mainloop_ivybridge_asm ENDP - -ALIGN(64) -cnv2_mainloop_ryzen_asm PROC - INCLUDE cn2/cnv2_main_loop_ryzen.inc - ret 0 - mov eax, 3735929054 -cnv2_mainloop_ryzen_asm ENDP - -ALIGN(64) -cnv2_mainloop_bulldozer_asm PROC - INCLUDE cn2/cnv2_main_loop_bulldozer.inc - ret 0 - mov eax, 3735929054 -cnv2_mainloop_bulldozer_asm ENDP - -ALIGN(64) -cnv2_double_mainloop_sandybridge_asm PROC - INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc - ret 0 - mov eax, 3735929054 -cnv2_double_mainloop_sandybridge_asm ENDP - -ALIGN(64) -cnv2_rwz_mainloop_asm PROC - INCLUDE cn2/cnv2_rwz_main_loop.inc - ret 0 - mov eax, 3735929054 -cnv2_rwz_mainloop_asm ENDP - -ALIGN(64) -cnv2_rwz_double_mainloop_asm PROC - INCLUDE cn2/cnv2_rwz_double_main_loop.inc - ret 0 - mov eax, 3735929054 -cnv2_rwz_double_mainloop_asm ENDP - -_TEXT_CNV2_MAINLOOP ENDS -END diff --git a/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc b/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc deleted file mode 100644 index 6898a604..00000000 --- a/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc +++ /dev/null @@ -1,281 +0,0 @@ -PUBLIC CryptonightR_soft_aes_template_part1 -PUBLIC CryptonightR_soft_aes_template_mainloop -PUBLIC CryptonightR_soft_aes_template_part2 -PUBLIC CryptonightR_soft_aes_template_part3 -PUBLIC CryptonightR_soft_aes_template_end - -ALIGN(64) -CryptonightR_soft_aes_template_part1: - mov rcx, [rcx] - - mov QWORD PTR [rsp+8], rcx - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 232 - - mov eax, [rcx+96] - mov ebx, [rcx+100] - mov esi, [rcx+104] - mov edx, [rcx+108] - mov [rsp+144], eax - mov [rsp+148], ebx - mov [rsp+152], esi - mov [rsp+156], edx - - mov rax, QWORD PTR [rcx+48] - mov r10, rcx - xor rax, QWORD PTR [rcx+16] - mov r8, QWORD PTR [rcx+32] - xor r8, QWORD 
PTR [rcx] - mov r9, QWORD PTR [rcx+40] - xor r9, QWORD PTR [rcx+8] - movd xmm4, rax - mov rdx, QWORD PTR [rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov r11, QWORD PTR [rcx+224] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r10+72] - mov rax, QWORD PTR [r10+80] - movd xmm0, rdx - xor rax, QWORD PTR [r10+64] - - movaps XMMWORD PTR [rsp+16], xmm6 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+48], xmm8 - movaps XMMWORD PTR [rsp+64], xmm9 - movaps XMMWORD PTR [rsp+80], xmm10 - movaps XMMWORD PTR [rsp+96], xmm11 - movaps XMMWORD PTR [rsp+112], xmm12 - movaps XMMWORD PTR [rsp+128], xmm13 - - movd xmm5, rax - - mov rax, r8 - punpcklqdq xmm4, xmm0 - and eax, 2097136 - movd xmm10, QWORD PTR [r10+96] - movd xmm0, rcx - mov rcx, QWORD PTR [r10+104] - xorps xmm9, xmm9 - mov QWORD PTR [rsp+328], rax - movd xmm12, r11 - mov QWORD PTR [rsp+320], r9 - punpcklqdq xmm5, xmm0 - movd xmm13, rcx - mov r12d, 524288 - - ALIGN(64) -CryptonightR_soft_aes_template_mainloop: - movd xmm11, r12d - mov r12, QWORD PTR [r10+272] - lea r13, QWORD PTR [rax+r11] - mov esi, DWORD PTR [r13] - movd xmm0, r9 - mov r10d, DWORD PTR [r13+4] - movd xmm7, r8 - mov ebp, DWORD PTR [r13+12] - mov r14d, DWORD PTR [r13+8] - mov rdx, QWORD PTR [rsp+328] - movzx ecx, sil - shr esi, 8 - punpcklqdq xmm7, xmm0 - mov r15d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - mov edi, DWORD PTR [r12+rcx*4] - movzx ecx, r14b - shr r14d, 8 - mov ebx, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - shr ebp, 8 - mov r9d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - xor r15d, DWORD PTR [r12+rcx*4+1024] - movzx ecx, r14b - shr r14d, 8 - mov eax, r14d - shr eax, 8 - xor edi, DWORD PTR [r12+rcx*4+1024] - add eax, 256 - movzx ecx, bpl - shr ebp, 8 - xor ebx, DWORD PTR [r12+rcx*4+1024] - movzx ecx, sil - shr esi, 8 - xor r9d, DWORD PTR [r12+rcx*4+1024] - add r12, 2048 - movzx ecx, r10b - shr r10d, 8 - add r10d, 256 - mov r11d, DWORD PTR [r12+rax*4] - xor r11d, DWORD PTR [r12+rcx*4] - xor r11d, r9d - 
movzx ecx, sil - mov r10d, DWORD PTR [r12+r10*4] - shr esi, 8 - add esi, 256 - xor r10d, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - xor r10d, ebx - shr ebp, 8 - movd xmm1, r11d - add ebp, 256 - movd r11, xmm12 - mov r9d, DWORD PTR [r12+rcx*4] - xor r9d, DWORD PTR [r12+rsi*4] - mov eax, DWORD PTR [r12+rbp*4] - xor r9d, edi - movzx ecx, r14b - movd xmm0, r10d - movd xmm2, r9d - xor eax, DWORD PTR [r12+rcx*4] - mov rcx, rdx - xor eax, r15d - punpckldq xmm2, xmm1 - xor rcx, 16 - movd xmm6, eax - mov rax, rdx - punpckldq xmm6, xmm0 - xor rax, 32 - punpckldq xmm6, xmm2 - xor rdx, 48 - movdqu xmm2, XMMWORD PTR [rcx+r11] - pxor xmm6, xmm2 - pxor xmm6, xmm7 - paddq xmm2, xmm4 - movdqu xmm1, XMMWORD PTR [rax+r11] - movdqu xmm0, XMMWORD PTR [rdx+r11] - pxor xmm6, xmm1 - pxor xmm6, xmm0 - paddq xmm0, xmm5 - movdqu XMMWORD PTR [rcx+r11], xmm0 - movdqu XMMWORD PTR [rax+r11], xmm2 - movd rcx, xmm13 - paddq xmm1, xmm7 - movdqu XMMWORD PTR [rdx+r11], xmm1 - movd rdi, xmm6 - mov r10, rdi - and r10d, 2097136 - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [r13], xmm0 - - mov ebx, [rsp+144] - mov ebp, [rsp+152] - add ebx, [rsp+148] - add ebp, [rsp+156] - shl rbp, 32 - or rbx, rbp - - xor rbx, QWORD PTR [r10+r11] - lea r14, QWORD PTR [r10+r11] - mov rbp, QWORD PTR [r14+8] - - mov [rsp+160], rbx - mov [rsp+168], rdi - mov [rsp+176], rbp - mov [rsp+184], r10 - mov r10, rsp - - mov ebx, [rsp+144] - mov esi, [rsp+148] - mov edi, [rsp+152] - mov ebp, [rsp+156] - - movd esp, xmm7 - movaps xmm0, xmm7 - psrldq xmm0, 8 - movd r15d, xmm0 - movd eax, xmm4 - movd edx, xmm5 - movaps xmm0, xmm5 - psrldq xmm0, 8 - movd r9d, xmm0 - -CryptonightR_soft_aes_template_part2: - mov rsp, r10 - mov [rsp+144], ebx - mov [rsp+148], esi - mov [rsp+152], edi - mov [rsp+156], ebp - - mov edi, edi - shl rbp, 32 - or rbp, rdi - xor r8, rbp - - mov ebx, ebx - shl rsi, 32 - or rsi, rbx - xor QWORD PTR [rsp+320], rsi - - mov rbx, [rsp+160] - mov rdi, [rsp+168] - mov rbp, [rsp+176] - mov r10, [rsp+184] - - mov 
r9, r10 - xor r9, 16 - mov rcx, r10 - xor rcx, 32 - xor r10, 48 - mov rax, rbx - mul rdi - movdqu xmm2, XMMWORD PTR [r9+r11] - movdqu xmm1, XMMWORD PTR [rcx+r11] - pxor xmm6, xmm2 - pxor xmm6, xmm1 - paddq xmm1, xmm7 - add r8, rdx - movdqu xmm0, XMMWORD PTR [r10+r11] - pxor xmm6, xmm0 - paddq xmm0, xmm5 - paddq xmm2, xmm4 - movdqu XMMWORD PTR [r9+r11], xmm0 - movdqa xmm5, xmm4 - mov r9, QWORD PTR [rsp+320] - movdqa xmm4, xmm6 - add r9, rax - movdqu XMMWORD PTR [rcx+r11], xmm2 - movdqu XMMWORD PTR [r10+r11], xmm1 - mov r10, QWORD PTR [rsp+304] - movd r12d, xmm11 - mov QWORD PTR [r14], r8 - xor r8, rbx - mov rax, r8 - mov QWORD PTR [r14+8], r9 - and eax, 2097136 - xor r9, rbp - mov QWORD PTR [rsp+320], r9 - mov QWORD PTR [rsp+328], rax - sub r12d, 1 - jne CryptonightR_soft_aes_template_mainloop - -CryptonightR_soft_aes_template_part3: - movaps xmm6, XMMWORD PTR [rsp+16] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+48] - movaps xmm9, XMMWORD PTR [rsp+64] - movaps xmm10, XMMWORD PTR [rsp+80] - movaps xmm11, XMMWORD PTR [rsp+96] - movaps xmm12, XMMWORD PTR [rsp+112] - movaps xmm13, XMMWORD PTR [rsp+128] - - add rsp, 232 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - pop rbx - ret -CryptonightR_soft_aes_template_end: diff --git a/src/crypto/asm/win64/CryptonightR_template.asm b/src/crypto/asm/win64/CryptonightR_template.asm deleted file mode 100644 index 250eca3d..00000000 --- a/src/crypto/asm/win64/CryptonightR_template.asm +++ /dev/null @@ -1,1585 +0,0 @@ -; Auto-generated file, do not edit - -_TEXT_CN_TEMPLATE SEGMENT PAGE READ EXECUTE -PUBLIC CryptonightR_instruction0 -PUBLIC CryptonightR_instruction1 -PUBLIC CryptonightR_instruction2 -PUBLIC CryptonightR_instruction3 -PUBLIC CryptonightR_instruction4 -PUBLIC CryptonightR_instruction5 -PUBLIC CryptonightR_instruction6 -PUBLIC CryptonightR_instruction7 -PUBLIC CryptonightR_instruction8 -PUBLIC CryptonightR_instruction9 -PUBLIC CryptonightR_instruction10 -PUBLIC 
CryptonightR_instruction11 -PUBLIC CryptonightR_instruction12 -PUBLIC CryptonightR_instruction13 -PUBLIC CryptonightR_instruction14 -PUBLIC CryptonightR_instruction15 -PUBLIC CryptonightR_instruction16 -PUBLIC CryptonightR_instruction17 -PUBLIC CryptonightR_instruction18 -PUBLIC CryptonightR_instruction19 -PUBLIC CryptonightR_instruction20 -PUBLIC CryptonightR_instruction21 -PUBLIC CryptonightR_instruction22 -PUBLIC CryptonightR_instruction23 -PUBLIC CryptonightR_instruction24 -PUBLIC CryptonightR_instruction25 -PUBLIC CryptonightR_instruction26 -PUBLIC CryptonightR_instruction27 -PUBLIC CryptonightR_instruction28 -PUBLIC CryptonightR_instruction29 -PUBLIC CryptonightR_instruction30 -PUBLIC CryptonightR_instruction31 -PUBLIC CryptonightR_instruction32 -PUBLIC CryptonightR_instruction33 -PUBLIC CryptonightR_instruction34 -PUBLIC CryptonightR_instruction35 -PUBLIC CryptonightR_instruction36 -PUBLIC CryptonightR_instruction37 -PUBLIC CryptonightR_instruction38 -PUBLIC CryptonightR_instruction39 -PUBLIC CryptonightR_instruction40 -PUBLIC CryptonightR_instruction41 -PUBLIC CryptonightR_instruction42 -PUBLIC CryptonightR_instruction43 -PUBLIC CryptonightR_instruction44 -PUBLIC CryptonightR_instruction45 -PUBLIC CryptonightR_instruction46 -PUBLIC CryptonightR_instruction47 -PUBLIC CryptonightR_instruction48 -PUBLIC CryptonightR_instruction49 -PUBLIC CryptonightR_instruction50 -PUBLIC CryptonightR_instruction51 -PUBLIC CryptonightR_instruction52 -PUBLIC CryptonightR_instruction53 -PUBLIC CryptonightR_instruction54 -PUBLIC CryptonightR_instruction55 -PUBLIC CryptonightR_instruction56 -PUBLIC CryptonightR_instruction57 -PUBLIC CryptonightR_instruction58 -PUBLIC CryptonightR_instruction59 -PUBLIC CryptonightR_instruction60 -PUBLIC CryptonightR_instruction61 -PUBLIC CryptonightR_instruction62 -PUBLIC CryptonightR_instruction63 -PUBLIC CryptonightR_instruction64 -PUBLIC CryptonightR_instruction65 -PUBLIC CryptonightR_instruction66 -PUBLIC CryptonightR_instruction67 -PUBLIC 
CryptonightR_instruction68 -PUBLIC CryptonightR_instruction69 -PUBLIC CryptonightR_instruction70 -PUBLIC CryptonightR_instruction71 -PUBLIC CryptonightR_instruction72 -PUBLIC CryptonightR_instruction73 -PUBLIC CryptonightR_instruction74 -PUBLIC CryptonightR_instruction75 -PUBLIC CryptonightR_instruction76 -PUBLIC CryptonightR_instruction77 -PUBLIC CryptonightR_instruction78 -PUBLIC CryptonightR_instruction79 -PUBLIC CryptonightR_instruction80 -PUBLIC CryptonightR_instruction81 -PUBLIC CryptonightR_instruction82 -PUBLIC CryptonightR_instruction83 -PUBLIC CryptonightR_instruction84 -PUBLIC CryptonightR_instruction85 -PUBLIC CryptonightR_instruction86 -PUBLIC CryptonightR_instruction87 -PUBLIC CryptonightR_instruction88 -PUBLIC CryptonightR_instruction89 -PUBLIC CryptonightR_instruction90 -PUBLIC CryptonightR_instruction91 -PUBLIC CryptonightR_instruction92 -PUBLIC CryptonightR_instruction93 -PUBLIC CryptonightR_instruction94 -PUBLIC CryptonightR_instruction95 -PUBLIC CryptonightR_instruction96 -PUBLIC CryptonightR_instruction97 -PUBLIC CryptonightR_instruction98 -PUBLIC CryptonightR_instruction99 -PUBLIC CryptonightR_instruction100 -PUBLIC CryptonightR_instruction101 -PUBLIC CryptonightR_instruction102 -PUBLIC CryptonightR_instruction103 -PUBLIC CryptonightR_instruction104 -PUBLIC CryptonightR_instruction105 -PUBLIC CryptonightR_instruction106 -PUBLIC CryptonightR_instruction107 -PUBLIC CryptonightR_instruction108 -PUBLIC CryptonightR_instruction109 -PUBLIC CryptonightR_instruction110 -PUBLIC CryptonightR_instruction111 -PUBLIC CryptonightR_instruction112 -PUBLIC CryptonightR_instruction113 -PUBLIC CryptonightR_instruction114 -PUBLIC CryptonightR_instruction115 -PUBLIC CryptonightR_instruction116 -PUBLIC CryptonightR_instruction117 -PUBLIC CryptonightR_instruction118 -PUBLIC CryptonightR_instruction119 -PUBLIC CryptonightR_instruction120 -PUBLIC CryptonightR_instruction121 -PUBLIC CryptonightR_instruction122 -PUBLIC CryptonightR_instruction123 -PUBLIC 
CryptonightR_instruction124 -PUBLIC CryptonightR_instruction125 -PUBLIC CryptonightR_instruction126 -PUBLIC CryptonightR_instruction127 -PUBLIC CryptonightR_instruction128 -PUBLIC CryptonightR_instruction129 -PUBLIC CryptonightR_instruction130 -PUBLIC CryptonightR_instruction131 -PUBLIC CryptonightR_instruction132 -PUBLIC CryptonightR_instruction133 -PUBLIC CryptonightR_instruction134 -PUBLIC CryptonightR_instruction135 -PUBLIC CryptonightR_instruction136 -PUBLIC CryptonightR_instruction137 -PUBLIC CryptonightR_instruction138 -PUBLIC CryptonightR_instruction139 -PUBLIC CryptonightR_instruction140 -PUBLIC CryptonightR_instruction141 -PUBLIC CryptonightR_instruction142 -PUBLIC CryptonightR_instruction143 -PUBLIC CryptonightR_instruction144 -PUBLIC CryptonightR_instruction145 -PUBLIC CryptonightR_instruction146 -PUBLIC CryptonightR_instruction147 -PUBLIC CryptonightR_instruction148 -PUBLIC CryptonightR_instruction149 -PUBLIC CryptonightR_instruction150 -PUBLIC CryptonightR_instruction151 -PUBLIC CryptonightR_instruction152 -PUBLIC CryptonightR_instruction153 -PUBLIC CryptonightR_instruction154 -PUBLIC CryptonightR_instruction155 -PUBLIC CryptonightR_instruction156 -PUBLIC CryptonightR_instruction157 -PUBLIC CryptonightR_instruction158 -PUBLIC CryptonightR_instruction159 -PUBLIC CryptonightR_instruction160 -PUBLIC CryptonightR_instruction161 -PUBLIC CryptonightR_instruction162 -PUBLIC CryptonightR_instruction163 -PUBLIC CryptonightR_instruction164 -PUBLIC CryptonightR_instruction165 -PUBLIC CryptonightR_instruction166 -PUBLIC CryptonightR_instruction167 -PUBLIC CryptonightR_instruction168 -PUBLIC CryptonightR_instruction169 -PUBLIC CryptonightR_instruction170 -PUBLIC CryptonightR_instruction171 -PUBLIC CryptonightR_instruction172 -PUBLIC CryptonightR_instruction173 -PUBLIC CryptonightR_instruction174 -PUBLIC CryptonightR_instruction175 -PUBLIC CryptonightR_instruction176 -PUBLIC CryptonightR_instruction177 -PUBLIC CryptonightR_instruction178 -PUBLIC 
CryptonightR_instruction179 -PUBLIC CryptonightR_instruction180 -PUBLIC CryptonightR_instruction181 -PUBLIC CryptonightR_instruction182 -PUBLIC CryptonightR_instruction183 -PUBLIC CryptonightR_instruction184 -PUBLIC CryptonightR_instruction185 -PUBLIC CryptonightR_instruction186 -PUBLIC CryptonightR_instruction187 -PUBLIC CryptonightR_instruction188 -PUBLIC CryptonightR_instruction189 -PUBLIC CryptonightR_instruction190 -PUBLIC CryptonightR_instruction191 -PUBLIC CryptonightR_instruction192 -PUBLIC CryptonightR_instruction193 -PUBLIC CryptonightR_instruction194 -PUBLIC CryptonightR_instruction195 -PUBLIC CryptonightR_instruction196 -PUBLIC CryptonightR_instruction197 -PUBLIC CryptonightR_instruction198 -PUBLIC CryptonightR_instruction199 -PUBLIC CryptonightR_instruction200 -PUBLIC CryptonightR_instruction201 -PUBLIC CryptonightR_instruction202 -PUBLIC CryptonightR_instruction203 -PUBLIC CryptonightR_instruction204 -PUBLIC CryptonightR_instruction205 -PUBLIC CryptonightR_instruction206 -PUBLIC CryptonightR_instruction207 -PUBLIC CryptonightR_instruction208 -PUBLIC CryptonightR_instruction209 -PUBLIC CryptonightR_instruction210 -PUBLIC CryptonightR_instruction211 -PUBLIC CryptonightR_instruction212 -PUBLIC CryptonightR_instruction213 -PUBLIC CryptonightR_instruction214 -PUBLIC CryptonightR_instruction215 -PUBLIC CryptonightR_instruction216 -PUBLIC CryptonightR_instruction217 -PUBLIC CryptonightR_instruction218 -PUBLIC CryptonightR_instruction219 -PUBLIC CryptonightR_instruction220 -PUBLIC CryptonightR_instruction221 -PUBLIC CryptonightR_instruction222 -PUBLIC CryptonightR_instruction223 -PUBLIC CryptonightR_instruction224 -PUBLIC CryptonightR_instruction225 -PUBLIC CryptonightR_instruction226 -PUBLIC CryptonightR_instruction227 -PUBLIC CryptonightR_instruction228 -PUBLIC CryptonightR_instruction229 -PUBLIC CryptonightR_instruction230 -PUBLIC CryptonightR_instruction231 -PUBLIC CryptonightR_instruction232 -PUBLIC CryptonightR_instruction233 -PUBLIC 
CryptonightR_instruction234 -PUBLIC CryptonightR_instruction235 -PUBLIC CryptonightR_instruction236 -PUBLIC CryptonightR_instruction237 -PUBLIC CryptonightR_instruction238 -PUBLIC CryptonightR_instruction239 -PUBLIC CryptonightR_instruction240 -PUBLIC CryptonightR_instruction241 -PUBLIC CryptonightR_instruction242 -PUBLIC CryptonightR_instruction243 -PUBLIC CryptonightR_instruction244 -PUBLIC CryptonightR_instruction245 -PUBLIC CryptonightR_instruction246 -PUBLIC CryptonightR_instruction247 -PUBLIC CryptonightR_instruction248 -PUBLIC CryptonightR_instruction249 -PUBLIC CryptonightR_instruction250 -PUBLIC CryptonightR_instruction251 -PUBLIC CryptonightR_instruction252 -PUBLIC CryptonightR_instruction253 -PUBLIC CryptonightR_instruction254 -PUBLIC CryptonightR_instruction255 -PUBLIC CryptonightR_instruction256 -PUBLIC CryptonightR_instruction_mov0 -PUBLIC CryptonightR_instruction_mov1 -PUBLIC CryptonightR_instruction_mov2 -PUBLIC CryptonightR_instruction_mov3 -PUBLIC CryptonightR_instruction_mov4 -PUBLIC CryptonightR_instruction_mov5 -PUBLIC CryptonightR_instruction_mov6 -PUBLIC CryptonightR_instruction_mov7 -PUBLIC CryptonightR_instruction_mov8 -PUBLIC CryptonightR_instruction_mov9 -PUBLIC CryptonightR_instruction_mov10 -PUBLIC CryptonightR_instruction_mov11 -PUBLIC CryptonightR_instruction_mov12 -PUBLIC CryptonightR_instruction_mov13 -PUBLIC CryptonightR_instruction_mov14 -PUBLIC CryptonightR_instruction_mov15 -PUBLIC CryptonightR_instruction_mov16 -PUBLIC CryptonightR_instruction_mov17 -PUBLIC CryptonightR_instruction_mov18 -PUBLIC CryptonightR_instruction_mov19 -PUBLIC CryptonightR_instruction_mov20 -PUBLIC CryptonightR_instruction_mov21 -PUBLIC CryptonightR_instruction_mov22 -PUBLIC CryptonightR_instruction_mov23 -PUBLIC CryptonightR_instruction_mov24 -PUBLIC CryptonightR_instruction_mov25 -PUBLIC CryptonightR_instruction_mov26 -PUBLIC CryptonightR_instruction_mov27 -PUBLIC CryptonightR_instruction_mov28 -PUBLIC CryptonightR_instruction_mov29 -PUBLIC 
CryptonightR_instruction_mov30 -PUBLIC CryptonightR_instruction_mov31 -PUBLIC CryptonightR_instruction_mov32 -PUBLIC CryptonightR_instruction_mov33 -PUBLIC CryptonightR_instruction_mov34 -PUBLIC CryptonightR_instruction_mov35 -PUBLIC CryptonightR_instruction_mov36 -PUBLIC CryptonightR_instruction_mov37 -PUBLIC CryptonightR_instruction_mov38 -PUBLIC CryptonightR_instruction_mov39 -PUBLIC CryptonightR_instruction_mov40 -PUBLIC CryptonightR_instruction_mov41 -PUBLIC CryptonightR_instruction_mov42 -PUBLIC CryptonightR_instruction_mov43 -PUBLIC CryptonightR_instruction_mov44 -PUBLIC CryptonightR_instruction_mov45 -PUBLIC CryptonightR_instruction_mov46 -PUBLIC CryptonightR_instruction_mov47 -PUBLIC CryptonightR_instruction_mov48 -PUBLIC CryptonightR_instruction_mov49 -PUBLIC CryptonightR_instruction_mov50 -PUBLIC CryptonightR_instruction_mov51 -PUBLIC CryptonightR_instruction_mov52 -PUBLIC CryptonightR_instruction_mov53 -PUBLIC CryptonightR_instruction_mov54 -PUBLIC CryptonightR_instruction_mov55 -PUBLIC CryptonightR_instruction_mov56 -PUBLIC CryptonightR_instruction_mov57 -PUBLIC CryptonightR_instruction_mov58 -PUBLIC CryptonightR_instruction_mov59 -PUBLIC CryptonightR_instruction_mov60 -PUBLIC CryptonightR_instruction_mov61 -PUBLIC CryptonightR_instruction_mov62 -PUBLIC CryptonightR_instruction_mov63 -PUBLIC CryptonightR_instruction_mov64 -PUBLIC CryptonightR_instruction_mov65 -PUBLIC CryptonightR_instruction_mov66 -PUBLIC CryptonightR_instruction_mov67 -PUBLIC CryptonightR_instruction_mov68 -PUBLIC CryptonightR_instruction_mov69 -PUBLIC CryptonightR_instruction_mov70 -PUBLIC CryptonightR_instruction_mov71 -PUBLIC CryptonightR_instruction_mov72 -PUBLIC CryptonightR_instruction_mov73 -PUBLIC CryptonightR_instruction_mov74 -PUBLIC CryptonightR_instruction_mov75 -PUBLIC CryptonightR_instruction_mov76 -PUBLIC CryptonightR_instruction_mov77 -PUBLIC CryptonightR_instruction_mov78 -PUBLIC CryptonightR_instruction_mov79 -PUBLIC CryptonightR_instruction_mov80 -PUBLIC 
CryptonightR_instruction_mov81 -PUBLIC CryptonightR_instruction_mov82 -PUBLIC CryptonightR_instruction_mov83 -PUBLIC CryptonightR_instruction_mov84 -PUBLIC CryptonightR_instruction_mov85 -PUBLIC CryptonightR_instruction_mov86 -PUBLIC CryptonightR_instruction_mov87 -PUBLIC CryptonightR_instruction_mov88 -PUBLIC CryptonightR_instruction_mov89 -PUBLIC CryptonightR_instruction_mov90 -PUBLIC CryptonightR_instruction_mov91 -PUBLIC CryptonightR_instruction_mov92 -PUBLIC CryptonightR_instruction_mov93 -PUBLIC CryptonightR_instruction_mov94 -PUBLIC CryptonightR_instruction_mov95 -PUBLIC CryptonightR_instruction_mov96 -PUBLIC CryptonightR_instruction_mov97 -PUBLIC CryptonightR_instruction_mov98 -PUBLIC CryptonightR_instruction_mov99 -PUBLIC CryptonightR_instruction_mov100 -PUBLIC CryptonightR_instruction_mov101 -PUBLIC CryptonightR_instruction_mov102 -PUBLIC CryptonightR_instruction_mov103 -PUBLIC CryptonightR_instruction_mov104 -PUBLIC CryptonightR_instruction_mov105 -PUBLIC CryptonightR_instruction_mov106 -PUBLIC CryptonightR_instruction_mov107 -PUBLIC CryptonightR_instruction_mov108 -PUBLIC CryptonightR_instruction_mov109 -PUBLIC CryptonightR_instruction_mov110 -PUBLIC CryptonightR_instruction_mov111 -PUBLIC CryptonightR_instruction_mov112 -PUBLIC CryptonightR_instruction_mov113 -PUBLIC CryptonightR_instruction_mov114 -PUBLIC CryptonightR_instruction_mov115 -PUBLIC CryptonightR_instruction_mov116 -PUBLIC CryptonightR_instruction_mov117 -PUBLIC CryptonightR_instruction_mov118 -PUBLIC CryptonightR_instruction_mov119 -PUBLIC CryptonightR_instruction_mov120 -PUBLIC CryptonightR_instruction_mov121 -PUBLIC CryptonightR_instruction_mov122 -PUBLIC CryptonightR_instruction_mov123 -PUBLIC CryptonightR_instruction_mov124 -PUBLIC CryptonightR_instruction_mov125 -PUBLIC CryptonightR_instruction_mov126 -PUBLIC CryptonightR_instruction_mov127 -PUBLIC CryptonightR_instruction_mov128 -PUBLIC CryptonightR_instruction_mov129 -PUBLIC CryptonightR_instruction_mov130 -PUBLIC 
CryptonightR_instruction_mov131 -PUBLIC CryptonightR_instruction_mov132 -PUBLIC CryptonightR_instruction_mov133 -PUBLIC CryptonightR_instruction_mov134 -PUBLIC CryptonightR_instruction_mov135 -PUBLIC CryptonightR_instruction_mov136 -PUBLIC CryptonightR_instruction_mov137 -PUBLIC CryptonightR_instruction_mov138 -PUBLIC CryptonightR_instruction_mov139 -PUBLIC CryptonightR_instruction_mov140 -PUBLIC CryptonightR_instruction_mov141 -PUBLIC CryptonightR_instruction_mov142 -PUBLIC CryptonightR_instruction_mov143 -PUBLIC CryptonightR_instruction_mov144 -PUBLIC CryptonightR_instruction_mov145 -PUBLIC CryptonightR_instruction_mov146 -PUBLIC CryptonightR_instruction_mov147 -PUBLIC CryptonightR_instruction_mov148 -PUBLIC CryptonightR_instruction_mov149 -PUBLIC CryptonightR_instruction_mov150 -PUBLIC CryptonightR_instruction_mov151 -PUBLIC CryptonightR_instruction_mov152 -PUBLIC CryptonightR_instruction_mov153 -PUBLIC CryptonightR_instruction_mov154 -PUBLIC CryptonightR_instruction_mov155 -PUBLIC CryptonightR_instruction_mov156 -PUBLIC CryptonightR_instruction_mov157 -PUBLIC CryptonightR_instruction_mov158 -PUBLIC CryptonightR_instruction_mov159 -PUBLIC CryptonightR_instruction_mov160 -PUBLIC CryptonightR_instruction_mov161 -PUBLIC CryptonightR_instruction_mov162 -PUBLIC CryptonightR_instruction_mov163 -PUBLIC CryptonightR_instruction_mov164 -PUBLIC CryptonightR_instruction_mov165 -PUBLIC CryptonightR_instruction_mov166 -PUBLIC CryptonightR_instruction_mov167 -PUBLIC CryptonightR_instruction_mov168 -PUBLIC CryptonightR_instruction_mov169 -PUBLIC CryptonightR_instruction_mov170 -PUBLIC CryptonightR_instruction_mov171 -PUBLIC CryptonightR_instruction_mov172 -PUBLIC CryptonightR_instruction_mov173 -PUBLIC CryptonightR_instruction_mov174 -PUBLIC CryptonightR_instruction_mov175 -PUBLIC CryptonightR_instruction_mov176 -PUBLIC CryptonightR_instruction_mov177 -PUBLIC CryptonightR_instruction_mov178 -PUBLIC CryptonightR_instruction_mov179 -PUBLIC CryptonightR_instruction_mov180 -PUBLIC 
CryptonightR_instruction_mov181 -PUBLIC CryptonightR_instruction_mov182 -PUBLIC CryptonightR_instruction_mov183 -PUBLIC CryptonightR_instruction_mov184 -PUBLIC CryptonightR_instruction_mov185 -PUBLIC CryptonightR_instruction_mov186 -PUBLIC CryptonightR_instruction_mov187 -PUBLIC CryptonightR_instruction_mov188 -PUBLIC CryptonightR_instruction_mov189 -PUBLIC CryptonightR_instruction_mov190 -PUBLIC CryptonightR_instruction_mov191 -PUBLIC CryptonightR_instruction_mov192 -PUBLIC CryptonightR_instruction_mov193 -PUBLIC CryptonightR_instruction_mov194 -PUBLIC CryptonightR_instruction_mov195 -PUBLIC CryptonightR_instruction_mov196 -PUBLIC CryptonightR_instruction_mov197 -PUBLIC CryptonightR_instruction_mov198 -PUBLIC CryptonightR_instruction_mov199 -PUBLIC CryptonightR_instruction_mov200 -PUBLIC CryptonightR_instruction_mov201 -PUBLIC CryptonightR_instruction_mov202 -PUBLIC CryptonightR_instruction_mov203 -PUBLIC CryptonightR_instruction_mov204 -PUBLIC CryptonightR_instruction_mov205 -PUBLIC CryptonightR_instruction_mov206 -PUBLIC CryptonightR_instruction_mov207 -PUBLIC CryptonightR_instruction_mov208 -PUBLIC CryptonightR_instruction_mov209 -PUBLIC CryptonightR_instruction_mov210 -PUBLIC CryptonightR_instruction_mov211 -PUBLIC CryptonightR_instruction_mov212 -PUBLIC CryptonightR_instruction_mov213 -PUBLIC CryptonightR_instruction_mov214 -PUBLIC CryptonightR_instruction_mov215 -PUBLIC CryptonightR_instruction_mov216 -PUBLIC CryptonightR_instruction_mov217 -PUBLIC CryptonightR_instruction_mov218 -PUBLIC CryptonightR_instruction_mov219 -PUBLIC CryptonightR_instruction_mov220 -PUBLIC CryptonightR_instruction_mov221 -PUBLIC CryptonightR_instruction_mov222 -PUBLIC CryptonightR_instruction_mov223 -PUBLIC CryptonightR_instruction_mov224 -PUBLIC CryptonightR_instruction_mov225 -PUBLIC CryptonightR_instruction_mov226 -PUBLIC CryptonightR_instruction_mov227 -PUBLIC CryptonightR_instruction_mov228 -PUBLIC CryptonightR_instruction_mov229 -PUBLIC CryptonightR_instruction_mov230 -PUBLIC 
CryptonightR_instruction_mov231 -PUBLIC CryptonightR_instruction_mov232 -PUBLIC CryptonightR_instruction_mov233 -PUBLIC CryptonightR_instruction_mov234 -PUBLIC CryptonightR_instruction_mov235 -PUBLIC CryptonightR_instruction_mov236 -PUBLIC CryptonightR_instruction_mov237 -PUBLIC CryptonightR_instruction_mov238 -PUBLIC CryptonightR_instruction_mov239 -PUBLIC CryptonightR_instruction_mov240 -PUBLIC CryptonightR_instruction_mov241 -PUBLIC CryptonightR_instruction_mov242 -PUBLIC CryptonightR_instruction_mov243 -PUBLIC CryptonightR_instruction_mov244 -PUBLIC CryptonightR_instruction_mov245 -PUBLIC CryptonightR_instruction_mov246 -PUBLIC CryptonightR_instruction_mov247 -PUBLIC CryptonightR_instruction_mov248 -PUBLIC CryptonightR_instruction_mov249 -PUBLIC CryptonightR_instruction_mov250 -PUBLIC CryptonightR_instruction_mov251 -PUBLIC CryptonightR_instruction_mov252 -PUBLIC CryptonightR_instruction_mov253 -PUBLIC CryptonightR_instruction_mov254 -PUBLIC CryptonightR_instruction_mov255 -PUBLIC CryptonightR_instruction_mov256 - -INCLUDE CryptonightWOW_template_win.inc -INCLUDE CryptonightR_template_win.inc -INCLUDE CryptonightWOW_soft_aes_template_win.inc -INCLUDE CryptonightR_soft_aes_template_win.inc - -CryptonightR_instruction0: - imul rbx, rbx -CryptonightR_instruction1: - imul rbx, rbx -CryptonightR_instruction2: - imul rbx, rbx -CryptonightR_instruction3: - add rbx, r9 - add rbx, 2147483647 -CryptonightR_instruction4: - sub rbx, r9 -CryptonightR_instruction5: - ror ebx, cl -CryptonightR_instruction6: - rol ebx, cl -CryptonightR_instruction7: - xor rbx, r9 -CryptonightR_instruction8: - imul rsi, rbx -CryptonightR_instruction9: - imul rsi, rbx -CryptonightR_instruction10: - imul rsi, rbx -CryptonightR_instruction11: - add rsi, rbx - add rsi, 2147483647 -CryptonightR_instruction12: - sub rsi, rbx -CryptonightR_instruction13: - ror esi, cl -CryptonightR_instruction14: - rol esi, cl -CryptonightR_instruction15: - xor rsi, rbx -CryptonightR_instruction16: - imul rdi, rbx 
-CryptonightR_instruction17: - imul rdi, rbx -CryptonightR_instruction18: - imul rdi, rbx -CryptonightR_instruction19: - add rdi, rbx - add rdi, 2147483647 -CryptonightR_instruction20: - sub rdi, rbx -CryptonightR_instruction21: - ror edi, cl -CryptonightR_instruction22: - rol edi, cl -CryptonightR_instruction23: - xor rdi, rbx -CryptonightR_instruction24: - imul rbp, rbx -CryptonightR_instruction25: - imul rbp, rbx -CryptonightR_instruction26: - imul rbp, rbx -CryptonightR_instruction27: - add rbp, rbx - add rbp, 2147483647 -CryptonightR_instruction28: - sub rbp, rbx -CryptonightR_instruction29: - ror ebp, cl -CryptonightR_instruction30: - rol ebp, cl -CryptonightR_instruction31: - xor rbp, rbx -CryptonightR_instruction32: - imul rbx, rsi -CryptonightR_instruction33: - imul rbx, rsi -CryptonightR_instruction34: - imul rbx, rsi -CryptonightR_instruction35: - add rbx, rsi - add rbx, 2147483647 -CryptonightR_instruction36: - sub rbx, rsi -CryptonightR_instruction37: - ror ebx, cl -CryptonightR_instruction38: - rol ebx, cl -CryptonightR_instruction39: - xor rbx, rsi -CryptonightR_instruction40: - imul rsi, rsi -CryptonightR_instruction41: - imul rsi, rsi -CryptonightR_instruction42: - imul rsi, rsi -CryptonightR_instruction43: - add rsi, r9 - add rsi, 2147483647 -CryptonightR_instruction44: - sub rsi, r9 -CryptonightR_instruction45: - ror esi, cl -CryptonightR_instruction46: - rol esi, cl -CryptonightR_instruction47: - xor rsi, r9 -CryptonightR_instruction48: - imul rdi, rsi -CryptonightR_instruction49: - imul rdi, rsi -CryptonightR_instruction50: - imul rdi, rsi -CryptonightR_instruction51: - add rdi, rsi - add rdi, 2147483647 -CryptonightR_instruction52: - sub rdi, rsi -CryptonightR_instruction53: - ror edi, cl -CryptonightR_instruction54: - rol edi, cl -CryptonightR_instruction55: - xor rdi, rsi -CryptonightR_instruction56: - imul rbp, rsi -CryptonightR_instruction57: - imul rbp, rsi -CryptonightR_instruction58: - imul rbp, rsi -CryptonightR_instruction59: - add 
rbp, rsi - add rbp, 2147483647 -CryptonightR_instruction60: - sub rbp, rsi -CryptonightR_instruction61: - ror ebp, cl -CryptonightR_instruction62: - rol ebp, cl -CryptonightR_instruction63: - xor rbp, rsi -CryptonightR_instruction64: - imul rbx, rdi -CryptonightR_instruction65: - imul rbx, rdi -CryptonightR_instruction66: - imul rbx, rdi -CryptonightR_instruction67: - add rbx, rdi - add rbx, 2147483647 -CryptonightR_instruction68: - sub rbx, rdi -CryptonightR_instruction69: - ror ebx, cl -CryptonightR_instruction70: - rol ebx, cl -CryptonightR_instruction71: - xor rbx, rdi -CryptonightR_instruction72: - imul rsi, rdi -CryptonightR_instruction73: - imul rsi, rdi -CryptonightR_instruction74: - imul rsi, rdi -CryptonightR_instruction75: - add rsi, rdi - add rsi, 2147483647 -CryptonightR_instruction76: - sub rsi, rdi -CryptonightR_instruction77: - ror esi, cl -CryptonightR_instruction78: - rol esi, cl -CryptonightR_instruction79: - xor rsi, rdi -CryptonightR_instruction80: - imul rdi, rdi -CryptonightR_instruction81: - imul rdi, rdi -CryptonightR_instruction82: - imul rdi, rdi -CryptonightR_instruction83: - add rdi, r9 - add rdi, 2147483647 -CryptonightR_instruction84: - sub rdi, r9 -CryptonightR_instruction85: - ror edi, cl -CryptonightR_instruction86: - rol edi, cl -CryptonightR_instruction87: - xor rdi, r9 -CryptonightR_instruction88: - imul rbp, rdi -CryptonightR_instruction89: - imul rbp, rdi -CryptonightR_instruction90: - imul rbp, rdi -CryptonightR_instruction91: - add rbp, rdi - add rbp, 2147483647 -CryptonightR_instruction92: - sub rbp, rdi -CryptonightR_instruction93: - ror ebp, cl -CryptonightR_instruction94: - rol ebp, cl -CryptonightR_instruction95: - xor rbp, rdi -CryptonightR_instruction96: - imul rbx, rbp -CryptonightR_instruction97: - imul rbx, rbp -CryptonightR_instruction98: - imul rbx, rbp -CryptonightR_instruction99: - add rbx, rbp - add rbx, 2147483647 -CryptonightR_instruction100: - sub rbx, rbp -CryptonightR_instruction101: - ror ebx, cl 
-CryptonightR_instruction102: - rol ebx, cl -CryptonightR_instruction103: - xor rbx, rbp -CryptonightR_instruction104: - imul rsi, rbp -CryptonightR_instruction105: - imul rsi, rbp -CryptonightR_instruction106: - imul rsi, rbp -CryptonightR_instruction107: - add rsi, rbp - add rsi, 2147483647 -CryptonightR_instruction108: - sub rsi, rbp -CryptonightR_instruction109: - ror esi, cl -CryptonightR_instruction110: - rol esi, cl -CryptonightR_instruction111: - xor rsi, rbp -CryptonightR_instruction112: - imul rdi, rbp -CryptonightR_instruction113: - imul rdi, rbp -CryptonightR_instruction114: - imul rdi, rbp -CryptonightR_instruction115: - add rdi, rbp - add rdi, 2147483647 -CryptonightR_instruction116: - sub rdi, rbp -CryptonightR_instruction117: - ror edi, cl -CryptonightR_instruction118: - rol edi, cl -CryptonightR_instruction119: - xor rdi, rbp -CryptonightR_instruction120: - imul rbp, rbp -CryptonightR_instruction121: - imul rbp, rbp -CryptonightR_instruction122: - imul rbp, rbp -CryptonightR_instruction123: - add rbp, r9 - add rbp, 2147483647 -CryptonightR_instruction124: - sub rbp, r9 -CryptonightR_instruction125: - ror ebp, cl -CryptonightR_instruction126: - rol ebp, cl -CryptonightR_instruction127: - xor rbp, r9 -CryptonightR_instruction128: - imul rbx, rsp -CryptonightR_instruction129: - imul rbx, rsp -CryptonightR_instruction130: - imul rbx, rsp -CryptonightR_instruction131: - add rbx, rsp - add rbx, 2147483647 -CryptonightR_instruction132: - sub rbx, rsp -CryptonightR_instruction133: - ror ebx, cl -CryptonightR_instruction134: - rol ebx, cl -CryptonightR_instruction135: - xor rbx, rsp -CryptonightR_instruction136: - imul rsi, rsp -CryptonightR_instruction137: - imul rsi, rsp -CryptonightR_instruction138: - imul rsi, rsp -CryptonightR_instruction139: - add rsi, rsp - add rsi, 2147483647 -CryptonightR_instruction140: - sub rsi, rsp -CryptonightR_instruction141: - ror esi, cl -CryptonightR_instruction142: - rol esi, cl -CryptonightR_instruction143: - xor rsi, 
rsp -CryptonightR_instruction144: - imul rdi, rsp -CryptonightR_instruction145: - imul rdi, rsp -CryptonightR_instruction146: - imul rdi, rsp -CryptonightR_instruction147: - add rdi, rsp - add rdi, 2147483647 -CryptonightR_instruction148: - sub rdi, rsp -CryptonightR_instruction149: - ror edi, cl -CryptonightR_instruction150: - rol edi, cl -CryptonightR_instruction151: - xor rdi, rsp -CryptonightR_instruction152: - imul rbp, rsp -CryptonightR_instruction153: - imul rbp, rsp -CryptonightR_instruction154: - imul rbp, rsp -CryptonightR_instruction155: - add rbp, rsp - add rbp, 2147483647 -CryptonightR_instruction156: - sub rbp, rsp -CryptonightR_instruction157: - ror ebp, cl -CryptonightR_instruction158: - rol ebp, cl -CryptonightR_instruction159: - xor rbp, rsp -CryptonightR_instruction160: - imul rbx, r15 -CryptonightR_instruction161: - imul rbx, r15 -CryptonightR_instruction162: - imul rbx, r15 -CryptonightR_instruction163: - add rbx, r15 - add rbx, 2147483647 -CryptonightR_instruction164: - sub rbx, r15 -CryptonightR_instruction165: - ror ebx, cl -CryptonightR_instruction166: - rol ebx, cl -CryptonightR_instruction167: - xor rbx, r15 -CryptonightR_instruction168: - imul rsi, r15 -CryptonightR_instruction169: - imul rsi, r15 -CryptonightR_instruction170: - imul rsi, r15 -CryptonightR_instruction171: - add rsi, r15 - add rsi, 2147483647 -CryptonightR_instruction172: - sub rsi, r15 -CryptonightR_instruction173: - ror esi, cl -CryptonightR_instruction174: - rol esi, cl -CryptonightR_instruction175: - xor rsi, r15 -CryptonightR_instruction176: - imul rdi, r15 -CryptonightR_instruction177: - imul rdi, r15 -CryptonightR_instruction178: - imul rdi, r15 -CryptonightR_instruction179: - add rdi, r15 - add rdi, 2147483647 -CryptonightR_instruction180: - sub rdi, r15 -CryptonightR_instruction181: - ror edi, cl -CryptonightR_instruction182: - rol edi, cl -CryptonightR_instruction183: - xor rdi, r15 -CryptonightR_instruction184: - imul rbp, r15 -CryptonightR_instruction185: - 
imul rbp, r15 -CryptonightR_instruction186: - imul rbp, r15 -CryptonightR_instruction187: - add rbp, r15 - add rbp, 2147483647 -CryptonightR_instruction188: - sub rbp, r15 -CryptonightR_instruction189: - ror ebp, cl -CryptonightR_instruction190: - rol ebp, cl -CryptonightR_instruction191: - xor rbp, r15 -CryptonightR_instruction192: - imul rbx, rax -CryptonightR_instruction193: - imul rbx, rax -CryptonightR_instruction194: - imul rbx, rax -CryptonightR_instruction195: - add rbx, rax - add rbx, 2147483647 -CryptonightR_instruction196: - sub rbx, rax -CryptonightR_instruction197: - ror ebx, cl -CryptonightR_instruction198: - rol ebx, cl -CryptonightR_instruction199: - xor rbx, rax -CryptonightR_instruction200: - imul rsi, rax -CryptonightR_instruction201: - imul rsi, rax -CryptonightR_instruction202: - imul rsi, rax -CryptonightR_instruction203: - add rsi, rax - add rsi, 2147483647 -CryptonightR_instruction204: - sub rsi, rax -CryptonightR_instruction205: - ror esi, cl -CryptonightR_instruction206: - rol esi, cl -CryptonightR_instruction207: - xor rsi, rax -CryptonightR_instruction208: - imul rdi, rax -CryptonightR_instruction209: - imul rdi, rax -CryptonightR_instruction210: - imul rdi, rax -CryptonightR_instruction211: - add rdi, rax - add rdi, 2147483647 -CryptonightR_instruction212: - sub rdi, rax -CryptonightR_instruction213: - ror edi, cl -CryptonightR_instruction214: - rol edi, cl -CryptonightR_instruction215: - xor rdi, rax -CryptonightR_instruction216: - imul rbp, rax -CryptonightR_instruction217: - imul rbp, rax -CryptonightR_instruction218: - imul rbp, rax -CryptonightR_instruction219: - add rbp, rax - add rbp, 2147483647 -CryptonightR_instruction220: - sub rbp, rax -CryptonightR_instruction221: - ror ebp, cl -CryptonightR_instruction222: - rol ebp, cl -CryptonightR_instruction223: - xor rbp, rax -CryptonightR_instruction224: - imul rbx, rdx -CryptonightR_instruction225: - imul rbx, rdx -CryptonightR_instruction226: - imul rbx, rdx 
-CryptonightR_instruction227: - add rbx, rdx - add rbx, 2147483647 -CryptonightR_instruction228: - sub rbx, rdx -CryptonightR_instruction229: - ror ebx, cl -CryptonightR_instruction230: - rol ebx, cl -CryptonightR_instruction231: - xor rbx, rdx -CryptonightR_instruction232: - imul rsi, rdx -CryptonightR_instruction233: - imul rsi, rdx -CryptonightR_instruction234: - imul rsi, rdx -CryptonightR_instruction235: - add rsi, rdx - add rsi, 2147483647 -CryptonightR_instruction236: - sub rsi, rdx -CryptonightR_instruction237: - ror esi, cl -CryptonightR_instruction238: - rol esi, cl -CryptonightR_instruction239: - xor rsi, rdx -CryptonightR_instruction240: - imul rdi, rdx -CryptonightR_instruction241: - imul rdi, rdx -CryptonightR_instruction242: - imul rdi, rdx -CryptonightR_instruction243: - add rdi, rdx - add rdi, 2147483647 -CryptonightR_instruction244: - sub rdi, rdx -CryptonightR_instruction245: - ror edi, cl -CryptonightR_instruction246: - rol edi, cl -CryptonightR_instruction247: - xor rdi, rdx -CryptonightR_instruction248: - imul rbp, rdx -CryptonightR_instruction249: - imul rbp, rdx -CryptonightR_instruction250: - imul rbp, rdx -CryptonightR_instruction251: - add rbp, rdx - add rbp, 2147483647 -CryptonightR_instruction252: - sub rbp, rdx -CryptonightR_instruction253: - ror ebp, cl -CryptonightR_instruction254: - rol ebp, cl -CryptonightR_instruction255: - xor rbp, rdx -CryptonightR_instruction256: - imul rbx, rbx -CryptonightR_instruction_mov0: - -CryptonightR_instruction_mov1: - -CryptonightR_instruction_mov2: - -CryptonightR_instruction_mov3: - -CryptonightR_instruction_mov4: - -CryptonightR_instruction_mov5: - mov rcx, rbx -CryptonightR_instruction_mov6: - mov rcx, rbx -CryptonightR_instruction_mov7: - -CryptonightR_instruction_mov8: - -CryptonightR_instruction_mov9: - -CryptonightR_instruction_mov10: - -CryptonightR_instruction_mov11: - -CryptonightR_instruction_mov12: - -CryptonightR_instruction_mov13: - mov rcx, rbx -CryptonightR_instruction_mov14: - mov 
rcx, rbx -CryptonightR_instruction_mov15: - -CryptonightR_instruction_mov16: - -CryptonightR_instruction_mov17: - -CryptonightR_instruction_mov18: - -CryptonightR_instruction_mov19: - -CryptonightR_instruction_mov20: - -CryptonightR_instruction_mov21: - mov rcx, rbx -CryptonightR_instruction_mov22: - mov rcx, rbx -CryptonightR_instruction_mov23: - -CryptonightR_instruction_mov24: - -CryptonightR_instruction_mov25: - -CryptonightR_instruction_mov26: - -CryptonightR_instruction_mov27: - -CryptonightR_instruction_mov28: - -CryptonightR_instruction_mov29: - mov rcx, rbx -CryptonightR_instruction_mov30: - mov rcx, rbx -CryptonightR_instruction_mov31: - -CryptonightR_instruction_mov32: - -CryptonightR_instruction_mov33: - -CryptonightR_instruction_mov34: - -CryptonightR_instruction_mov35: - -CryptonightR_instruction_mov36: - -CryptonightR_instruction_mov37: - mov rcx, rsi -CryptonightR_instruction_mov38: - mov rcx, rsi -CryptonightR_instruction_mov39: - -CryptonightR_instruction_mov40: - -CryptonightR_instruction_mov41: - -CryptonightR_instruction_mov42: - -CryptonightR_instruction_mov43: - -CryptonightR_instruction_mov44: - -CryptonightR_instruction_mov45: - mov rcx, rsi -CryptonightR_instruction_mov46: - mov rcx, rsi -CryptonightR_instruction_mov47: - -CryptonightR_instruction_mov48: - -CryptonightR_instruction_mov49: - -CryptonightR_instruction_mov50: - -CryptonightR_instruction_mov51: - -CryptonightR_instruction_mov52: - -CryptonightR_instruction_mov53: - mov rcx, rsi -CryptonightR_instruction_mov54: - mov rcx, rsi -CryptonightR_instruction_mov55: - -CryptonightR_instruction_mov56: - -CryptonightR_instruction_mov57: - -CryptonightR_instruction_mov58: - -CryptonightR_instruction_mov59: - -CryptonightR_instruction_mov60: - -CryptonightR_instruction_mov61: - mov rcx, rsi -CryptonightR_instruction_mov62: - mov rcx, rsi -CryptonightR_instruction_mov63: - -CryptonightR_instruction_mov64: - -CryptonightR_instruction_mov65: - -CryptonightR_instruction_mov66: - 
-CryptonightR_instruction_mov67: - -CryptonightR_instruction_mov68: - -CryptonightR_instruction_mov69: - mov rcx, rdi -CryptonightR_instruction_mov70: - mov rcx, rdi -CryptonightR_instruction_mov71: - -CryptonightR_instruction_mov72: - -CryptonightR_instruction_mov73: - -CryptonightR_instruction_mov74: - -CryptonightR_instruction_mov75: - -CryptonightR_instruction_mov76: - -CryptonightR_instruction_mov77: - mov rcx, rdi -CryptonightR_instruction_mov78: - mov rcx, rdi -CryptonightR_instruction_mov79: - -CryptonightR_instruction_mov80: - -CryptonightR_instruction_mov81: - -CryptonightR_instruction_mov82: - -CryptonightR_instruction_mov83: - -CryptonightR_instruction_mov84: - -CryptonightR_instruction_mov85: - mov rcx, rdi -CryptonightR_instruction_mov86: - mov rcx, rdi -CryptonightR_instruction_mov87: - -CryptonightR_instruction_mov88: - -CryptonightR_instruction_mov89: - -CryptonightR_instruction_mov90: - -CryptonightR_instruction_mov91: - -CryptonightR_instruction_mov92: - -CryptonightR_instruction_mov93: - mov rcx, rdi -CryptonightR_instruction_mov94: - mov rcx, rdi -CryptonightR_instruction_mov95: - -CryptonightR_instruction_mov96: - -CryptonightR_instruction_mov97: - -CryptonightR_instruction_mov98: - -CryptonightR_instruction_mov99: - -CryptonightR_instruction_mov100: - -CryptonightR_instruction_mov101: - mov rcx, rbp -CryptonightR_instruction_mov102: - mov rcx, rbp -CryptonightR_instruction_mov103: - -CryptonightR_instruction_mov104: - -CryptonightR_instruction_mov105: - -CryptonightR_instruction_mov106: - -CryptonightR_instruction_mov107: - -CryptonightR_instruction_mov108: - -CryptonightR_instruction_mov109: - mov rcx, rbp -CryptonightR_instruction_mov110: - mov rcx, rbp -CryptonightR_instruction_mov111: - -CryptonightR_instruction_mov112: - -CryptonightR_instruction_mov113: - -CryptonightR_instruction_mov114: - -CryptonightR_instruction_mov115: - -CryptonightR_instruction_mov116: - -CryptonightR_instruction_mov117: - mov rcx, rbp 
-CryptonightR_instruction_mov118: - mov rcx, rbp -CryptonightR_instruction_mov119: - -CryptonightR_instruction_mov120: - -CryptonightR_instruction_mov121: - -CryptonightR_instruction_mov122: - -CryptonightR_instruction_mov123: - -CryptonightR_instruction_mov124: - -CryptonightR_instruction_mov125: - mov rcx, rbp -CryptonightR_instruction_mov126: - mov rcx, rbp -CryptonightR_instruction_mov127: - -CryptonightR_instruction_mov128: - -CryptonightR_instruction_mov129: - -CryptonightR_instruction_mov130: - -CryptonightR_instruction_mov131: - -CryptonightR_instruction_mov132: - -CryptonightR_instruction_mov133: - mov rcx, rsp -CryptonightR_instruction_mov134: - mov rcx, rsp -CryptonightR_instruction_mov135: - -CryptonightR_instruction_mov136: - -CryptonightR_instruction_mov137: - -CryptonightR_instruction_mov138: - -CryptonightR_instruction_mov139: - -CryptonightR_instruction_mov140: - -CryptonightR_instruction_mov141: - mov rcx, rsp -CryptonightR_instruction_mov142: - mov rcx, rsp -CryptonightR_instruction_mov143: - -CryptonightR_instruction_mov144: - -CryptonightR_instruction_mov145: - -CryptonightR_instruction_mov146: - -CryptonightR_instruction_mov147: - -CryptonightR_instruction_mov148: - -CryptonightR_instruction_mov149: - mov rcx, rsp -CryptonightR_instruction_mov150: - mov rcx, rsp -CryptonightR_instruction_mov151: - -CryptonightR_instruction_mov152: - -CryptonightR_instruction_mov153: - -CryptonightR_instruction_mov154: - -CryptonightR_instruction_mov155: - -CryptonightR_instruction_mov156: - -CryptonightR_instruction_mov157: - mov rcx, rsp -CryptonightR_instruction_mov158: - mov rcx, rsp -CryptonightR_instruction_mov159: - -CryptonightR_instruction_mov160: - -CryptonightR_instruction_mov161: - -CryptonightR_instruction_mov162: - -CryptonightR_instruction_mov163: - -CryptonightR_instruction_mov164: - -CryptonightR_instruction_mov165: - mov rcx, r15 -CryptonightR_instruction_mov166: - mov rcx, r15 -CryptonightR_instruction_mov167: - 
-CryptonightR_instruction_mov168: - -CryptonightR_instruction_mov169: - -CryptonightR_instruction_mov170: - -CryptonightR_instruction_mov171: - -CryptonightR_instruction_mov172: - -CryptonightR_instruction_mov173: - mov rcx, r15 -CryptonightR_instruction_mov174: - mov rcx, r15 -CryptonightR_instruction_mov175: - -CryptonightR_instruction_mov176: - -CryptonightR_instruction_mov177: - -CryptonightR_instruction_mov178: - -CryptonightR_instruction_mov179: - -CryptonightR_instruction_mov180: - -CryptonightR_instruction_mov181: - mov rcx, r15 -CryptonightR_instruction_mov182: - mov rcx, r15 -CryptonightR_instruction_mov183: - -CryptonightR_instruction_mov184: - -CryptonightR_instruction_mov185: - -CryptonightR_instruction_mov186: - -CryptonightR_instruction_mov187: - -CryptonightR_instruction_mov188: - -CryptonightR_instruction_mov189: - mov rcx, r15 -CryptonightR_instruction_mov190: - mov rcx, r15 -CryptonightR_instruction_mov191: - -CryptonightR_instruction_mov192: - -CryptonightR_instruction_mov193: - -CryptonightR_instruction_mov194: - -CryptonightR_instruction_mov195: - -CryptonightR_instruction_mov196: - -CryptonightR_instruction_mov197: - mov rcx, rax -CryptonightR_instruction_mov198: - mov rcx, rax -CryptonightR_instruction_mov199: - -CryptonightR_instruction_mov200: - -CryptonightR_instruction_mov201: - -CryptonightR_instruction_mov202: - -CryptonightR_instruction_mov203: - -CryptonightR_instruction_mov204: - -CryptonightR_instruction_mov205: - mov rcx, rax -CryptonightR_instruction_mov206: - mov rcx, rax -CryptonightR_instruction_mov207: - -CryptonightR_instruction_mov208: - -CryptonightR_instruction_mov209: - -CryptonightR_instruction_mov210: - -CryptonightR_instruction_mov211: - -CryptonightR_instruction_mov212: - -CryptonightR_instruction_mov213: - mov rcx, rax -CryptonightR_instruction_mov214: - mov rcx, rax -CryptonightR_instruction_mov215: - -CryptonightR_instruction_mov216: - -CryptonightR_instruction_mov217: - -CryptonightR_instruction_mov218: - 
-CryptonightR_instruction_mov219: - -CryptonightR_instruction_mov220: - -CryptonightR_instruction_mov221: - mov rcx, rax -CryptonightR_instruction_mov222: - mov rcx, rax -CryptonightR_instruction_mov223: - -CryptonightR_instruction_mov224: - -CryptonightR_instruction_mov225: - -CryptonightR_instruction_mov226: - -CryptonightR_instruction_mov227: - -CryptonightR_instruction_mov228: - -CryptonightR_instruction_mov229: - mov rcx, rdx -CryptonightR_instruction_mov230: - mov rcx, rdx -CryptonightR_instruction_mov231: - -CryptonightR_instruction_mov232: - -CryptonightR_instruction_mov233: - -CryptonightR_instruction_mov234: - -CryptonightR_instruction_mov235: - -CryptonightR_instruction_mov236: - -CryptonightR_instruction_mov237: - mov rcx, rdx -CryptonightR_instruction_mov238: - mov rcx, rdx -CryptonightR_instruction_mov239: - -CryptonightR_instruction_mov240: - -CryptonightR_instruction_mov241: - -CryptonightR_instruction_mov242: - -CryptonightR_instruction_mov243: - -CryptonightR_instruction_mov244: - -CryptonightR_instruction_mov245: - mov rcx, rdx -CryptonightR_instruction_mov246: - mov rcx, rdx -CryptonightR_instruction_mov247: - -CryptonightR_instruction_mov248: - -CryptonightR_instruction_mov249: - -CryptonightR_instruction_mov250: - -CryptonightR_instruction_mov251: - -CryptonightR_instruction_mov252: - -CryptonightR_instruction_mov253: - mov rcx, rdx -CryptonightR_instruction_mov254: - mov rcx, rdx -CryptonightR_instruction_mov255: - -CryptonightR_instruction_mov256: - -_TEXT_CN_TEMPLATE ENDS -END diff --git a/src/crypto/asm/win64/CryptonightR_template_win.inc b/src/crypto/asm/win64/CryptonightR_template_win.inc deleted file mode 100644 index d24eedaa..00000000 --- a/src/crypto/asm/win64/CryptonightR_template_win.inc +++ /dev/null @@ -1,536 +0,0 @@ -PUBLIC CryptonightR_template_part1 -PUBLIC CryptonightR_template_mainloop -PUBLIC CryptonightR_template_part2 -PUBLIC CryptonightR_template_part3 -PUBLIC CryptonightR_template_end -PUBLIC 
CryptonightR_template_double_part1 -PUBLIC CryptonightR_template_double_mainloop -PUBLIC CryptonightR_template_double_part2 -PUBLIC CryptonightR_template_double_part3 -PUBLIC CryptonightR_template_double_part4 -PUBLIC CryptonightR_template_double_end - -ALIGN(64) -CryptonightR_template_part1: - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - push rdi - sub rsp, 64 - mov r12, rcx - mov r8, QWORD PTR [r12+32] - mov rdx, r12 - xor r8, QWORD PTR [r12] - mov r15, QWORD PTR [r12+40] - mov r9, r8 - xor r15, QWORD PTR [r12+8] - mov r11, QWORD PTR [r12+224] - mov r12, QWORD PTR [r12+56] - xor r12, QWORD PTR [rdx+24] - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - movaps XMMWORD PTR [rsp+48], xmm6 - movd xmm0, r12 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - movaps XMMWORD PTR [rsp], xmm9 - mov r12, QWORD PTR [rdx+88] - xor r12, QWORD PTR [rdx+72] - movd xmm6, rax - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm6, xmm0 - and r9d, 2097136 - movd xmm0, r12 - movd xmm7, rax - punpcklqdq xmm7, xmm0 - mov r10d, r9d - movd xmm9, rsp - mov rsp, r8 - mov r8d, 524288 - - mov ebx, [rdx+96] - mov esi, [rdx+100] - mov edi, [rdx+104] - mov ebp, [rdx+108] - - ALIGN(64) -CryptonightR_template_mainloop: - movdqa xmm5, XMMWORD PTR [r9+r11] - movd xmm0, r15 - movd xmm4, rsp - punpcklqdq xmm4, xmm0 - lea rdx, QWORD PTR [r9+r11] - - aesenc xmm5, xmm4 - - mov r13d, r9d - mov eax, r9d - xor r9d, 48 - xor r13d, 16 - xor eax, 32 - movdqu xmm0, XMMWORD PTR [r9+r11] - movaps xmm3, xmm0 - movdqu xmm2, XMMWORD PTR [r13+r11] - movdqu xmm1, XMMWORD PTR [rax+r11] - pxor xmm0, xmm2 - pxor xmm5, xmm1 - pxor xmm5, xmm0 - - movd r12, xmm5 - movd r10d, xmm5 - and r10d, 2097136 - - paddq xmm3, xmm7 - paddq xmm2, xmm6 - paddq xmm1, xmm4 - movdqu XMMWORD PTR [r13+r11], xmm3 - movdqu XMMWORD PTR [rax+r11], xmm2 - movdqu 
XMMWORD PTR [r9+r11], xmm1 - - movdqa xmm0, xmm5 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [rdx], xmm0 - - lea r13d, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or r13, rdx - - movd eax, xmm6 - movd edx, xmm7 - pextrd r9d, xmm7, 2 - - xor r13, QWORD PTR [r10+r11] - mov r14, QWORD PTR [r10+r11+8] - -CryptonightR_template_part2: - lea rcx, [r10+r11] - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor rsp, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, rdx - xor r15, rax - - mov rax, r13 - mul r12 - add r15, rax - add rsp, rdx - - mov r9d, r10d - mov r12d, r10d - xor r9d, 16 - xor r12d, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [r12+r11] - movaps xmm3, xmm1 - movdqa xmm2, XMMWORD PTR [r9+r11] - movdqa xmm0, XMMWORD PTR [r10+r11] - pxor xmm1, xmm2 - pxor xmm5, xmm0 - pxor xmm5, xmm1 - paddq xmm3, xmm4 - paddq xmm2, xmm6 - paddq xmm0, xmm7 - movdqu XMMWORD PTR [r9+r11], xmm0 - movdqu XMMWORD PTR [r12+r11], xmm2 - movdqu XMMWORD PTR [r10+r11], xmm3 - - movdqa xmm7, xmm6 - mov QWORD PTR [rcx], rsp - xor rsp, r13 - mov r9d, esp - mov QWORD PTR [rcx+8], r15 - and r9d, 2097136 - xor r15, r14 - movdqa xmm6, xmm5 - dec r8d - jnz CryptonightR_template_mainloop - -CryptonightR_template_part3: - movd rsp, xmm9 - - mov rbx, QWORD PTR [rsp+136] - mov rbp, QWORD PTR [rsp+144] - mov rsi, QWORD PTR [rsp+152] - movaps xmm6, XMMWORD PTR [rsp+48] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+16] - movaps xmm9, XMMWORD PTR [rsp] - add rsp, 64 - pop rdi - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - ret 0 -CryptonightR_template_end: - -ALIGN(64) -CryptonightR_template_double_part1: - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 320 - mov r14, QWORD PTR [rcx+32] - mov r8, rcx - xor r14, QWORD PTR [rcx] - mov r12, QWORD PTR [rcx+40] - mov ebx, r14d - mov rsi, QWORD PTR [rcx+224] - and ebx, 2097136 - xor r12, QWORD PTR 
[rcx+8] - mov rcx, QWORD PTR [rcx+56] - xor rcx, QWORD PTR [r8+24] - mov rax, QWORD PTR [r8+48] - xor rax, QWORD PTR [r8+16] - mov r15, QWORD PTR [rdx+32] - xor r15, QWORD PTR [rdx] - movd xmm0, rcx - mov rcx, QWORD PTR [r8+88] - xor rcx, QWORD PTR [r8+72] - mov r13, QWORD PTR [rdx+40] - mov rdi, QWORD PTR [rdx+224] - xor r13, QWORD PTR [rdx+8] - movaps XMMWORD PTR [rsp+160], xmm6 - movaps XMMWORD PTR [rsp+176], xmm7 - movaps XMMWORD PTR [rsp+192], xmm8 - movaps XMMWORD PTR [rsp+208], xmm9 - movaps XMMWORD PTR [rsp+224], xmm10 - movaps XMMWORD PTR [rsp+240], xmm11 - movaps XMMWORD PTR [rsp+256], xmm12 - movaps XMMWORD PTR [rsp+272], xmm13 - movaps XMMWORD PTR [rsp+288], xmm14 - movaps XMMWORD PTR [rsp+304], xmm15 - movd xmm7, rax - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - - movaps xmm1, XMMWORD PTR [rdx+96] - movaps xmm2, XMMWORD PTR [r8+96] - movaps XMMWORD PTR [rsp], xmm1 - movaps XMMWORD PTR [rsp+16], xmm2 - - mov r8d, r15d - punpcklqdq xmm7, xmm0 - movd xmm0, rcx - mov rcx, QWORD PTR [rdx+56] - xor rcx, QWORD PTR [rdx+24] - movd xmm9, rax - mov QWORD PTR [rsp+128], rsi - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - punpcklqdq xmm9, xmm0 - movd xmm0, rcx - mov rcx, QWORD PTR [rdx+88] - xor rcx, QWORD PTR [rdx+72] - movd xmm8, rax - mov QWORD PTR [rsp+136], rdi - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm8, xmm0 - and r8d, 2097136 - movd xmm0, rcx - mov r11d, 524288 - movd xmm10, rax - punpcklqdq xmm10, xmm0 - - movd xmm14, QWORD PTR [rsp+128] - movd xmm15, QWORD PTR [rsp+136] - - ALIGN(64) -CryptonightR_template_double_mainloop: - movdqu xmm6, XMMWORD PTR [rbx+rsi] - movd xmm0, r12 - mov ecx, ebx - movd xmm3, r14 - punpcklqdq xmm3, xmm0 - xor ebx, 16 - aesenc xmm6, xmm3 - movd xmm4, r15 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm0 - xor ebx, 48 - paddq xmm0, xmm7 - movdqu xmm1, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm1 - movdqu XMMWORD PTR [rbx+rsi], xmm0 - paddq xmm1, xmm3 - xor ebx, 16 - 
mov eax, ebx - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm0 - movd rdx, xmm6 - movdqu XMMWORD PTR [rbx+rsi], xmm1 - paddq xmm0, xmm9 - movdqu XMMWORD PTR [rax+rsi], xmm0 - movdqa xmm0, xmm6 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [rcx+rsi], xmm0 - mov esi, edx - movdqu xmm5, XMMWORD PTR [r8+rdi] - and esi, 2097136 - mov ecx, r8d - movd xmm0, r13 - punpcklqdq xmm4, xmm0 - xor r8d, 16 - aesenc xmm5, xmm4 - movdqu xmm0, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm0 - xor r8d, 48 - paddq xmm0, xmm8 - movdqu xmm1, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm1 - movdqu XMMWORD PTR [r8+rdi], xmm0 - paddq xmm1, xmm4 - xor r8d, 16 - mov eax, r8d - xor rax, 32 - movdqu xmm0, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm0 - movdqu XMMWORD PTR [r8+rdi], xmm1 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rdi], xmm0 - movdqa xmm0, xmm5 - pxor xmm0, xmm8 - movdqu XMMWORD PTR [rcx+rdi], xmm0 - movd rdi, xmm5 - movd rcx, xmm14 - mov ebp, edi - mov r8, QWORD PTR [rcx+rsi] - mov r10, QWORD PTR [rcx+rsi+8] - lea r9, QWORD PTR [rcx+rsi] - xor esi, 16 - - movd xmm0, rsp - movd xmm1, rsi - movd xmm2, rdi - movd xmm11, rbp - movd xmm12, r15 - movd xmm13, rdx - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp+16] - mov esi, DWORD PTR [rsp+20] - mov edi, DWORD PTR [rsp+24] - mov ebp, DWORD PTR [rsp+28] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - xor r8, rax - - movd esp, xmm3 - pextrd r15d, xmm3, 2 - movd eax, xmm7 - movd edx, xmm9 - pextrd r9d, xmm9, 2 - -CryptonightR_template_double_part2: - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor r14, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, rdx - xor r12, rax - - movd rsp, xmm0 - mov DWORD PTR [rsp+16], ebx - mov DWORD PTR [rsp+20], esi - mov DWORD PTR [rsp+24], edi - mov DWORD PTR [rsp+28], ebp - - movd rsi, xmm1 - movd rdi, xmm2 - movd rbp, xmm11 - movd r15, xmm12 - movd rdx, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rbx, r8 - mov rax, r8 - mul rdx - 
and ebp, 2097136 - mov r8, rax - movdqu xmm1, XMMWORD PTR [rcx+rsi] - pxor xmm6, xmm1 - xor esi, 48 - paddq xmm1, xmm7 - movdqu xmm2, XMMWORD PTR [rsi+rcx] - pxor xmm6, xmm2 - paddq xmm2, xmm3 - movdqu XMMWORD PTR [rsi+rcx], xmm1 - xor esi, 16 - mov eax, esi - mov rsi, rcx - movdqu xmm0, XMMWORD PTR [rax+rcx] - pxor xmm6, xmm0 - movdqu XMMWORD PTR [rax+rcx], xmm2 - paddq xmm0, xmm9 - add r12, r8 - xor rax, 32 - add r14, rdx - movdqa xmm9, xmm7 - movdqa xmm7, xmm6 - movdqu XMMWORD PTR [rax+rcx], xmm0 - mov QWORD PTR [r9+8], r12 - xor r12, r10 - mov QWORD PTR [r9], r14 - movd rcx, xmm15 - xor r14, rbx - mov r10d, ebp - mov ebx, r14d - xor ebp, 16 - and ebx, 2097136 - mov r8, QWORD PTR [r10+rcx] - mov r9, QWORD PTR [r10+rcx+8] - - movd xmm0, rsp - movd xmm1, rbx - movd xmm2, rsi - movd xmm11, rdi - movd xmm12, rbp - movd xmm13, r15 - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp] - mov esi, DWORD PTR [rsp+4] - mov edi, DWORD PTR [rsp+8] - mov ebp, DWORD PTR [rsp+12] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - - xor r8, rax - movd xmm3, r8 - - movd esp, xmm4 - pextrd r15d, xmm4, 2 - movd eax, xmm8 - movd edx, xmm10 - pextrd r9d, xmm10, 2 - -CryptonightR_template_double_part3: - - movd r15, xmm13 - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor r15, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, rdx - xor r13, rax - - movd rsp, xmm0 - mov DWORD PTR [rsp], ebx - mov DWORD PTR [rsp+4], esi - mov DWORD PTR [rsp+8], edi - mov DWORD PTR [rsp+12], ebp - - movd rbx, xmm1 - movd rsi, xmm2 - movd rdi, xmm11 - movd rbp, xmm12 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rax, r8 - mul rdi - mov rdi, rcx - mov r8, rax - movdqu xmm1, XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm1 - xor ebp, 48 - paddq xmm1, xmm8 - add r13, r8 - movdqu xmm2, XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm2 - add r15, rdx - movdqu XMMWORD PTR [rbp+rcx], xmm1 - paddq xmm2, xmm4 - xor ebp, 16 - mov eax, ebp - xor rax, 32 - movdqu xmm0, 
XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm0 - movdqu XMMWORD PTR [rbp+rcx], xmm2 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rcx], xmm0 - movd rax, xmm3 - movdqa xmm10, xmm8 - mov QWORD PTR [r10+rcx], r15 - movdqa xmm8, xmm5 - xor r15, rax - mov QWORD PTR [r10+rcx+8], r13 - mov r8d, r15d - xor r13, r9 - and r8d, 2097136 - dec r11d - jnz CryptonightR_template_double_mainloop - -CryptonightR_template_double_part4: - - mov rbx, QWORD PTR [rsp+400] - movaps xmm6, XMMWORD PTR [rsp+160] - movaps xmm7, XMMWORD PTR [rsp+176] - movaps xmm8, XMMWORD PTR [rsp+192] - movaps xmm9, XMMWORD PTR [rsp+208] - movaps xmm10, XMMWORD PTR [rsp+224] - movaps xmm11, XMMWORD PTR [rsp+240] - movaps xmm12, XMMWORD PTR [rsp+256] - movaps xmm13, XMMWORD PTR [rsp+272] - movaps xmm14, XMMWORD PTR [rsp+288] - movaps xmm15, XMMWORD PTR [rsp+304] - add rsp, 320 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - ret 0 -CryptonightR_template_double_end: diff --git a/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc b/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc deleted file mode 100644 index 1c73f77c..00000000 --- a/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc +++ /dev/null @@ -1,268 +0,0 @@ -PUBLIC CryptonightWOW_soft_aes_template_part1 -PUBLIC CryptonightWOW_soft_aes_template_mainloop -PUBLIC CryptonightWOW_soft_aes_template_part2 -PUBLIC CryptonightWOW_soft_aes_template_part3 -PUBLIC CryptonightWOW_soft_aes_template_end - -ALIGN(64) -CryptonightWOW_soft_aes_template_part1: - mov rcx, [rcx] - - mov QWORD PTR [rsp+8], rcx - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 232 - - mov eax, [rcx+96] - mov ebx, [rcx+100] - mov esi, [rcx+104] - mov edx, [rcx+108] - mov [rsp+144], eax - mov [rsp+148], ebx - mov [rsp+152], esi - mov [rsp+156], edx - - mov rax, QWORD PTR [rcx+48] - mov r10, rcx - xor rax, QWORD PTR [rcx+16] - mov r8, QWORD PTR [rcx+32] - xor r8, QWORD PTR [rcx] - mov r9, QWORD 
PTR [rcx+40] - xor r9, QWORD PTR [rcx+8] - movd xmm4, rax - mov rdx, QWORD PTR [rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov r11, QWORD PTR [rcx+224] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r10+72] - mov rax, QWORD PTR [r10+80] - movd xmm0, rdx - xor rax, QWORD PTR [r10+64] - - movaps XMMWORD PTR [rsp+16], xmm6 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+48], xmm8 - movaps XMMWORD PTR [rsp+64], xmm9 - movaps XMMWORD PTR [rsp+80], xmm10 - movaps XMMWORD PTR [rsp+96], xmm11 - movaps XMMWORD PTR [rsp+112], xmm12 - movaps XMMWORD PTR [rsp+128], xmm13 - - movd xmm5, rax - - mov rax, r8 - punpcklqdq xmm4, xmm0 - and eax, 2097136 - movd xmm10, QWORD PTR [r10+96] - movd xmm0, rcx - mov rcx, QWORD PTR [r10+104] - xorps xmm9, xmm9 - mov QWORD PTR [rsp+328], rax - movd xmm12, r11 - mov QWORD PTR [rsp+320], r9 - punpcklqdq xmm5, xmm0 - movd xmm13, rcx - mov r12d, 524288 - - ALIGN(64) -CryptonightWOW_soft_aes_template_mainloop: - movd xmm11, r12d - mov r12, QWORD PTR [r10+272] - lea r13, QWORD PTR [rax+r11] - mov esi, DWORD PTR [r13] - movd xmm0, r9 - mov r10d, DWORD PTR [r13+4] - movd xmm7, r8 - mov ebp, DWORD PTR [r13+12] - mov r14d, DWORD PTR [r13+8] - mov rdx, QWORD PTR [rsp+328] - movzx ecx, sil - shr esi, 8 - punpcklqdq xmm7, xmm0 - mov r15d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - mov edi, DWORD PTR [r12+rcx*4] - movzx ecx, r14b - shr r14d, 8 - mov ebx, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - shr ebp, 8 - mov r9d, DWORD PTR [r12+rcx*4] - movzx ecx, r10b - shr r10d, 8 - xor r15d, DWORD PTR [r12+rcx*4+1024] - movzx ecx, r14b - shr r14d, 8 - mov eax, r14d - shr eax, 8 - xor edi, DWORD PTR [r12+rcx*4+1024] - add eax, 256 - movzx ecx, bpl - shr ebp, 8 - xor ebx, DWORD PTR [r12+rcx*4+1024] - movzx ecx, sil - shr esi, 8 - xor r9d, DWORD PTR [r12+rcx*4+1024] - add r12, 2048 - movzx ecx, r10b - shr r10d, 8 - add r10d, 256 - mov r11d, DWORD PTR [r12+rax*4] - xor r11d, DWORD PTR [r12+rcx*4] - xor r11d, r9d - movzx ecx, sil - mov r10d, 
DWORD PTR [r12+r10*4] - shr esi, 8 - add esi, 256 - xor r10d, DWORD PTR [r12+rcx*4] - movzx ecx, bpl - xor r10d, ebx - shr ebp, 8 - movd xmm1, r11d - add ebp, 256 - movd r11, xmm12 - mov r9d, DWORD PTR [r12+rcx*4] - xor r9d, DWORD PTR [r12+rsi*4] - mov eax, DWORD PTR [r12+rbp*4] - xor r9d, edi - movzx ecx, r14b - movd xmm0, r10d - movd xmm2, r9d - xor eax, DWORD PTR [r12+rcx*4] - mov rcx, rdx - xor eax, r15d - punpckldq xmm2, xmm1 - xor rcx, 16 - movd xmm6, eax - mov rax, rdx - punpckldq xmm6, xmm0 - xor rax, 32 - punpckldq xmm6, xmm2 - xor rdx, 48 - movdqu xmm2, XMMWORD PTR [rcx+r11] - pxor xmm6, xmm7 - paddq xmm2, xmm4 - movdqu xmm1, XMMWORD PTR [rax+r11] - movdqu xmm0, XMMWORD PTR [rdx+r11] - paddq xmm0, xmm5 - movdqu XMMWORD PTR [rcx+r11], xmm0 - movdqu XMMWORD PTR [rax+r11], xmm2 - movd rcx, xmm13 - paddq xmm1, xmm7 - movdqu XMMWORD PTR [rdx+r11], xmm1 - movd rdi, xmm6 - mov r10, rdi - and r10d, 2097136 - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [r13], xmm0 - - mov ebx, [rsp+144] - mov ebp, [rsp+152] - add ebx, [rsp+148] - add ebp, [rsp+156] - shl rbp, 32 - or rbx, rbp - - xor rbx, QWORD PTR [r10+r11] - lea r14, QWORD PTR [r10+r11] - mov rbp, QWORD PTR [r14+8] - - mov [rsp+160], rbx - mov [rsp+168], rdi - mov [rsp+176], rbp - mov [rsp+184], r10 - mov r10, rsp - - mov ebx, [rsp+144] - mov esi, [rsp+148] - mov edi, [rsp+152] - mov ebp, [rsp+156] - - movd esp, xmm7 - movaps xmm0, xmm7 - psrldq xmm0, 8 - movd r15d, xmm0 - movd eax, xmm4 - movd edx, xmm5 - -CryptonightWOW_soft_aes_template_part2: - mov rsp, r10 - mov [rsp+144], ebx - mov [rsp+148], esi - mov [rsp+152], edi - mov [rsp+156], ebp - - mov rbx, [rsp+160] - mov rdi, [rsp+168] - mov rbp, [rsp+176] - mov r10, [rsp+184] - - mov r9, r10 - xor r9, 16 - mov rcx, r10 - xor rcx, 32 - xor r10, 48 - mov rax, rbx - mul rdi - movdqu xmm2, XMMWORD PTR [r9+r11] - movdqu xmm1, XMMWORD PTR [rcx+r11] - paddq xmm1, xmm7 - movd xmm0, rax - movd xmm3, rdx - xor rax, QWORD PTR [r11+rcx+8] - xor rdx, QWORD PTR 
[rcx+r11] - punpcklqdq xmm3, xmm0 - add r8, rdx - movdqu xmm0, XMMWORD PTR [r10+r11] - pxor xmm2, xmm3 - paddq xmm0, xmm5 - paddq xmm2, xmm4 - movdqu XMMWORD PTR [r9+r11], xmm0 - movdqa xmm5, xmm4 - mov r9, QWORD PTR [rsp+320] - movdqa xmm4, xmm6 - add r9, rax - movdqu XMMWORD PTR [rcx+r11], xmm2 - movdqu XMMWORD PTR [r10+r11], xmm1 - mov r10, QWORD PTR [rsp+304] - movd r12d, xmm11 - mov QWORD PTR [r14], r8 - xor r8, rbx - mov rax, r8 - mov QWORD PTR [r14+8], r9 - and eax, 2097136 - xor r9, rbp - mov QWORD PTR [rsp+320], r9 - mov QWORD PTR [rsp+328], rax - sub r12d, 1 - jne CryptonightWOW_soft_aes_template_mainloop - -CryptonightWOW_soft_aes_template_part3: - movaps xmm6, XMMWORD PTR [rsp+16] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+48] - movaps xmm9, XMMWORD PTR [rsp+64] - movaps xmm10, XMMWORD PTR [rsp+80] - movaps xmm11, XMMWORD PTR [rsp+96] - movaps xmm12, XMMWORD PTR [rsp+112] - movaps xmm13, XMMWORD PTR [rsp+128] - - add rsp, 232 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - pop rbx - ret -CryptonightWOW_soft_aes_template_end: diff --git a/src/crypto/asm/win64/CryptonightWOW_template_win.inc b/src/crypto/asm/win64/CryptonightWOW_template_win.inc deleted file mode 100644 index 55c8c8df..00000000 --- a/src/crypto/asm/win64/CryptonightWOW_template_win.inc +++ /dev/null @@ -1,491 +0,0 @@ -PUBLIC CryptonightWOW_template_part1 -PUBLIC CryptonightWOW_template_mainloop -PUBLIC CryptonightWOW_template_part2 -PUBLIC CryptonightWOW_template_part3 -PUBLIC CryptonightWOW_template_end -PUBLIC CryptonightWOW_template_double_part1 -PUBLIC CryptonightWOW_template_double_mainloop -PUBLIC CryptonightWOW_template_double_part2 -PUBLIC CryptonightWOW_template_double_part3 -PUBLIC CryptonightWOW_template_double_part4 -PUBLIC CryptonightWOW_template_double_end - -ALIGN(64) -CryptonightWOW_template_part1: - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push r10 - push 
r11 - push r12 - push r13 - push r14 - push r15 - push rdi - sub rsp, 64 - mov r12, rcx - mov r8, QWORD PTR [r12+32] - mov rdx, r12 - xor r8, QWORD PTR [r12] - mov r15, QWORD PTR [r12+40] - mov r9, r8 - xor r15, QWORD PTR [r12+8] - mov r11, QWORD PTR [r12+224] - mov r12, QWORD PTR [r12+56] - xor r12, QWORD PTR [rdx+24] - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - movaps XMMWORD PTR [rsp+48], xmm6 - movd xmm0, r12 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - movaps XMMWORD PTR [rsp], xmm9 - mov r12, QWORD PTR [rdx+88] - xor r12, QWORD PTR [rdx+72] - movd xmm6, rax - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm6, xmm0 - and r9d, 2097136 - movd xmm0, r12 - movd xmm7, rax - punpcklqdq xmm7, xmm0 - mov r10d, r9d - movd xmm9, rsp - mov rsp, r8 - mov r8d, 524288 - - mov ebx, [rdx+96] - mov esi, [rdx+100] - mov edi, [rdx+104] - mov ebp, [rdx+108] - - ALIGN(64) -CryptonightWOW_template_mainloop: - movdqa xmm5, XMMWORD PTR [r9+r11] - movd xmm0, r15 - movd xmm4, rsp - punpcklqdq xmm4, xmm0 - lea rdx, QWORD PTR [r9+r11] - - aesenc xmm5, xmm4 - movd r10d, xmm5 - and r10d, 2097136 - - mov r12d, r9d - mov eax, r9d - xor r9d, 48 - xor r12d, 16 - xor eax, 32 - movdqu xmm0, XMMWORD PTR [r9+r11] - movdqu xmm2, XMMWORD PTR [r12+r11] - movdqu xmm1, XMMWORD PTR [rax+r11] - paddq xmm0, xmm7 - paddq xmm2, xmm6 - paddq xmm1, xmm4 - movdqu XMMWORD PTR [r12+r11], xmm0 - movd r12, xmm5 - movdqu XMMWORD PTR [rax+r11], xmm2 - movdqu XMMWORD PTR [r9+r11], xmm1 - - movdqa xmm0, xmm5 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [rdx], xmm0 - - lea r13d, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or r13, rdx - - xor r13, QWORD PTR [r10+r11] - mov r14, QWORD PTR [r10+r11+8] - - movd eax, xmm6 - movd edx, xmm7 - pextrd r9d, xmm7, 2 - -CryptonightWOW_template_part2: - mov rax, r13 - mul r12 - movd xmm0, rax - movd xmm3, rdx - punpcklqdq xmm3, xmm0 - - mov r9d, r10d - mov r12d, r10d - xor r9d, 16 - xor r12d, 32 - xor r10d, 48 - 
movdqa xmm1, XMMWORD PTR [r12+r11] - xor rdx, QWORD PTR [r12+r11] - xor rax, QWORD PTR [r11+r12+8] - movdqa xmm2, XMMWORD PTR [r9+r11] - pxor xmm3, xmm2 - paddq xmm7, XMMWORD PTR [r10+r11] - paddq xmm1, xmm4 - paddq xmm3, xmm6 - movdqu XMMWORD PTR [r9+r11], xmm7 - movdqu XMMWORD PTR [r12+r11], xmm3 - movdqu XMMWORD PTR [r10+r11], xmm1 - - movdqa xmm7, xmm6 - add r15, rax - add rsp, rdx - xor r10, 48 - mov QWORD PTR [r10+r11], rsp - xor rsp, r13 - mov r9d, esp - mov QWORD PTR [r10+r11+8], r15 - and r9d, 2097136 - xor r15, r14 - movdqa xmm6, xmm5 - dec r8d - jnz CryptonightWOW_template_mainloop - -CryptonightWOW_template_part3: - movd rsp, xmm9 - - mov rbx, QWORD PTR [rsp+136] - mov rbp, QWORD PTR [rsp+144] - mov rsi, QWORD PTR [rsp+152] - movaps xmm6, XMMWORD PTR [rsp+48] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+16] - movaps xmm9, XMMWORD PTR [rsp] - add rsp, 64 - pop rdi - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - ret 0 -CryptonightWOW_template_end: - -ALIGN(64) -CryptonightWOW_template_double_part1: - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 320 - mov r14, QWORD PTR [rcx+32] - mov r8, rcx - xor r14, QWORD PTR [rcx] - mov r12, QWORD PTR [rcx+40] - mov ebx, r14d - mov rsi, QWORD PTR [rcx+224] - and ebx, 2097136 - xor r12, QWORD PTR [rcx+8] - mov rcx, QWORD PTR [rcx+56] - xor rcx, QWORD PTR [r8+24] - mov rax, QWORD PTR [r8+48] - xor rax, QWORD PTR [r8+16] - mov r15, QWORD PTR [rdx+32] - xor r15, QWORD PTR [rdx] - movd xmm0, rcx - mov rcx, QWORD PTR [r8+88] - xor rcx, QWORD PTR [r8+72] - mov r13, QWORD PTR [rdx+40] - mov rdi, QWORD PTR [rdx+224] - xor r13, QWORD PTR [rdx+8] - movaps XMMWORD PTR [rsp+160], xmm6 - movaps XMMWORD PTR [rsp+176], xmm7 - movaps XMMWORD PTR [rsp+192], xmm8 - movaps XMMWORD PTR [rsp+208], xmm9 - movaps XMMWORD PTR [rsp+224], xmm10 - movaps XMMWORD PTR [rsp+240], xmm11 - movaps XMMWORD 
PTR [rsp+256], xmm12 - movaps XMMWORD PTR [rsp+272], xmm13 - movaps XMMWORD PTR [rsp+288], xmm14 - movaps XMMWORD PTR [rsp+304], xmm15 - movd xmm7, rax - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - - movaps xmm1, XMMWORD PTR [rdx+96] - movaps xmm2, XMMWORD PTR [r8+96] - movaps XMMWORD PTR [rsp], xmm1 - movaps XMMWORD PTR [rsp+16], xmm2 - - mov r8d, r15d - punpcklqdq xmm7, xmm0 - movd xmm0, rcx - mov rcx, QWORD PTR [rdx+56] - xor rcx, QWORD PTR [rdx+24] - movd xmm9, rax - mov QWORD PTR [rsp+128], rsi - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - punpcklqdq xmm9, xmm0 - movd xmm0, rcx - mov rcx, QWORD PTR [rdx+88] - xor rcx, QWORD PTR [rdx+72] - movd xmm8, rax - mov QWORD PTR [rsp+136], rdi - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm8, xmm0 - and r8d, 2097136 - movd xmm0, rcx - mov r11d, 524288 - movd xmm10, rax - punpcklqdq xmm10, xmm0 - - movd xmm14, QWORD PTR [rsp+128] - movd xmm15, QWORD PTR [rsp+136] - - ALIGN(64) -CryptonightWOW_template_double_mainloop: - movdqu xmm6, XMMWORD PTR [rbx+rsi] - movd xmm0, r12 - mov ecx, ebx - movd xmm3, r14 - punpcklqdq xmm3, xmm0 - xor ebx, 16 - aesenc xmm6, xmm3 - movd rdx, xmm6 - movd xmm4, r15 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - xor ebx, 48 - paddq xmm0, xmm7 - movdqu xmm1, XMMWORD PTR [rbx+rsi] - movdqu XMMWORD PTR [rbx+rsi], xmm0 - paddq xmm1, xmm3 - xor ebx, 16 - mov eax, ebx - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - movdqu XMMWORD PTR [rbx+rsi], xmm1 - paddq xmm0, xmm9 - movdqu XMMWORD PTR [rax+rsi], xmm0 - movdqa xmm0, xmm6 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [rcx+rsi], xmm0 - mov esi, edx - movdqu xmm5, XMMWORD PTR [r8+rdi] - and esi, 2097136 - mov ecx, r8d - movd xmm0, r13 - punpcklqdq xmm4, xmm0 - xor r8d, 16 - aesenc xmm5, xmm4 - movdqu xmm0, XMMWORD PTR [r8+rdi] - xor r8d, 48 - paddq xmm0, xmm8 - movdqu xmm1, XMMWORD PTR [r8+rdi] - movdqu XMMWORD PTR [r8+rdi], xmm0 - paddq xmm1, xmm4 - xor r8d, 16 - mov eax, r8d - xor rax, 32 - movdqu 
xmm0, XMMWORD PTR [r8+rdi] - movdqu XMMWORD PTR [r8+rdi], xmm1 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rdi], xmm0 - movdqa xmm0, xmm5 - pxor xmm0, xmm8 - movdqu XMMWORD PTR [rcx+rdi], xmm0 - movd rdi, xmm5 - movd rcx, xmm14 - mov ebp, edi - mov r8, QWORD PTR [rcx+rsi] - mov r10, QWORD PTR [rcx+rsi+8] - lea r9, QWORD PTR [rcx+rsi] - xor esi, 16 - - movd xmm0, rsp - movd xmm1, rsi - movd xmm2, rdi - movd xmm11, rbp - movd xmm12, r15 - movd xmm13, rdx - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp+16] - mov esi, DWORD PTR [rsp+20] - mov edi, DWORD PTR [rsp+24] - mov ebp, DWORD PTR [rsp+28] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - xor r8, rax - - movd esp, xmm3 - pextrd r15d, xmm3, 2 - movd eax, xmm7 - movd edx, xmm9 - pextrd r9d, xmm9, 2 - -CryptonightWOW_template_double_part2: - - movd rsp, xmm0 - mov DWORD PTR [rsp+16], ebx - mov DWORD PTR [rsp+20], esi - mov DWORD PTR [rsp+24], edi - mov DWORD PTR [rsp+28], ebp - - movd rsi, xmm1 - movd rdi, xmm2 - movd rbp, xmm11 - movd r15, xmm12 - movd rdx, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rbx, r8 - mov rax, r8 - mul rdx - and ebp, 2097136 - mov r8, rax - movd xmm1, rdx - movd xmm0, r8 - punpcklqdq xmm1, xmm0 - pxor xmm1, XMMWORD PTR [rcx+rsi] - xor esi, 48 - paddq xmm1, xmm7 - movdqu xmm2, XMMWORD PTR [rsi+rcx] - xor rdx, QWORD PTR [rsi+rcx] - paddq xmm2, xmm3 - xor r8, QWORD PTR [rsi+rcx+8] - movdqu XMMWORD PTR [rsi+rcx], xmm1 - xor esi, 16 - mov eax, esi - mov rsi, rcx - movdqu xmm0, XMMWORD PTR [rax+rcx] - movdqu XMMWORD PTR [rax+rcx], xmm2 - paddq xmm0, xmm9 - add r12, r8 - xor rax, 32 - add r14, rdx - movdqa xmm9, xmm7 - movdqa xmm7, xmm6 - movdqu XMMWORD PTR [rax+rcx], xmm0 - mov QWORD PTR [r9+8], r12 - xor r12, r10 - mov QWORD PTR [r9], r14 - movd rcx, xmm15 - xor r14, rbx - mov r10d, ebp - mov ebx, r14d - xor ebp, 16 - and ebx, 2097136 - mov r8, QWORD PTR [r10+rcx] - mov r9, QWORD PTR [r10+rcx+8] - - movd xmm0, rsp - movd xmm1, rbx - movd 
xmm2, rsi - movd xmm11, rdi - movd xmm12, rbp - movd xmm13, r15 - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp] - mov esi, DWORD PTR [rsp+4] - mov edi, DWORD PTR [rsp+8] - mov ebp, DWORD PTR [rsp+12] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - - xor r8, rax - movd xmm3, r8 - - movd esp, xmm4 - pextrd r15d, xmm4, 2 - movd eax, xmm8 - movd edx, xmm10 - pextrd r9d, xmm10, 2 - -CryptonightWOW_template_double_part3: - - movd rsp, xmm0 - mov DWORD PTR [rsp], ebx - mov DWORD PTR [rsp+4], esi - mov DWORD PTR [rsp+8], edi - mov DWORD PTR [rsp+12], ebp - - movd rbx, xmm1 - movd rsi, xmm2 - movd rdi, xmm11 - movd rbp, xmm12 - movd r15, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rax, r8 - mul rdi - movd xmm1, rdx - movd xmm0, rax - punpcklqdq xmm1, xmm0 - mov rdi, rcx - mov r8, rax - pxor xmm1, XMMWORD PTR [rbp+rcx] - xor ebp, 48 - paddq xmm1, xmm8 - xor r8, QWORD PTR [rbp+rcx+8] - xor rdx, QWORD PTR [rbp+rcx] - add r13, r8 - movdqu xmm2, XMMWORD PTR [rbp+rcx] - add r15, rdx - movdqu XMMWORD PTR [rbp+rcx], xmm1 - paddq xmm2, xmm4 - xor ebp, 16 - mov eax, ebp - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbp+rcx] - movdqu XMMWORD PTR [rbp+rcx], xmm2 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rcx], xmm0 - movd rax, xmm3 - movdqa xmm10, xmm8 - mov QWORD PTR [r10+rcx], r15 - movdqa xmm8, xmm5 - xor r15, rax - mov QWORD PTR [r10+rcx+8], r13 - mov r8d, r15d - xor r13, r9 - and r8d, 2097136 - dec r11d - jnz CryptonightWOW_template_double_mainloop - -CryptonightWOW_template_double_part4: - - mov rbx, QWORD PTR [rsp+400] - movaps xmm6, XMMWORD PTR [rsp+160] - movaps xmm7, XMMWORD PTR [rsp+176] - movaps xmm8, XMMWORD PTR [rsp+192] - movaps xmm9, XMMWORD PTR [rsp+208] - movaps xmm10, XMMWORD PTR [rsp+224] - movaps xmm11, XMMWORD PTR [rsp+240] - movaps xmm12, XMMWORD PTR [rsp+256] - movaps xmm13, XMMWORD PTR [rsp+272] - movaps xmm14, XMMWORD PTR [rsp+288] - movaps xmm15, XMMWORD PTR [rsp+304] - add rsp, 320 - pop r15 - pop r14 
- pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - ret 0 -CryptonightWOW_template_double_end: diff --git a/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc b/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc deleted file mode 100644 index 85077a20..00000000 --- a/src/crypto/asm/win64/cn2/cnv2_double_main_loop_sandybridge.inc +++ /dev/null @@ -1,413 +0,0 @@ - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov rax, rsp - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 184 - - stmxcsr DWORD PTR [rsp+272] - mov DWORD PTR [rsp+276], 24448 - ldmxcsr DWORD PTR [rsp+276] - - mov r13, QWORD PTR [rcx+224] - mov r9, rdx - mov r10, QWORD PTR [rcx+32] - mov r8, rcx - xor r10, QWORD PTR [rcx] - mov r14d, 524288 - mov r11, QWORD PTR [rcx+40] - xor r11, QWORD PTR [rcx+8] - mov rsi, QWORD PTR [rdx+224] - mov rdx, QWORD PTR [rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov rdi, QWORD PTR [r9+32] - xor rdi, QWORD PTR [r9] - mov rbp, QWORD PTR [r9+40] - xor rbp, QWORD PTR [r9+8] - movd xmm0, rdx - movaps XMMWORD PTR [rax-88], xmm6 - movaps XMMWORD PTR [rax-104], xmm7 - movaps XMMWORD PTR [rax-120], xmm8 - movaps XMMWORD PTR [rsp+112], xmm9 - movaps XMMWORD PTR [rsp+96], xmm10 - movaps XMMWORD PTR [rsp+80], xmm11 - movaps XMMWORD PTR [rsp+64], xmm12 - movaps XMMWORD PTR [rsp+48], xmm13 - movaps XMMWORD PTR [rsp+32], xmm14 - movaps XMMWORD PTR [rsp+16], xmm15 - mov rdx, r10 - movd xmm4, QWORD PTR [r8+96] - and edx, 2097136 - mov rax, QWORD PTR [rcx+48] - xorps xmm13, xmm13 - xor rax, QWORD PTR [rcx+16] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r8+72] - movd xmm5, QWORD PTR [r8+104] - movd xmm7, rax - - mov eax, 1 - shl rax, 52 - movd xmm14, rax - punpcklqdq xmm14, xmm14 - - mov eax, 1023 - shl rax, 52 - movd xmm12, rax - punpcklqdq xmm12, xmm12 - - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - punpcklqdq xmm7, xmm0 - movd xmm0, rcx - mov rcx, QWORD PTR [r9+56] - xor rcx, QWORD PTR [r9+24] - movd xmm3, 
rax - mov rax, QWORD PTR [r9+48] - xor rax, QWORD PTR [r9+16] - punpcklqdq xmm3, xmm0 - movd xmm0, rcx - mov QWORD PTR [rsp], r13 - mov rcx, QWORD PTR [r9+88] - xor rcx, QWORD PTR [r9+72] - movd xmm6, rax - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - punpcklqdq xmm6, xmm0 - movd xmm0, rcx - mov QWORD PTR [rsp+256], r10 - mov rcx, rdi - mov QWORD PTR [rsp+264], r11 - movd xmm8, rax - and ecx, 2097136 - punpcklqdq xmm8, xmm0 - movd xmm0, QWORD PTR [r9+96] - punpcklqdq xmm4, xmm0 - movd xmm0, QWORD PTR [r9+104] - lea r8, QWORD PTR [rcx+rsi] - movdqu xmm11, XMMWORD PTR [r8] - punpcklqdq xmm5, xmm0 - lea r9, QWORD PTR [rdx+r13] - movdqu xmm15, XMMWORD PTR [r9] - - ALIGN(64) -main_loop_double_sandybridge: - movdqu xmm9, xmm15 - mov eax, edx - mov ebx, edx - xor eax, 16 - xor ebx, 32 - xor edx, 48 - - movd xmm0, r11 - movd xmm2, r10 - punpcklqdq xmm2, xmm0 - aesenc xmm9, xmm2 - - movdqu xmm0, XMMWORD PTR [rax+r13] - movdqu xmm1, XMMWORD PTR [rbx+r13] - paddq xmm0, xmm7 - paddq xmm1, xmm2 - movdqu XMMWORD PTR [rbx+r13], xmm0 - movdqu xmm0, XMMWORD PTR [rdx+r13] - movdqu XMMWORD PTR [rdx+r13], xmm1 - paddq xmm0, xmm3 - movdqu XMMWORD PTR [rax+r13], xmm0 - - movd r11, xmm9 - mov edx, r11d - and edx, 2097136 - movdqa xmm0, xmm9 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [r9], xmm0 - - lea rbx, QWORD PTR [rdx+r13] - mov r10, QWORD PTR [rdx+r13] - - movdqu xmm10, xmm11 - movd xmm0, rbp - movd xmm11, rdi - punpcklqdq xmm11, xmm0 - aesenc xmm10, xmm11 - - mov eax, ecx - mov r12d, ecx - xor eax, 16 - xor r12d, 32 - xor ecx, 48 - - movdqu xmm0, XMMWORD PTR [rax+rsi] - paddq xmm0, xmm6 - movdqu xmm1, XMMWORD PTR [r12+rsi] - movdqu XMMWORD PTR [r12+rsi], xmm0 - paddq xmm1, xmm11 - movdqu xmm0, XMMWORD PTR [rcx+rsi] - movdqu XMMWORD PTR [rcx+rsi], xmm1 - paddq xmm0, xmm8 - movdqu XMMWORD PTR [rax+rsi], xmm0 - - movd rcx, xmm10 - and ecx, 2097136 - - movdqa xmm0, xmm10 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [r8], xmm0 - mov r12, QWORD PTR [rcx+rsi] - - mov r9, QWORD PTR 
[rbx+8] - - xor edx, 16 - mov r8d, edx - mov r15d, edx - - movd rdx, xmm5 - shl rdx, 32 - movd rax, xmm4 - xor rdx, rax - xor r10, rdx - mov rax, r10 - mul r11 - mov r11d, r8d - xor r11d, 48 - movd xmm0, rdx - xor rdx, [r11+r13] - movd xmm1, rax - xor rax, [r11+r13+8] - punpcklqdq xmm0, xmm1 - - pxor xmm0, XMMWORD PTR [r8+r13] - xor r8d, 32 - movdqu xmm1, XMMWORD PTR [r11+r13] - paddq xmm0, xmm7 - paddq xmm1, xmm2 - movdqu XMMWORD PTR [r11+r13], xmm0 - movdqu xmm0, XMMWORD PTR [r8+r13] - movdqu XMMWORD PTR [r8+r13], xmm1 - paddq xmm0, xmm3 - movdqu XMMWORD PTR [r15+r13], xmm0 - - mov r11, QWORD PTR [rsp+256] - add r11, rdx - mov rdx, QWORD PTR [rsp+264] - add rdx, rax - mov QWORD PTR [rbx], r11 - xor r11, r10 - mov QWORD PTR [rbx+8], rdx - xor rdx, r9 - mov QWORD PTR [rsp+256], r11 - and r11d, 2097136 - mov QWORD PTR [rsp+264], rdx - mov QWORD PTR [rsp+8], r11 - lea r15, QWORD PTR [r11+r13] - movdqu xmm15, XMMWORD PTR [r11+r13] - lea r13, QWORD PTR [rsi+rcx] - movdqa xmm0, xmm5 - psrldq xmm0, 8 - movaps xmm2, xmm13 - movd r10, xmm0 - psllq xmm5, 1 - shl r10, 32 - movdqa xmm0, xmm9 - psrldq xmm0, 8 - movdqa xmm1, xmm10 - movd r11, xmm0 - psrldq xmm1, 8 - movd r8, xmm1 - psrldq xmm4, 8 - movaps xmm0, xmm13 - movd rax, xmm4 - xor r10, rax - movaps xmm1, xmm13 - xor r10, r12 - lea rax, QWORD PTR [r11+1] - shr rax, 1 - movdqa xmm3, xmm9 - punpcklqdq xmm3, xmm10 - paddq xmm5, xmm3 - movd rdx, xmm5 - psrldq xmm5, 8 - cvtsi2sd xmm2, rax - or edx, -2147483647 - lea rax, QWORD PTR [r8+1] - shr rax, 1 - movd r9, xmm5 - cvtsi2sd xmm0, rax - or r9d, -2147483647 - cvtsi2sd xmm1, rdx - unpcklpd xmm2, xmm0 - movaps xmm0, xmm13 - cvtsi2sd xmm0, r9 - unpcklpd xmm1, xmm0 - divpd xmm2, xmm1 - paddq xmm2, xmm14 - cvttsd2si rax, xmm2 - psrldq xmm2, 8 - mov rbx, rax - imul rax, rdx - sub r11, rax - js div_fix_1_sandybridge -div_fix_1_ret_sandybridge: - - cvttsd2si rdx, xmm2 - mov rax, rdx - imul rax, r9 - movd xmm2, r11d - movd xmm4, ebx - sub r8, rax - js div_fix_2_sandybridge 
-div_fix_2_ret_sandybridge: - - movd xmm1, r8d - movd xmm0, edx - punpckldq xmm2, xmm1 - punpckldq xmm4, xmm0 - punpckldq xmm4, xmm2 - paddq xmm3, xmm4 - movdqa xmm0, xmm3 - psrlq xmm0, 12 - paddq xmm0, xmm12 - sqrtpd xmm1, xmm0 - movd r9, xmm1 - movdqa xmm5, xmm1 - psrlq xmm5, 19 - test r9, 524287 - je sqrt_fix_1_sandybridge -sqrt_fix_1_ret_sandybridge: - - movd r9, xmm10 - psrldq xmm1, 8 - movd r8, xmm1 - test r8, 524287 - je sqrt_fix_2_sandybridge -sqrt_fix_2_ret_sandybridge: - - mov r12d, ecx - mov r8d, ecx - xor r12d, 16 - xor r8d, 32 - xor ecx, 48 - mov rax, r10 - mul r9 - movd xmm0, rax - movd xmm3, rdx - punpcklqdq xmm3, xmm0 - - movdqu xmm0, XMMWORD PTR [r12+rsi] - pxor xmm0, xmm3 - movdqu xmm1, XMMWORD PTR [r8+rsi] - xor rdx, [r8+rsi] - xor rax, [r8+rsi+8] - movdqu xmm3, XMMWORD PTR [rcx+rsi] - paddq xmm0, xmm6 - paddq xmm1, xmm11 - paddq xmm3, xmm8 - movdqu XMMWORD PTR [r8+rsi], xmm0 - movdqu XMMWORD PTR [rcx+rsi], xmm1 - movdqu XMMWORD PTR [r12+rsi], xmm3 - - add rdi, rdx - mov QWORD PTR [r13], rdi - xor rdi, r10 - mov ecx, edi - and ecx, 2097136 - lea r8, QWORD PTR [rcx+rsi] - - mov rdx, QWORD PTR [r13+8] - add rbp, rax - mov QWORD PTR [r13+8], rbp - movdqu xmm11, XMMWORD PTR [rcx+rsi] - xor rbp, rdx - mov r13, QWORD PTR [rsp] - movdqa xmm3, xmm7 - mov rdx, QWORD PTR [rsp+8] - movdqa xmm8, xmm6 - mov r10, QWORD PTR [rsp+256] - movdqa xmm7, xmm9 - mov r11, QWORD PTR [rsp+264] - movdqa xmm6, xmm10 - mov r9, r15 - dec r14d - jne main_loop_double_sandybridge - - ldmxcsr DWORD PTR [rsp+272] - movaps xmm13, XMMWORD PTR [rsp+48] - lea r11, QWORD PTR [rsp+184] - movaps xmm6, XMMWORD PTR [r11-24] - movaps xmm7, XMMWORD PTR [r11-40] - movaps xmm8, XMMWORD PTR [r11-56] - movaps xmm9, XMMWORD PTR [r11-72] - movaps xmm10, XMMWORD PTR [r11-88] - movaps xmm11, XMMWORD PTR [r11-104] - movaps xmm12, XMMWORD PTR [r11-120] - movaps xmm14, XMMWORD PTR [rsp+32] - movaps xmm15, XMMWORD PTR [rsp+16] - mov rsp, r11 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - 
pop rbp - pop rbx - jmp cnv2_double_mainloop_asm_sandybridge_endp - -div_fix_1_sandybridge: - dec rbx - add r11, rdx - jmp div_fix_1_ret_sandybridge - -div_fix_2_sandybridge: - dec rdx - add r8, r9 - jmp div_fix_2_ret_sandybridge - -sqrt_fix_1_sandybridge: - movd r8, xmm3 - movdqa xmm0, xmm5 - psrldq xmm0, 8 - dec r9 - mov r11d, -1022 - shl r11, 32 - mov rax, r9 - shr r9, 19 - shr rax, 20 - mov rdx, r9 - sub rdx, rax - lea rdx, [rdx+r11+1] - add rax, r11 - imul rdx, rax - sub rdx, r8 - adc r9, 0 - movd xmm5, r9 - punpcklqdq xmm5, xmm0 - jmp sqrt_fix_1_ret_sandybridge - -sqrt_fix_2_sandybridge: - psrldq xmm3, 8 - movd r11, xmm3 - dec r8 - mov ebx, -1022 - shl rbx, 32 - mov rax, r8 - shr r8, 19 - shr rax, 20 - mov rdx, r8 - sub rdx, rax - lea rdx, [rdx+rbx+1] - add rax, rbx - imul rdx, rax - sub rdx, r11 - adc r8, 0 - movd xmm0, r8 - punpcklqdq xmm5, xmm0 - jmp sqrt_fix_2_ret_sandybridge - -cnv2_double_mainloop_asm_sandybridge_endp: diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc deleted file mode 100644 index f17017a0..00000000 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_bulldozer.inc +++ /dev/null @@ -1,182 +0,0 @@ - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 64 - - stmxcsr DWORD PTR [rsp] - mov DWORD PTR [rsp+4], 24448 - ldmxcsr DWORD PTR [rsp+4] - - mov rax, QWORD PTR [rcx+48] - mov r9, rcx - xor rax, QWORD PTR [rcx+16] - mov ebp, 524288 - mov r8, QWORD PTR [rcx+32] - xor r8, QWORD PTR [rcx] - mov r11, QWORD PTR [rcx+40] - mov r10, r8 - mov rdx, QWORD PTR [rcx+56] - movd xmm3, rax - xor rdx, QWORD PTR [rcx+24] - xor r11, QWORD PTR [rcx+8] - mov rbx, QWORD PTR [rcx+224] - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - movd xmm0, rdx - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r9+72] - mov rdi, QWORD PTR [r9+104] - and r10d, 2097136 - movaps 
XMMWORD PTR [rsp+48], xmm6 - movd xmm4, rax - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - xorps xmm8, xmm8 - mov ax, 1023 - shl rax, 52 - movd xmm7, rax - mov r15, QWORD PTR [r9+96] - punpcklqdq xmm3, xmm0 - movd xmm0, rcx - punpcklqdq xmm4, xmm0 - - ALIGN(64) -cnv2_main_loop_bulldozer: - movdqa xmm5, XMMWORD PTR [r10+rbx] - movd xmm6, r8 - pinsrq xmm6, r11, 1 - lea rdx, QWORD PTR [r10+rbx] - lea r9, QWORD PTR [rdi+rdi] - shl rdi, 32 - - mov ecx, r10d - mov eax, r10d - xor ecx, 16 - xor eax, 32 - xor r10d, 48 - aesenc xmm5, xmm6 - movdqa xmm2, XMMWORD PTR [rcx+rbx] - movdqa xmm1, XMMWORD PTR [rax+rbx] - movdqa xmm0, XMMWORD PTR [r10+rbx] - paddq xmm2, xmm3 - paddq xmm1, xmm6 - paddq xmm0, xmm4 - movdqa XMMWORD PTR [rcx+rbx], xmm0 - movdqa XMMWORD PTR [rax+rbx], xmm2 - movdqa XMMWORD PTR [r10+rbx], xmm1 - - movaps xmm1, xmm8 - mov rsi, r15 - xor rsi, rdi - - mov edi, 1023 - shl rdi, 52 - - movd r14, xmm5 - pextrq rax, xmm5, 1 - - movdqa xmm0, xmm5 - pxor xmm0, xmm3 - mov r10, r14 - and r10d, 2097136 - movdqa XMMWORD PTR [rdx], xmm0 - xor rsi, QWORD PTR [r10+rbx] - lea r12, QWORD PTR [r10+rbx] - mov r13, QWORD PTR [r10+rbx+8] - - add r9d, r14d - or r9d, -2147483647 - xor edx, edx - div r9 - mov eax, eax - shl rdx, 32 - lea r15, [rax+rdx] - lea rax, [r14+r15] - shr rax, 12 - add rax, rdi - movd xmm0, rax - sqrtsd xmm1, xmm0 - movd rdi, xmm1 - test rdi, 524287 - je sqrt_fixup_bulldozer - shr rdi, 19 - -sqrt_fixup_bulldozer_ret: - mov rax, rsi - mul r14 - movd xmm1, rax - movd xmm0, rdx - punpcklqdq xmm0, xmm1 - - mov r9d, r10d - mov ecx, r10d - xor r9d, 16 - xor ecx, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [rcx+rbx] - xor rdx, [rcx+rbx] - xor rax, [rcx+rbx+8] - movdqa xmm2, XMMWORD PTR [r9+rbx] - pxor xmm2, xmm0 - paddq xmm4, XMMWORD PTR [r10+rbx] - paddq xmm2, xmm3 - paddq xmm1, xmm6 - movdqa XMMWORD PTR [r9+rbx], xmm4 - movdqa XMMWORD PTR [rcx+rbx], xmm2 - movdqa XMMWORD PTR [r10+rbx], xmm1 - - movdqa xmm4, xmm3 - add r8, rdx - add 
r11, rax - mov QWORD PTR [r12], r8 - xor r8, rsi - mov QWORD PTR [r12+8], r11 - mov r10, r8 - xor r11, r13 - and r10d, 2097136 - movdqa xmm3, xmm5 - dec ebp - jne cnv2_main_loop_bulldozer - - ldmxcsr DWORD PTR [rsp] - movaps xmm6, XMMWORD PTR [rsp+48] - lea r11, QWORD PTR [rsp+64] - mov rbx, QWORD PTR [r11+56] - mov rbp, QWORD PTR [r11+64] - mov rsi, QWORD PTR [r11+72] - movaps xmm8, XMMWORD PTR [r11-48] - movaps xmm7, XMMWORD PTR [rsp+32] - mov rsp, r11 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - jmp cnv2_main_loop_bulldozer_endp - -sqrt_fixup_bulldozer: - movd r9, xmm5 - add r9, r15 - dec rdi - mov edx, -1022 - shl rdx, 32 - mov rax, rdi - shr rdi, 19 - shr rax, 20 - mov rcx, rdi - sub rcx, rax - lea rcx, [rcx+rdx+1] - add rax, rdx - imul rcx, rax - sub rcx, r9 - adc rdi, 0 - jmp sqrt_fixup_bulldozer_ret - -cnv2_main_loop_bulldozer_endp: diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc deleted file mode 100644 index a12ac35c..00000000 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_ivybridge.inc +++ /dev/null @@ -1,188 +0,0 @@ - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 80 - - stmxcsr DWORD PTR [rsp] - mov DWORD PTR [rsp+4], 24448 - ldmxcsr DWORD PTR [rsp+4] - - mov rax, QWORD PTR [rcx+48] - mov r9, rcx - xor rax, QWORD PTR [rcx+16] - mov esi, 524288 - mov r8, QWORD PTR [rcx+32] - mov r13d, -2147483647 - xor r8, QWORD PTR [rcx] - mov r11, QWORD PTR [rcx+40] - mov r10, r8 - mov rdx, QWORD PTR [rcx+56] - movd xmm4, rax - xor rdx, QWORD PTR [rcx+24] - xor r11, QWORD PTR [rcx+8] - mov rbx, QWORD PTR [rcx+224] - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - movd xmm0, rdx - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r9+72] - movd xmm3, QWORD PTR [r9+104] - movaps XMMWORD PTR [rsp+64], xmm6 - movaps XMMWORD PTR [rsp+48], xmm7 - movaps XMMWORD PTR [rsp+32], xmm8 - and r10d, 2097136 
- movd xmm5, rax - - xor eax, eax - mov QWORD PTR [rsp+16], rax - - mov ax, 1023 - shl rax, 52 - movd xmm8, rax - mov r15, QWORD PTR [r9+96] - punpcklqdq xmm4, xmm0 - movd xmm0, rcx - punpcklqdq xmm5, xmm0 - movdqu xmm6, XMMWORD PTR [r10+rbx] - - ALIGN(64) -main_loop_ivybridge: - lea rdx, QWORD PTR [r10+rbx] - mov ecx, r10d - mov eax, r10d - mov rdi, r15 - xor ecx, 16 - xor eax, 32 - xor r10d, 48 - movd xmm0, r11 - movd xmm7, r8 - punpcklqdq xmm7, xmm0 - aesenc xmm6, xmm7 - movd rbp, xmm6 - mov r9, rbp - and r9d, 2097136 - movdqu xmm2, XMMWORD PTR [rcx+rbx] - movdqu xmm1, XMMWORD PTR [rax+rbx] - movdqu xmm0, XMMWORD PTR [r10+rbx] - paddq xmm1, xmm7 - paddq xmm0, xmm5 - paddq xmm2, xmm4 - movdqu XMMWORD PTR [rcx+rbx], xmm0 - movdqu XMMWORD PTR [rax+rbx], xmm2 - movdqu XMMWORD PTR [r10+rbx], xmm1 - mov r10, r9 - xor r10d, 32 - movd rcx, xmm3 - mov rax, rcx - shl rax, 32 - xor rdi, rax - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [rdx], xmm0 - xor rdi, QWORD PTR [r9+rbx] - lea r14, QWORD PTR [r9+rbx] - mov r12, QWORD PTR [r14+8] - xor edx, edx - lea r9d, DWORD PTR [ecx+ecx] - add r9d, ebp - movdqa xmm0, xmm6 - psrldq xmm0, 8 - or r9d, r13d - movd rax, xmm0 - div r9 - xorps xmm3, xmm3 - mov eax, eax - shl rdx, 32 - add rdx, rax - lea r9, QWORD PTR [rdx+rbp] - mov r15, rdx - mov rax, r9 - shr rax, 12 - movd xmm0, rax - paddq xmm0, xmm8 - sqrtsd xmm3, xmm0 - psubq xmm3, XMMWORD PTR [rsp+16] - movd rdx, xmm3 - test edx, 524287 - je sqrt_fixup_ivybridge - psrlq xmm3, 19 -sqrt_fixup_ivybridge_ret: - - mov ecx, r10d - mov rax, rdi - mul rbp - movd xmm2, rdx - xor rdx, [rcx+rbx] - add r8, rdx - mov QWORD PTR [r14], r8 - xor r8, rdi - mov edi, r8d - and edi, 2097136 - movd xmm0, rax - xor rax, [rcx+rbx+8] - add r11, rax - mov QWORD PTR [r14+8], r11 - punpcklqdq xmm2, xmm0 - - mov r9d, r10d - xor r9d, 48 - xor r10d, 16 - pxor xmm2, XMMWORD PTR [r9+rbx] - movdqu xmm0, XMMWORD PTR [r10+rbx] - paddq xmm0, xmm5 - movdqu xmm1, XMMWORD PTR [rcx+rbx] - paddq xmm2, xmm4 - 
paddq xmm1, xmm7 - movdqa xmm5, xmm4 - movdqu XMMWORD PTR [r9+rbx], xmm0 - movdqa xmm4, xmm6 - movdqu XMMWORD PTR [rcx+rbx], xmm2 - movdqu XMMWORD PTR [r10+rbx], xmm1 - movdqu xmm6, [rdi+rbx] - mov r10d, edi - xor r11, r12 - dec rsi - jne main_loop_ivybridge - - ldmxcsr DWORD PTR [rsp] - mov rbx, QWORD PTR [rsp+160] - movaps xmm6, XMMWORD PTR [rsp+64] - movaps xmm7, XMMWORD PTR [rsp+48] - movaps xmm8, XMMWORD PTR [rsp+32] - add rsp, 80 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - jmp cnv2_main_loop_ivybridge_endp - -sqrt_fixup_ivybridge: - dec rdx - mov r13d, -1022 - shl r13, 32 - mov rax, rdx - shr rdx, 19 - shr rax, 20 - mov rcx, rdx - sub rcx, rax - add rax, r13 - not r13 - sub rcx, r13 - mov r13d, -2147483647 - imul rcx, rax - sub rcx, r9 - adc rdx, 0 - movd xmm3, rdx - jmp sqrt_fixup_ivybridge_ret - -cnv2_main_loop_ivybridge_endp: diff --git a/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc b/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc deleted file mode 100644 index 044235d8..00000000 --- a/src/crypto/asm/win64/cn2/cnv2_main_loop_ryzen.inc +++ /dev/null @@ -1,181 +0,0 @@ - mov rcx, [rcx] - - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 64 - - stmxcsr DWORD PTR [rsp] - mov DWORD PTR [rsp+4], 24448 - ldmxcsr DWORD PTR [rsp+4] - - mov rax, QWORD PTR [rcx+48] - mov r9, rcx - xor rax, QWORD PTR [rcx+16] - mov ebp, 524288 - mov r8, QWORD PTR [rcx+32] - xor r8, QWORD PTR [rcx] - mov r11, QWORD PTR [rcx+40] - mov r10, r8 - mov rdx, QWORD PTR [rcx+56] - movd xmm3, rax - xor rdx, QWORD PTR [rcx+24] - xor r11, QWORD PTR [rcx+8] - mov rbx, QWORD PTR [rcx+224] - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - movd xmm0, rdx - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r9+72] - mov rdi, QWORD PTR [r9+104] - and r10d, 2097136 - movaps XMMWORD PTR [rsp+48], xmm6 - movd xmm4, rax - movaps XMMWORD PTR [rsp+32], xmm7 - 
movaps XMMWORD PTR [rsp+16], xmm8 - xorps xmm8, xmm8 - mov ax, 1023 - shl rax, 52 - movd xmm7, rax - mov r15, QWORD PTR [r9+96] - punpcklqdq xmm3, xmm0 - movd xmm0, rcx - punpcklqdq xmm4, xmm0 - - ALIGN(64) -main_loop_ryzen: - movdqa xmm5, XMMWORD PTR [r10+rbx] - movd xmm0, r11 - movd xmm6, r8 - punpcklqdq xmm6, xmm0 - lea rdx, QWORD PTR [r10+rbx] - lea r9, QWORD PTR [rdi+rdi] - shl rdi, 32 - - mov ecx, r10d - mov eax, r10d - xor ecx, 16 - xor eax, 32 - xor r10d, 48 - aesenc xmm5, xmm6 - movdqa xmm2, XMMWORD PTR [rcx+rbx] - movdqa xmm1, XMMWORD PTR [rax+rbx] - movdqa xmm0, XMMWORD PTR [r10+rbx] - paddq xmm2, xmm3 - paddq xmm1, xmm6 - paddq xmm0, xmm4 - movdqa XMMWORD PTR [rcx+rbx], xmm0 - movdqa XMMWORD PTR [rax+rbx], xmm2 - movdqa XMMWORD PTR [r10+rbx], xmm1 - - movaps xmm1, xmm8 - mov rsi, r15 - xor rsi, rdi - movd r14, xmm5 - movdqa xmm0, xmm5 - pxor xmm0, xmm3 - mov r10, r14 - and r10d, 2097136 - movdqa XMMWORD PTR [rdx], xmm0 - xor rsi, QWORD PTR [r10+rbx] - lea r12, QWORD PTR [r10+rbx] - mov r13, QWORD PTR [r10+rbx+8] - - add r9d, r14d - or r9d, -2147483647 - xor edx, edx - movdqa xmm0, xmm5 - psrldq xmm0, 8 - movd rax, xmm0 - - div r9 - movd xmm0, rax - movd xmm1, rdx - punpckldq xmm0, xmm1 - movd r15, xmm0 - paddq xmm0, xmm5 - movdqa xmm2, xmm0 - psrlq xmm0, 12 - paddq xmm0, xmm7 - sqrtsd xmm1, xmm0 - movd rdi, xmm1 - test rdi, 524287 - je sqrt_fixup_ryzen - shr rdi, 19 - -sqrt_fixup_ryzen_ret: - mov rax, rsi - mul r14 - movd xmm1, rax - movd xmm0, rdx - punpcklqdq xmm0, xmm1 - - mov r9d, r10d - mov ecx, r10d - xor r9d, 16 - xor ecx, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [rcx+rbx] - xor rdx, [rcx+rbx] - xor rax, [rcx+rbx+8] - movdqa xmm2, XMMWORD PTR [r9+rbx] - pxor xmm2, xmm0 - paddq xmm4, XMMWORD PTR [r10+rbx] - paddq xmm2, xmm3 - paddq xmm1, xmm6 - movdqa XMMWORD PTR [r9+rbx], xmm4 - movdqa XMMWORD PTR [rcx+rbx], xmm2 - movdqa XMMWORD PTR [r10+rbx], xmm1 - - movdqa xmm4, xmm3 - add r8, rdx - add r11, rax - mov QWORD PTR [r12], r8 - xor r8, rsi - 
mov QWORD PTR [r12+8], r11 - mov r10, r8 - xor r11, r13 - and r10d, 2097136 - movdqa xmm3, xmm5 - dec ebp - jne main_loop_ryzen - - ldmxcsr DWORD PTR [rsp] - movaps xmm6, XMMWORD PTR [rsp+48] - lea r11, QWORD PTR [rsp+64] - mov rbx, QWORD PTR [r11+56] - mov rbp, QWORD PTR [r11+64] - mov rsi, QWORD PTR [r11+72] - movaps xmm8, XMMWORD PTR [r11-48] - movaps xmm7, XMMWORD PTR [rsp+32] - mov rsp, r11 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - jmp cnv2_main_loop_ryzen_endp - -sqrt_fixup_ryzen: - movd r9, xmm2 - dec rdi - mov edx, -1022 - shl rdx, 32 - mov rax, rdi - shr rdi, 19 - shr rax, 20 - mov rcx, rdi - sub rcx, rax - lea rcx, [rcx+rdx+1] - add rax, rdx - imul rcx, rax - sub rcx, r9 - adc rdi, 0 - jmp sqrt_fixup_ryzen_ret - -cnv2_main_loop_ryzen_endp: diff --git a/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc b/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc deleted file mode 100644 index 97fb691b..00000000 --- a/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc +++ /dev/null @@ -1,413 +0,0 @@ - mov rdx, [rcx+8] - mov rcx, [rcx] - - mov rax, rsp - push rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 184 - - stmxcsr DWORD PTR [rsp+272] - mov DWORD PTR [rsp+276], 24448 - ldmxcsr DWORD PTR [rsp+276] - - mov r13, QWORD PTR [rcx+224] - mov r9, rdx - mov r10, QWORD PTR [rcx+32] - mov r8, rcx - xor r10, QWORD PTR [rcx] - mov r14d, 393216 - mov r11, QWORD PTR [rcx+40] - xor r11, QWORD PTR [rcx+8] - mov rsi, QWORD PTR [rdx+224] - mov rdx, QWORD PTR [rcx+56] - xor rdx, QWORD PTR [rcx+24] - mov rdi, QWORD PTR [r9+32] - xor rdi, QWORD PTR [r9] - mov rbp, QWORD PTR [r9+40] - xor rbp, QWORD PTR [r9+8] - movd xmm0, rdx - movaps XMMWORD PTR [rax-88], xmm6 - movaps XMMWORD PTR [rax-104], xmm7 - movaps XMMWORD PTR [rax-120], xmm8 - movaps XMMWORD PTR [rsp+112], xmm9 - movaps XMMWORD PTR [rsp+96], xmm10 - movaps XMMWORD PTR [rsp+80], xmm11 - movaps XMMWORD PTR [rsp+64], xmm12 - movaps XMMWORD PTR [rsp+48], xmm13 
- movaps XMMWORD PTR [rsp+32], xmm14 - movaps XMMWORD PTR [rsp+16], xmm15 - mov rdx, r10 - movd xmm4, QWORD PTR [r8+96] - and edx, 2097136 - mov rax, QWORD PTR [rcx+48] - xorps xmm13, xmm13 - xor rax, QWORD PTR [rcx+16] - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r8+72] - movd xmm5, QWORD PTR [r8+104] - movd xmm7, rax - - mov eax, 1 - shl rax, 52 - movd xmm14, rax - punpcklqdq xmm14, xmm14 - - mov eax, 1023 - shl rax, 52 - movd xmm12, rax - punpcklqdq xmm12, xmm12 - - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - punpcklqdq xmm7, xmm0 - movd xmm0, rcx - mov rcx, QWORD PTR [r9+56] - xor rcx, QWORD PTR [r9+24] - movd xmm3, rax - mov rax, QWORD PTR [r9+48] - xor rax, QWORD PTR [r9+16] - punpcklqdq xmm3, xmm0 - movd xmm0, rcx - mov QWORD PTR [rsp], r13 - mov rcx, QWORD PTR [r9+88] - xor rcx, QWORD PTR [r9+72] - movd xmm6, rax - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - punpcklqdq xmm6, xmm0 - movd xmm0, rcx - mov QWORD PTR [rsp+256], r10 - mov rcx, rdi - mov QWORD PTR [rsp+264], r11 - movd xmm8, rax - and ecx, 2097136 - punpcklqdq xmm8, xmm0 - movd xmm0, QWORD PTR [r9+96] - punpcklqdq xmm4, xmm0 - movd xmm0, QWORD PTR [r9+104] - lea r8, QWORD PTR [rcx+rsi] - movdqu xmm11, XMMWORD PTR [r8] - punpcklqdq xmm5, xmm0 - lea r9, QWORD PTR [rdx+r13] - movdqu xmm15, XMMWORD PTR [r9] - - ALIGN(64) -rwz_main_loop_double: - movdqu xmm9, xmm15 - mov eax, edx - mov ebx, edx - xor eax, 16 - xor ebx, 32 - xor edx, 48 - - movd xmm0, r11 - movd xmm2, r10 - punpcklqdq xmm2, xmm0 - aesenc xmm9, xmm2 - - movdqu xmm0, XMMWORD PTR [rdx+r13] - movdqu xmm1, XMMWORD PTR [rbx+r13] - paddq xmm0, xmm7 - paddq xmm1, xmm2 - movdqu XMMWORD PTR [rbx+r13], xmm0 - movdqu xmm0, XMMWORD PTR [rax+r13] - movdqu XMMWORD PTR [rdx+r13], xmm1 - paddq xmm0, xmm3 - movdqu XMMWORD PTR [rax+r13], xmm0 - - movd r11, xmm9 - mov edx, r11d - and edx, 2097136 - movdqa xmm0, xmm9 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [r9], xmm0 - - lea rbx, QWORD PTR [rdx+r13] - mov r10, QWORD PTR 
[rdx+r13] - - movdqu xmm10, xmm11 - movd xmm0, rbp - movd xmm11, rdi - punpcklqdq xmm11, xmm0 - aesenc xmm10, xmm11 - - mov eax, ecx - mov r12d, ecx - xor eax, 16 - xor r12d, 32 - xor ecx, 48 - - movdqu xmm0, XMMWORD PTR [rcx+rsi] - paddq xmm0, xmm6 - movdqu xmm1, XMMWORD PTR [r12+rsi] - movdqu XMMWORD PTR [r12+rsi], xmm0 - paddq xmm1, xmm11 - movdqu xmm0, XMMWORD PTR [rax+rsi] - movdqu XMMWORD PTR [rcx+rsi], xmm1 - paddq xmm0, xmm8 - movdqu XMMWORD PTR [rax+rsi], xmm0 - - movd rcx, xmm10 - and ecx, 2097136 - - movdqa xmm0, xmm10 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [r8], xmm0 - mov r12, QWORD PTR [rcx+rsi] - - mov r9, QWORD PTR [rbx+8] - - xor edx, 16 - mov r8d, edx - mov r15d, edx - - movd rdx, xmm5 - shl rdx, 32 - movd rax, xmm4 - xor rdx, rax - xor r10, rdx - mov rax, r10 - mul r11 - mov r11d, r8d - xor r11d, 48 - movd xmm0, rdx - xor rdx, [r11+r13] - movd xmm1, rax - xor rax, [r11+r13+8] - punpcklqdq xmm0, xmm1 - - pxor xmm0, XMMWORD PTR [r8+r13] - movdqu xmm1, XMMWORD PTR [r11+r13] - paddq xmm0, xmm3 - paddq xmm1, xmm2 - movdqu XMMWORD PTR [r8+r13], xmm0 - xor r8d, 32 - movdqu xmm0, XMMWORD PTR [r8+r13] - movdqu XMMWORD PTR [r8+r13], xmm1 - paddq xmm0, xmm7 - movdqu XMMWORD PTR [r11+r13], xmm0 - - mov r11, QWORD PTR [rsp+256] - add r11, rdx - mov rdx, QWORD PTR [rsp+264] - add rdx, rax - mov QWORD PTR [rbx], r11 - xor r11, r10 - mov QWORD PTR [rbx+8], rdx - xor rdx, r9 - mov QWORD PTR [rsp+256], r11 - and r11d, 2097136 - mov QWORD PTR [rsp+264], rdx - mov QWORD PTR [rsp+8], r11 - lea r15, QWORD PTR [r11+r13] - movdqu xmm15, XMMWORD PTR [r11+r13] - lea r13, QWORD PTR [rsi+rcx] - movdqa xmm0, xmm5 - psrldq xmm0, 8 - movaps xmm2, xmm13 - movd r10, xmm0 - psllq xmm5, 1 - shl r10, 32 - movdqa xmm0, xmm9 - psrldq xmm0, 8 - movdqa xmm1, xmm10 - movd r11, xmm0 - psrldq xmm1, 8 - movd r8, xmm1 - psrldq xmm4, 8 - movaps xmm0, xmm13 - movd rax, xmm4 - xor r10, rax - movaps xmm1, xmm13 - xor r10, r12 - lea rax, QWORD PTR [r11+1] - shr rax, 1 - movdqa xmm3, xmm9 - 
punpcklqdq xmm3, xmm10 - paddq xmm5, xmm3 - movd rdx, xmm5 - psrldq xmm5, 8 - cvtsi2sd xmm2, rax - or edx, -2147483647 - lea rax, QWORD PTR [r8+1] - shr rax, 1 - movd r9, xmm5 - cvtsi2sd xmm0, rax - or r9d, -2147483647 - cvtsi2sd xmm1, rdx - unpcklpd xmm2, xmm0 - movaps xmm0, xmm13 - cvtsi2sd xmm0, r9 - unpcklpd xmm1, xmm0 - divpd xmm2, xmm1 - paddq xmm2, xmm14 - cvttsd2si rax, xmm2 - psrldq xmm2, 8 - mov rbx, rax - imul rax, rdx - sub r11, rax - js rwz_div_fix_1 -rwz_div_fix_1_ret: - - cvttsd2si rdx, xmm2 - mov rax, rdx - imul rax, r9 - movd xmm2, r11d - movd xmm4, ebx - sub r8, rax - js rwz_div_fix_2 -rwz_div_fix_2_ret: - - movd xmm1, r8d - movd xmm0, edx - punpckldq xmm2, xmm1 - punpckldq xmm4, xmm0 - punpckldq xmm4, xmm2 - paddq xmm3, xmm4 - movdqa xmm0, xmm3 - psrlq xmm0, 12 - paddq xmm0, xmm12 - sqrtpd xmm1, xmm0 - movd r9, xmm1 - movdqa xmm5, xmm1 - psrlq xmm5, 19 - test r9, 524287 - je rwz_sqrt_fix_1 -rwz_sqrt_fix_1_ret: - - movd r9, xmm10 - psrldq xmm1, 8 - movd r8, xmm1 - test r8, 524287 - je rwz_sqrt_fix_2 -rwz_sqrt_fix_2_ret: - - mov r12d, ecx - mov r8d, ecx - xor r12d, 16 - xor r8d, 32 - xor ecx, 48 - mov rax, r10 - mul r9 - movd xmm0, rax - movd xmm3, rdx - punpcklqdq xmm3, xmm0 - - movdqu xmm0, XMMWORD PTR [r12+rsi] - pxor xmm0, xmm3 - movdqu xmm1, XMMWORD PTR [r8+rsi] - xor rdx, [r8+rsi] - xor rax, [r8+rsi+8] - movdqu xmm3, XMMWORD PTR [rcx+rsi] - paddq xmm3, xmm6 - paddq xmm1, xmm11 - paddq xmm0, xmm8 - movdqu XMMWORD PTR [r8+rsi], xmm3 - movdqu XMMWORD PTR [rcx+rsi], xmm1 - movdqu XMMWORD PTR [r12+rsi], xmm0 - - add rdi, rdx - mov QWORD PTR [r13], rdi - xor rdi, r10 - mov ecx, edi - and ecx, 2097136 - lea r8, QWORD PTR [rcx+rsi] - - mov rdx, QWORD PTR [r13+8] - add rbp, rax - mov QWORD PTR [r13+8], rbp - movdqu xmm11, XMMWORD PTR [rcx+rsi] - xor rbp, rdx - mov r13, QWORD PTR [rsp] - movdqa xmm3, xmm7 - mov rdx, QWORD PTR [rsp+8] - movdqa xmm8, xmm6 - mov r10, QWORD PTR [rsp+256] - movdqa xmm7, xmm9 - mov r11, QWORD PTR [rsp+264] - movdqa xmm6, 
xmm10 - mov r9, r15 - dec r14d - jne rwz_main_loop_double - - ldmxcsr DWORD PTR [rsp+272] - movaps xmm13, XMMWORD PTR [rsp+48] - lea r11, QWORD PTR [rsp+184] - movaps xmm6, XMMWORD PTR [r11-24] - movaps xmm7, XMMWORD PTR [r11-40] - movaps xmm8, XMMWORD PTR [r11-56] - movaps xmm9, XMMWORD PTR [r11-72] - movaps xmm10, XMMWORD PTR [r11-88] - movaps xmm11, XMMWORD PTR [r11-104] - movaps xmm12, XMMWORD PTR [r11-120] - movaps xmm14, XMMWORD PTR [rsp+32] - movaps xmm15, XMMWORD PTR [rsp+16] - mov rsp, r11 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - pop rbx - jmp rwz_cnv2_double_mainloop_asm_endp - -rwz_div_fix_1: - dec rbx - add r11, rdx - jmp rwz_div_fix_1_ret - -rwz_div_fix_2: - dec rdx - add r8, r9 - jmp rwz_div_fix_2_ret - -rwz_sqrt_fix_1: - movd r8, xmm3 - movdqa xmm0, xmm5 - psrldq xmm0, 8 - dec r9 - mov r11d, -1022 - shl r11, 32 - mov rax, r9 - shr r9, 19 - shr rax, 20 - mov rdx, r9 - sub rdx, rax - lea rdx, [rdx+r11+1] - add rax, r11 - imul rdx, rax - sub rdx, r8 - adc r9, 0 - movd xmm5, r9 - punpcklqdq xmm5, xmm0 - jmp rwz_sqrt_fix_1_ret - -rwz_sqrt_fix_2: - psrldq xmm3, 8 - movd r11, xmm3 - dec r8 - mov ebx, -1022 - shl rbx, 32 - mov rax, r8 - shr r8, 19 - shr rax, 20 - mov rdx, r8 - sub rdx, rax - lea rdx, [rdx+rbx+1] - add rax, rbx - imul rdx, rax - sub rdx, r11 - adc r8, 0 - movd xmm0, r8 - punpcklqdq xmm5, xmm0 - jmp rwz_sqrt_fix_2_ret - -rwz_cnv2_double_mainloop_asm_endp: diff --git a/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc b/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc deleted file mode 100644 index e2b7a5fc..00000000 --- a/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc +++ /dev/null @@ -1,188 +0,0 @@ - mov rcx, [rcx] - - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 80 - - stmxcsr DWORD PTR [rsp] - mov DWORD PTR [rsp+4], 24448 - ldmxcsr DWORD PTR [rsp+4] - - mov rax, QWORD PTR [rcx+48] - mov r9, rcx - xor rax, QWORD PTR [rcx+16] - mov esi, 393216 
- mov r8, QWORD PTR [rcx+32] - mov r13d, -2147483647 - xor r8, QWORD PTR [rcx] - mov r11, QWORD PTR [rcx+40] - mov r10, r8 - mov rdx, QWORD PTR [rcx+56] - movd xmm4, rax - xor rdx, QWORD PTR [rcx+24] - xor r11, QWORD PTR [rcx+8] - mov rbx, QWORD PTR [rcx+224] - mov rax, QWORD PTR [r9+80] - xor rax, QWORD PTR [r9+64] - movd xmm0, rdx - mov rcx, QWORD PTR [rcx+88] - xor rcx, QWORD PTR [r9+72] - movd xmm3, QWORD PTR [r9+104] - movaps XMMWORD PTR [rsp+64], xmm6 - movaps XMMWORD PTR [rsp+48], xmm7 - movaps XMMWORD PTR [rsp+32], xmm8 - and r10d, 2097136 - movd xmm5, rax - - xor eax, eax - mov QWORD PTR [rsp+16], rax - - mov ax, 1023 - shl rax, 52 - movd xmm8, rax - mov r15, QWORD PTR [r9+96] - punpcklqdq xmm4, xmm0 - movd xmm0, rcx - punpcklqdq xmm5, xmm0 - movdqu xmm6, XMMWORD PTR [r10+rbx] - - ALIGN(64) -rwz_main_loop: - lea rdx, QWORD PTR [r10+rbx] - mov ecx, r10d - mov eax, r10d - mov rdi, r15 - xor ecx, 16 - xor eax, 32 - xor r10d, 48 - movd xmm0, r11 - movd xmm7, r8 - punpcklqdq xmm7, xmm0 - aesenc xmm6, xmm7 - movd rbp, xmm6 - mov r9, rbp - and r9d, 2097136 - movdqu xmm0, XMMWORD PTR [rcx+rbx] - movdqu xmm1, XMMWORD PTR [rax+rbx] - movdqu xmm2, XMMWORD PTR [r10+rbx] - paddq xmm0, xmm5 - paddq xmm1, xmm7 - paddq xmm2, xmm4 - movdqu XMMWORD PTR [rcx+rbx], xmm0 - movdqu XMMWORD PTR [rax+rbx], xmm2 - movdqu XMMWORD PTR [r10+rbx], xmm1 - mov r10, r9 - xor r10d, 32 - movd rcx, xmm3 - mov rax, rcx - shl rax, 32 - xor rdi, rax - movdqa xmm0, xmm6 - pxor xmm0, xmm4 - movdqu XMMWORD PTR [rdx], xmm0 - xor rdi, QWORD PTR [r9+rbx] - lea r14, QWORD PTR [r9+rbx] - mov r12, QWORD PTR [r14+8] - xor edx, edx - lea r9d, DWORD PTR [ecx+ecx] - add r9d, ebp - movdqa xmm0, xmm6 - psrldq xmm0, 8 - or r9d, r13d - movd rax, xmm0 - div r9 - xorps xmm3, xmm3 - mov eax, eax - shl rdx, 32 - add rdx, rax - lea r9, QWORD PTR [rdx+rbp] - mov r15, rdx - mov rax, r9 - shr rax, 12 - movd xmm0, rax - paddq xmm0, xmm8 - sqrtsd xmm3, xmm0 - psubq xmm3, XMMWORD PTR [rsp+16] - movd rdx, xmm3 - test edx, 
524287 - je rwz_sqrt_fixup - psrlq xmm3, 19 -rwz_sqrt_fixup_ret: - - mov ecx, r10d - mov rax, rdi - mul rbp - movd xmm2, rdx - xor rdx, [rcx+rbx] - add r8, rdx - mov QWORD PTR [r14], r8 - xor r8, rdi - mov edi, r8d - and edi, 2097136 - movd xmm0, rax - xor rax, [rcx+rbx+8] - add r11, rax - mov QWORD PTR [r14+8], r11 - punpcklqdq xmm2, xmm0 - - mov r9d, r10d - xor r9d, 48 - xor r10d, 16 - pxor xmm2, XMMWORD PTR [r9+rbx] - movdqu xmm0, XMMWORD PTR [r10+rbx] - paddq xmm0, xmm4 - movdqu xmm1, XMMWORD PTR [rcx+rbx] - paddq xmm2, xmm5 - paddq xmm1, xmm7 - movdqa xmm5, xmm4 - movdqu XMMWORD PTR [r9+rbx], xmm2 - movdqa xmm4, xmm6 - movdqu XMMWORD PTR [rcx+rbx], xmm0 - movdqu XMMWORD PTR [r10+rbx], xmm1 - movdqu xmm6, [rdi+rbx] - mov r10d, edi - xor r11, r12 - dec rsi - jne rwz_main_loop - - ldmxcsr DWORD PTR [rsp] - mov rbx, QWORD PTR [rsp+160] - movaps xmm6, XMMWORD PTR [rsp+64] - movaps xmm7, XMMWORD PTR [rsp+48] - movaps xmm8, XMMWORD PTR [rsp+32] - add rsp, 80 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - jmp cnv2_rwz_main_loop_endp - -rwz_sqrt_fixup: - dec rdx - mov r13d, -1022 - shl r13, 32 - mov rax, rdx - shr rdx, 19 - shr rax, 20 - mov rcx, rdx - sub rcx, rax - add rax, r13 - not r13 - sub rcx, r13 - mov r13d, -2147483647 - imul rcx, rax - sub rcx, r9 - adc rdx, 0 - movd xmm3, rdx - jmp rwz_sqrt_fixup_ret - -cnv2_rwz_main_loop_endp: diff --git a/src/crypto/asm/win64/cn_main_loop.S b/src/crypto/asm/win64/cn_main_loop.S deleted file mode 100644 index 63c3a8ba..00000000 --- a/src/crypto/asm/win64/cn_main_loop.S +++ /dev/null @@ -1,45 +0,0 @@ -#define ALIGN(x) .align 64 -.intel_syntax noprefix -.section .text -.global cnv2_mainloop_ivybridge_asm -.global cnv2_mainloop_ryzen_asm -.global cnv2_mainloop_bulldozer_asm -.global cnv2_double_mainloop_sandybridge_asm -.global cnv2_rwz_mainloop_asm -.global cnv2_rwz_double_mainloop_asm - -ALIGN(64) -cnv2_mainloop_ivybridge_asm: - #include "../cn2/cnv2_main_loop_ivybridge.inc" - ret 0 - mov eax, 
3735929054 - -ALIGN(64) -cnv2_mainloop_ryzen_asm: - #include "../cn2/cnv2_main_loop_ryzen.inc" - ret 0 - mov eax, 3735929054 - -ALIGN(64) -cnv2_mainloop_bulldozer_asm: - #include "../cn2/cnv2_main_loop_bulldozer.inc" - ret 0 - mov eax, 3735929054 - -ALIGN(64) -cnv2_double_mainloop_sandybridge_asm: - #include "../cn2/cnv2_double_main_loop_sandybridge.inc" - ret 0 - mov eax, 3735929054 - -ALIGN(64) -cnv2_rwz_mainloop_asm: - #include "cn2/cnv2_rwz_main_loop.inc" - ret 0 - mov eax, 3735929054 - -ALIGN(64) -cnv2_rwz_double_mainloop_asm: - #include "cn2/cnv2_rwz_double_main_loop.inc" - ret 0 - mov eax, 3735929054 diff --git a/src/crypto/asm/win64/cn_main_loop.asm b/src/crypto/asm/win64/cn_main_loop.asm deleted file mode 100644 index 57246cf5..00000000 --- a/src/crypto/asm/win64/cn_main_loop.asm +++ /dev/null @@ -1,52 +0,0 @@ -_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE -PUBLIC cnv2_mainloop_ivybridge_asm -PUBLIC cnv2_mainloop_ryzen_asm -PUBLIC cnv2_mainloop_bulldozer_asm -PUBLIC cnv2_double_mainloop_sandybridge_asm -PUBLIC cnv2_rwz_mainloop_asm -PUBLIC cnv2_rwz_double_mainloop_asm - -ALIGN 64 -cnv2_mainloop_ivybridge_asm PROC - INCLUDE cn2/cnv2_main_loop_ivybridge.inc - ret 0 - mov eax, 3735929054 -cnv2_mainloop_ivybridge_asm ENDP - -ALIGN 64 -cnv2_mainloop_ryzen_asm PROC - INCLUDE cn2/cnv2_main_loop_ryzen.inc - ret 0 - mov eax, 3735929054 -cnv2_mainloop_ryzen_asm ENDP - -ALIGN 64 -cnv2_mainloop_bulldozer_asm PROC - INCLUDE cn2/cnv2_main_loop_bulldozer.inc - ret 0 - mov eax, 3735929054 -cnv2_mainloop_bulldozer_asm ENDP - -ALIGN 64 -cnv2_double_mainloop_sandybridge_asm PROC - INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc - ret 0 - mov eax, 3735929054 -cnv2_double_mainloop_sandybridge_asm ENDP - -ALIGN(64) -cnv2_rwz_mainloop_asm PROC - INCLUDE cn2/cnv2_rwz_main_loop.inc - ret 0 - mov eax, 3735929054 -cnv2_rwz_mainloop_asm ENDP - -ALIGN(64) -cnv2_rwz_double_mainloop_asm PROC - INCLUDE cn2/cnv2_rwz_double_main_loop.inc - ret 0 - mov eax, 3735929054 
-cnv2_rwz_double_mainloop_asm ENDP - -_TEXT_CNV2_MAINLOOP ENDS -END diff --git a/src/crypto/c_blake256.c b/src/crypto/c_blake256.c deleted file mode 100644 index 00a84c22..00000000 --- a/src/crypto/c_blake256.c +++ /dev/null @@ -1,326 +0,0 @@ -/* - * The blake256_* and blake224_* functions are largely copied from - * blake256_light.c and blake224_light.c from the BLAKE website: - * - * http://131002.net/blake/ - * - * The hmac_* functions implement HMAC-BLAKE-256 and HMAC-BLAKE-224. - * HMAC is specified by RFC 2104. - */ - -#include -#include -#include -#include "c_blake256.h" - -#define U8TO32(p) \ - (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \ - ((uint32_t)((p)[2]) << 8) | ((uint32_t)((p)[3]) )) -#define U32TO8(p, v) \ - (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \ - (p)[2] = (uint8_t)((v) >> 8); (p)[3] = (uint8_t)((v) ); - -const uint8_t sigma[][16] = { - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}, - {14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3}, - {11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4}, - { 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8}, - { 9, 0, 5, 7, 2, 4,10,15,14, 1,11,12, 6, 8, 3,13}, - { 2,12, 6,10, 0,11, 8, 3, 4,13, 7, 5,15,14, 1, 9}, - {12, 5, 1,15,14,13, 4,10, 0, 7, 6, 3, 9, 2, 8,11}, - {13,11, 7,14,12, 1, 3, 9, 5, 0,15, 4, 8, 6, 2,10}, - { 6,15,14, 9,11, 3, 0, 8,12, 2,13, 7, 1, 4,10, 5}, - {10, 2, 8, 4, 7, 6, 1, 5,15,11, 9,14, 3,12,13, 0}, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}, - {14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3}, - {11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4}, - { 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8} -}; - -const uint32_t cst[16] = { - 0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344, - 0xA4093822, 0x299F31D0, 0x082EFA98, 0xEC4E6C89, - 0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C, - 0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917 -}; - -static const uint8_t padding[] = { - 0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -}; - - -void blake256_compress(state *S, const uint8_t *block) { - uint32_t v[16], m[16], i; - -#define ROT(x,n) (((x)<<(32-n))|((x)>>(n))) -#define G(a,b,c,d,e) \ - v[a] += (m[sigma[i][e]] ^ cst[sigma[i][e+1]]) + v[b]; \ - v[d] = ROT(v[d] ^ v[a],16); \ - v[c] += v[d]; \ - v[b] = ROT(v[b] ^ v[c],12); \ - v[a] += (m[sigma[i][e+1]] ^ cst[sigma[i][e]])+v[b]; \ - v[d] = ROT(v[d] ^ v[a], 8); \ - v[c] += v[d]; \ - v[b] = ROT(v[b] ^ v[c], 7); - - for (i = 0; i < 16; ++i) m[i] = U8TO32(block + i * 4); - for (i = 0; i < 8; ++i) v[i] = S->h[i]; - v[ 8] = S->s[0] ^ 0x243F6A88; - v[ 9] = S->s[1] ^ 0x85A308D3; - v[10] = S->s[2] ^ 0x13198A2E; - v[11] = S->s[3] ^ 0x03707344; - v[12] = 0xA4093822; - v[13] = 0x299F31D0; - v[14] = 0x082EFA98; - v[15] = 0xEC4E6C89; - - if (S->nullt == 0) { - v[12] ^= S->t[0]; - v[13] ^= S->t[0]; - v[14] ^= S->t[1]; - v[15] ^= S->t[1]; - } - - for (i = 0; i < 14; ++i) { - G(0, 4, 8, 12, 0); - G(1, 5, 9, 13, 2); - G(2, 6, 10, 14, 4); - G(3, 7, 11, 15, 6); - G(3, 4, 9, 14, 14); - G(2, 7, 8, 13, 12); - G(0, 5, 10, 15, 8); - G(1, 6, 11, 12, 10); - } - - for (i = 0; i < 16; ++i) S->h[i % 8] ^= v[i]; - for (i = 0; i < 8; ++i) S->h[i] ^= S->s[i % 4]; -} - -void blake256_init(state *S) { - S->h[0] = 0x6A09E667; - S->h[1] = 0xBB67AE85; - S->h[2] = 0x3C6EF372; - S->h[3] = 0xA54FF53A; - S->h[4] = 0x510E527F; - S->h[5] = 0x9B05688C; - S->h[6] = 0x1F83D9AB; - S->h[7] = 0x5BE0CD19; - S->t[0] = S->t[1] = S->buflen = S->nullt = 0; - S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0; -} - -void blake224_init(state *S) { - S->h[0] = 0xC1059ED8; - S->h[1] = 0x367CD507; - S->h[2] = 0x3070DD17; - S->h[3] = 0xF70E5939; - S->h[4] = 0xFFC00B31; - S->h[5] = 0x68581511; - S->h[6] = 0x64F98FA7; - S->h[7] = 0xBEFA4FA4; - S->t[0] = S->t[1] = S->buflen = S->nullt = 0; - S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0; -} - -// datalen = number of bits -void blake256_update(state *S, const uint8_t *data, uint64_t datalen) { - int 
left = S->buflen >> 3; - int fill = 64 - left; - - if (left && (((datalen >> 3) & 0x3F) >= (unsigned) fill)) { - memcpy((void *) (S->buf + left), (void *) data, fill); - S->t[0] += 512; - if (S->t[0] == 0) S->t[1]++; - blake256_compress(S, S->buf); - data += fill; - datalen -= (fill << 3); - left = 0; - } - - while (datalen >= 512) { - S->t[0] += 512; - if (S->t[0] == 0) S->t[1]++; - blake256_compress(S, data); - data += 64; - datalen -= 512; - } - - if (datalen > 0) { - memcpy((void *) (S->buf + left), (void *) data, datalen >> 3); - S->buflen = (left << 3) + (int) datalen; - } else { - S->buflen = 0; - } -} - -// datalen = number of bits -void blake224_update(state *S, const uint8_t *data, uint64_t datalen) { - blake256_update(S, data, datalen); -} - -void blake256_final_h(state *S, uint8_t *digest, uint8_t pa, uint8_t pb) { - uint8_t msglen[8]; - uint32_t lo = S->t[0] + S->buflen, hi = S->t[1]; - if (lo < (unsigned) S->buflen) hi++; - U32TO8(msglen + 0, hi); - U32TO8(msglen + 4, lo); - - if (S->buflen == 440) { /* one padding byte */ - S->t[0] -= 8; - blake256_update(S, &pa, 8); - } else { - if (S->buflen < 440) { /* enough space to fill the block */ - if (S->buflen == 0) S->nullt = 1; - S->t[0] -= 440 - S->buflen; - blake256_update(S, padding, 440 - S->buflen); - } else { /* need 2 compressions */ - S->t[0] -= 512 - S->buflen; - blake256_update(S, padding, 512 - S->buflen); - S->t[0] -= 440; - blake256_update(S, padding + 1, 440); - S->nullt = 1; - } - blake256_update(S, &pb, 8); - S->t[0] -= 8; - } - S->t[0] -= 64; - blake256_update(S, msglen, 64); - - U32TO8(digest + 0, S->h[0]); - U32TO8(digest + 4, S->h[1]); - U32TO8(digest + 8, S->h[2]); - U32TO8(digest + 12, S->h[3]); - U32TO8(digest + 16, S->h[4]); - U32TO8(digest + 20, S->h[5]); - U32TO8(digest + 24, S->h[6]); - U32TO8(digest + 28, S->h[7]); -} - -void blake256_final(state *S, uint8_t *digest) { - blake256_final_h(S, digest, 0x81, 0x01); -} - -void blake224_final(state *S, uint8_t *digest) { - 
blake256_final_h(S, digest, 0x80, 0x00); -} - -// inlen = number of bytes -void blake256_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) { - state S; - blake256_init(&S); - blake256_update(&S, in, inlen * 8); - blake256_final(&S, out); -} - -// inlen = number of bytes -void blake224_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) { - state S; - blake224_init(&S); - blake224_update(&S, in, inlen * 8); - blake224_final(&S, out); -} - -// keylen = number of bytes -void hmac_blake256_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) { - const uint8_t *key = _key; - uint8_t keyhash[32]; - uint8_t pad[64]; - uint64_t i; - - if (keylen > 64) { - blake256_hash(keyhash, key, keylen); - key = keyhash; - keylen = 32; - } - - blake256_init(&S->inner); - memset(pad, 0x36, 64); - for (i = 0; i < keylen; ++i) { - pad[i] ^= key[i]; - } - blake256_update(&S->inner, pad, 512); - - blake256_init(&S->outer); - memset(pad, 0x5c, 64); - for (i = 0; i < keylen; ++i) { - pad[i] ^= key[i]; - } - blake256_update(&S->outer, pad, 512); - - memset(keyhash, 0, 32); -} - -// keylen = number of bytes -void hmac_blake224_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) { - const uint8_t *key = _key; - uint8_t keyhash[32]; - uint8_t pad[64]; - uint64_t i; - - if (keylen > 64) { - blake256_hash(keyhash, key, keylen); - key = keyhash; - keylen = 28; - } - - blake224_init(&S->inner); - memset(pad, 0x36, 64); - for (i = 0; i < keylen; ++i) { - pad[i] ^= key[i]; - } - blake224_update(&S->inner, pad, 512); - - blake224_init(&S->outer); - memset(pad, 0x5c, 64); - for (i = 0; i < keylen; ++i) { - pad[i] ^= key[i]; - } - blake224_update(&S->outer, pad, 512); - - memset(keyhash, 0, 32); -} - -// datalen = number of bits -void hmac_blake256_update(hmac_state *S, const uint8_t *data, uint64_t datalen) { - // update the inner state - blake256_update(&S->inner, data, datalen); -} - -// datalen = number of bits -void hmac_blake224_update(hmac_state *S, const uint8_t *data, uint64_t 
datalen) { - // update the inner state - blake224_update(&S->inner, data, datalen); -} - -void hmac_blake256_final(hmac_state *S, uint8_t *digest) { - uint8_t ihash[32]; - blake256_final(&S->inner, ihash); - blake256_update(&S->outer, ihash, 256); - blake256_final(&S->outer, digest); - memset(ihash, 0, 32); -} - -void hmac_blake224_final(hmac_state *S, uint8_t *digest) { - uint8_t ihash[32]; - blake224_final(&S->inner, ihash); - blake224_update(&S->outer, ihash, 224); - blake224_final(&S->outer, digest); - memset(ihash, 0, 32); -} - -// keylen = number of bytes; inlen = number of bytes -void hmac_blake256_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) { - hmac_state S; - hmac_blake256_init(&S, key, keylen); - hmac_blake256_update(&S, in, inlen * 8); - hmac_blake256_final(&S, out); -} - -// keylen = number of bytes; inlen = number of bytes -void hmac_blake224_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) { - hmac_state S; - hmac_blake224_init(&S, key, keylen); - hmac_blake224_update(&S, in, inlen * 8); - hmac_blake224_final(&S, out); -} diff --git a/src/crypto/c_blake256.h b/src/crypto/c_blake256.h deleted file mode 100644 index b9c2aad0..00000000 --- a/src/crypto/c_blake256.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef _BLAKE256_H_ -#define _BLAKE256_H_ - -#include - -typedef struct { - uint32_t h[8], s[4], t[2]; - int buflen, nullt; - uint8_t buf[64]; -} state; - -typedef struct { - state inner; - state outer; -} hmac_state; - -void blake256_init(state *); -void blake224_init(state *); - -void blake256_update(state *, const uint8_t *, uint64_t); -void blake224_update(state *, const uint8_t *, uint64_t); - -void blake256_final(state *, uint8_t *); -void blake224_final(state *, uint8_t *); - -void blake256_hash(uint8_t *, const uint8_t *, uint64_t); -void blake224_hash(uint8_t *, const uint8_t *, uint64_t); - -/* HMAC functions: */ - -void hmac_blake256_init(hmac_state *, const 
uint8_t *, uint64_t); -void hmac_blake224_init(hmac_state *, const uint8_t *, uint64_t); - -void hmac_blake256_update(hmac_state *, const uint8_t *, uint64_t); -void hmac_blake224_update(hmac_state *, const uint8_t *, uint64_t); - -void hmac_blake256_final(hmac_state *, uint8_t *); -void hmac_blake224_final(hmac_state *, uint8_t *); - -void hmac_blake256_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t); -void hmac_blake224_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t); - -#endif /* _BLAKE256_H_ */ diff --git a/src/crypto/c_groestl.c b/src/crypto/c_groestl.c deleted file mode 100644 index 0f57ea12..00000000 --- a/src/crypto/c_groestl.c +++ /dev/null @@ -1,360 +0,0 @@ -/* hash.c April 2012 - * Groestl ANSI C code optimised for 32-bit machines - * Author: Thomas Krinninger - * - * This work is based on the implementation of - * Soeren S. Thomsen and Krystian Matusiewicz - * - * - */ - -#include "c_groestl.h" -#include "groestl_tables.h" - -#define P_TYPE 0 -#define Q_TYPE 1 - -const uint8_t shift_Values[2][8] = {{0,1,2,3,4,5,6,7},{1,3,5,7,0,2,4,6}}; - -const uint8_t indices_cyclic[15] = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6}; - - -#define ROTATE_COLUMN_DOWN(v1, v2, amount_bytes, temp_var) {temp_var = (v1<<(8*amount_bytes))|(v2>>(8*(4-amount_bytes))); \ - v2 = (v2<<(8*amount_bytes))|(v1>>(8*(4-amount_bytes))); \ - v1 = temp_var;} - - -#define COLUMN(x,y,i,c0,c1,c2,c3,c4,c5,c6,c7,tv1,tv2,tu,tl,t) \ - tu = T[2*(uint32_t)x[4*c0+0]]; \ - tl = T[2*(uint32_t)x[4*c0+0]+1]; \ - tv1 = T[2*(uint32_t)x[4*c1+1]]; \ - tv2 = T[2*(uint32_t)x[4*c1+1]+1]; \ - ROTATE_COLUMN_DOWN(tv1,tv2,1,t) \ - tu ^= tv1; \ - tl ^= tv2; \ - tv1 = T[2*(uint32_t)x[4*c2+2]]; \ - tv2 = T[2*(uint32_t)x[4*c2+2]+1]; \ - ROTATE_COLUMN_DOWN(tv1,tv2,2,t) \ - tu ^= tv1; \ - tl ^= tv2; \ - tv1 = T[2*(uint32_t)x[4*c3+3]]; \ - tv2 = T[2*(uint32_t)x[4*c3+3]+1]; \ - ROTATE_COLUMN_DOWN(tv1,tv2,3,t) \ - tu ^= tv1; \ - tl ^= tv2; \ - tl ^= T[2*(uint32_t)x[4*c4+0]]; \ - tu ^= 
T[2*(uint32_t)x[4*c4+0]+1]; \ - tv1 = T[2*(uint32_t)x[4*c5+1]]; \ - tv2 = T[2*(uint32_t)x[4*c5+1]+1]; \ - ROTATE_COLUMN_DOWN(tv1,tv2,1,t) \ - tl ^= tv1; \ - tu ^= tv2; \ - tv1 = T[2*(uint32_t)x[4*c6+2]]; \ - tv2 = T[2*(uint32_t)x[4*c6+2]+1]; \ - ROTATE_COLUMN_DOWN(tv1,tv2,2,t) \ - tl ^= tv1; \ - tu ^= tv2; \ - tv1 = T[2*(uint32_t)x[4*c7+3]]; \ - tv2 = T[2*(uint32_t)x[4*c7+3]+1]; \ - ROTATE_COLUMN_DOWN(tv1,tv2,3,t) \ - tl ^= tv1; \ - tu ^= tv2; \ - y[i] = tu; \ - y[i+1] = tl; - - -/* compute one round of P (short variants) */ -static void RND512P(uint8_t *x, uint32_t *y, uint32_t r) { - uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp; - uint32_t* x32 = (uint32_t*)x; - x32[ 0] ^= 0x00000000^r; - x32[ 2] ^= 0x00000010^r; - x32[ 4] ^= 0x00000020^r; - x32[ 6] ^= 0x00000030^r; - x32[ 8] ^= 0x00000040^r; - x32[10] ^= 0x00000050^r; - x32[12] ^= 0x00000060^r; - x32[14] ^= 0x00000070^r; - COLUMN(x,y, 0, 0, 2, 4, 6, 9, 11, 13, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y, 2, 2, 4, 6, 8, 11, 13, 15, 1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y, 4, 4, 6, 8, 10, 13, 15, 1, 3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y, 6, 6, 8, 10, 12, 15, 1, 3, 5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y, 8, 8, 10, 12, 14, 1, 3, 5, 7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y,10, 10, 12, 14, 0, 3, 5, 7, 9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y,12, 12, 14, 0, 2, 5, 7, 9, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y,14, 14, 0, 2, 4, 7, 9, 11, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); -} - -/* compute one round of Q (short variants) */ -static void RND512Q(uint8_t *x, uint32_t *y, uint32_t r) { - uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp; - uint32_t* x32 = (uint32_t*)x; - x32[ 0] = ~x32[ 0]; - x32[ 1] ^= 
0xffffffff^r; - x32[ 2] = ~x32[ 2]; - x32[ 3] ^= 0xefffffff^r; - x32[ 4] = ~x32[ 4]; - x32[ 5] ^= 0xdfffffff^r; - x32[ 6] = ~x32[ 6]; - x32[ 7] ^= 0xcfffffff^r; - x32[ 8] = ~x32[ 8]; - x32[ 9] ^= 0xbfffffff^r; - x32[10] = ~x32[10]; - x32[11] ^= 0xafffffff^r; - x32[12] = ~x32[12]; - x32[13] ^= 0x9fffffff^r; - x32[14] = ~x32[14]; - x32[15] ^= 0x8fffffff^r; - COLUMN(x,y, 0, 2, 6, 10, 14, 1, 5, 9, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y, 2, 4, 8, 12, 0, 3, 7, 11, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y, 4, 6, 10, 14, 2, 5, 9, 13, 1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y, 6, 8, 12, 0, 4, 7, 11, 15, 3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y, 8, 10, 14, 2, 6, 9, 13, 1, 5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y,10, 12, 0, 4, 8, 11, 15, 3, 7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y,12, 14, 2, 6, 10, 13, 1, 5, 9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); - COLUMN(x,y,14, 0, 4, 8, 12, 15, 3, 7, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp); -} - -/* compute compression function (short variants) */ -static void F512(uint32_t *h, const uint32_t *m) { - int i; - uint32_t Ptmp[2*COLS512]; - uint32_t Qtmp[2*COLS512]; - uint32_t y[2*COLS512]; - uint32_t z[2*COLS512]; - - for (i = 0; i < 2*COLS512; i++) { - z[i] = m[i]; - Ptmp[i] = h[i]^m[i]; - } - - /* compute Q(m) */ - RND512Q((uint8_t*)z, y, 0x00000000); - RND512Q((uint8_t*)y, z, 0x01000000); - RND512Q((uint8_t*)z, y, 0x02000000); - RND512Q((uint8_t*)y, z, 0x03000000); - RND512Q((uint8_t*)z, y, 0x04000000); - RND512Q((uint8_t*)y, z, 0x05000000); - RND512Q((uint8_t*)z, y, 0x06000000); - RND512Q((uint8_t*)y, z, 0x07000000); - RND512Q((uint8_t*)z, y, 0x08000000); - RND512Q((uint8_t*)y, Qtmp, 0x09000000); - - /* compute P(h+m) */ - RND512P((uint8_t*)Ptmp, y, 0x00000000); - 
RND512P((uint8_t*)y, z, 0x00000001); - RND512P((uint8_t*)z, y, 0x00000002); - RND512P((uint8_t*)y, z, 0x00000003); - RND512P((uint8_t*)z, y, 0x00000004); - RND512P((uint8_t*)y, z, 0x00000005); - RND512P((uint8_t*)z, y, 0x00000006); - RND512P((uint8_t*)y, z, 0x00000007); - RND512P((uint8_t*)z, y, 0x00000008); - RND512P((uint8_t*)y, Ptmp, 0x00000009); - - /* compute P(h+m) + Q(m) + h */ - for (i = 0; i < 2*COLS512; i++) { - h[i] ^= Ptmp[i]^Qtmp[i]; - } -} - - -/* digest up to msglen bytes of input (full blocks only) */ -static void Transform(groestlHashState *ctx, - const uint8_t *input, - int msglen) { - - /* digest message, one block at a time */ - for (; msglen >= SIZE512; - msglen -= SIZE512, input += SIZE512) { - F512(ctx->chaining,(uint32_t*)input); - - /* increment block counter */ - ctx->block_counter1++; - if (ctx->block_counter1 == 0) ctx->block_counter2++; - } -} - -/* given state h, do h <- P(h)+h */ -static void OutputTransformation(groestlHashState *ctx) { - int j; - uint32_t temp[2*COLS512]; - uint32_t y[2*COLS512]; - uint32_t z[2*COLS512]; - - - - for (j = 0; j < 2*COLS512; j++) { - temp[j] = ctx->chaining[j]; - } - RND512P((uint8_t*)temp, y, 0x00000000); - RND512P((uint8_t*)y, z, 0x00000001); - RND512P((uint8_t*)z, y, 0x00000002); - RND512P((uint8_t*)y, z, 0x00000003); - RND512P((uint8_t*)z, y, 0x00000004); - RND512P((uint8_t*)y, z, 0x00000005); - RND512P((uint8_t*)z, y, 0x00000006); - RND512P((uint8_t*)y, z, 0x00000007); - RND512P((uint8_t*)z, y, 0x00000008); - RND512P((uint8_t*)y, temp, 0x00000009); - for (j = 0; j < 2*COLS512; j++) { - ctx->chaining[j] ^= temp[j]; - } -} - -/* initialise context */ -static void Init(groestlHashState* ctx) { - int i = 0; - /* allocate memory for state and data buffer */ - - for(;i<(SIZE512/sizeof(uint32_t));i++) - { - ctx->chaining[i] = 0; - } - - /* set initial value */ - ctx->chaining[2*COLS512-1] = u32BIG((uint32_t)HASH_BIT_LEN); - - /* set other variables */ - ctx->buf_ptr = 0; - ctx->block_counter1 = 0; - 
ctx->block_counter2 = 0; - ctx->bits_in_last_byte = 0; -} - -/* update state with databitlen bits of input */ -static void Update(groestlHashState* ctx, - const BitSequence* input, - DataLength databitlen) { - int index = 0; - int msglen = (int)(databitlen/8); - int rem = (int)(databitlen%8); - - /* if the buffer contains data that has not yet been digested, first - add data to buffer until full */ - if (ctx->buf_ptr) { - while (ctx->buf_ptr < SIZE512 && index < msglen) { - ctx->buffer[(int)ctx->buf_ptr++] = input[index++]; - } - if (ctx->buf_ptr < SIZE512) { - /* buffer still not full, return */ - if (rem) { - ctx->bits_in_last_byte = rem; - ctx->buffer[(int)ctx->buf_ptr++] = input[index]; - } - return; - } - - /* digest buffer */ - ctx->buf_ptr = 0; - Transform(ctx, ctx->buffer, SIZE512); - } - - /* digest bulk of message */ - Transform(ctx, input+index, msglen-index); - index += ((msglen-index)/SIZE512)*SIZE512; - - /* store remaining data in buffer */ - while (index < msglen) { - ctx->buffer[(int)ctx->buf_ptr++] = input[index++]; - } - - /* if non-integral number of bytes have been supplied, store - remaining bits in last byte, together with information about - number of bits */ - if (rem) { - ctx->bits_in_last_byte = rem; - ctx->buffer[(int)ctx->buf_ptr++] = input[index]; - } -} - -#define BILB ctx->bits_in_last_byte - -/* finalise: process remaining data (including padding), perform - output transformation, and write hash result to 'output' */ -static void Final(groestlHashState* ctx, - BitSequence* output) { - int i, j = 0, hashbytelen = HASH_BIT_LEN/8; - uint8_t *s = (BitSequence*)ctx->chaining; - - /* pad with '1'-bit and first few '0'-bits */ - if (BILB) { - ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-BILB); - BILB = 0; - } - else ctx->buffer[(int)ctx->buf_ptr++] = 0x80; - - /* pad with '0'-bits */ - if (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) { - /* padding requires two blocks */ - while (ctx->buf_ptr < SIZE512) { - 
ctx->buffer[(int)ctx->buf_ptr++] = 0; - } - /* digest first padding block */ - Transform(ctx, ctx->buffer, SIZE512); - ctx->buf_ptr = 0; - } - while (ctx->buf_ptr < SIZE512-LENGTHFIELDLEN) { - ctx->buffer[(int)ctx->buf_ptr++] = 0; - } - - /* length padding */ - ctx->block_counter1++; - if (ctx->block_counter1 == 0) ctx->block_counter2++; - ctx->buf_ptr = SIZE512; - - while (ctx->buf_ptr > SIZE512-(int)sizeof(uint32_t)) { - ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter1; - ctx->block_counter1 >>= 8; - } - while (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) { - ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter2; - ctx->block_counter2 >>= 8; - } - /* digest final padding block */ - Transform(ctx, ctx->buffer, SIZE512); - /* perform output transformation */ - OutputTransformation(ctx); - - /* store hash result in output */ - for (i = SIZE512-hashbytelen; i < SIZE512; i++,j++) { - output[j] = s[i]; - } - - /* zeroise relevant variables and deallocate memory */ - for (i = 0; i < COLS512; i++) { - ctx->chaining[i] = 0; - } - for (i = 0; i < SIZE512; i++) { - ctx->buffer[i] = 0; - } -} - -/* hash bit sequence */ -void groestl(const BitSequence* data, - DataLength databitlen, - BitSequence* hashval) { - - groestlHashState context; - - /* initialise */ - Init(&context); - - - /* process message */ - Update(&context, data, databitlen); - - /* finalise */ - Final(&context, hashval); -} -/* -static int crypto_hash(unsigned char *out, - const unsigned char *in, - unsigned long long len) -{ - groestl(in, 8*len, out); - return 0; -} - -*/ diff --git a/src/crypto/c_groestl.h b/src/crypto/c_groestl.h deleted file mode 100644 index 2b513393..00000000 --- a/src/crypto/c_groestl.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef __hash_h -#define __hash_h -/* -#include "crypto_uint8.h" -#include "crypto_uint32.h" -#include "crypto_uint64.h" -#include "crypto_hash.h" - -typedef crypto_uint8 uint8_t; -typedef crypto_uint32 uint32_t; -typedef crypto_uint64 uint64_t; -*/ 
-#include - -#include "hash.h" - -/* some sizes (number of bytes) */ -#define ROWS 8 -#define LENGTHFIELDLEN ROWS -#define COLS512 8 - -#define SIZE512 (ROWS*COLS512) - -#define ROUNDS512 10 -#define HASH_BIT_LEN 256 - -#define ROTL32(v, n) ((((v)<<(n))|((v)>>(32-(n))))&li_32(ffffffff)) - - -#define li_32(h) 0x##h##u -#define EXT_BYTE(var,n) ((uint8_t)((uint32_t)(var) >> (8*n))) -#define u32BIG(a) \ - ((ROTL32(a,8) & li_32(00FF00FF)) | \ - (ROTL32(a,24) & li_32(FF00FF00))) - - -/* NIST API begin */ -typedef struct { - uint32_t chaining[SIZE512/sizeof(uint32_t)]; /* actual state */ - uint32_t block_counter1, - block_counter2; /* message block counter(s) */ - BitSequence buffer[SIZE512]; /* data buffer */ - int buf_ptr; /* data buffer pointer */ - int bits_in_last_byte; /* no. of message bits in last byte of - data buffer */ -} groestlHashState; - -/*void Init(hashState*); -void Update(hashState*, const BitSequence*, DataLength); -void Final(hashState*, BitSequence*); */ -void groestl(const BitSequence*, DataLength, BitSequence*); -/* NIST API end */ - -/* -int crypto_hash(unsigned char *out, - const unsigned char *in, - unsigned long long len); -*/ - -#endif /* __hash_h */ diff --git a/src/crypto/c_jh.c b/src/crypto/c_jh.c deleted file mode 100644 index 728f3bbe..00000000 --- a/src/crypto/c_jh.c +++ /dev/null @@ -1,367 +0,0 @@ -/*This program gives the 64-bit optimized bitslice implementation of JH using ANSI C - - -------------------------------- - Performance - - Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile T6600 2.2GHz) - Operating System: 64-bit Ubuntu 10.04 (Linux kernel 2.6.32-22-generic) - Speed for long message: - 1) 45.8 cycles/byte compiler: Intel C++ Compiler 11.1 compilation option: icc -O2 - 2) 56.8 cycles/byte compiler: gcc 4.4.3 compilation option: gcc -O3 - - -------------------------------- - Last Modified: January 16, 2011 -*/ - -#include "c_jh.h" - -#include -#include - -/*typedef unsigned long long uint64;*/ -typedef uint64_t uint64; 
- -/*define data alignment for different C compilers*/ -#if defined(__GNUC__) - #define DATA_ALIGN16(x) x __attribute__ ((aligned(16))) -#else - #define DATA_ALIGN16(x) __declspec(align(16)) x -#endif - - -typedef struct { - int hashbitlen; /*the message digest size*/ - unsigned long long databitlen; /*the message size in bits*/ - unsigned long long datasize_in_buffer; /*the size of the message remained in buffer; assumed to be multiple of 8bits except for the last partial block at the end of the message*/ - DATA_ALIGN16(uint64 x[8][2]); /*the 1024-bit state, ( x[i][0] || x[i][1] ) is the ith row of the state in the pseudocode*/ - unsigned char buffer[64]; /*the 512-bit message block to be hashed;*/ -} hashState; - - -/*The initial hash value H(0)*/ -const unsigned char JH224_H0[128]={0x2d,0xfe,0xdd,0x62,0xf9,0x9a,0x98,0xac,0xae,0x7c,0xac,0xd6,0x19,0xd6,0x34,0xe7,0xa4,0x83,0x10,0x5,0xbc,0x30,0x12,0x16,0xb8,0x60,0x38,0xc6,0xc9,0x66,0x14,0x94,0x66,0xd9,0x89,0x9f,0x25,0x80,0x70,0x6f,0xce,0x9e,0xa3,0x1b,0x1d,0x9b,0x1a,0xdc,0x11,0xe8,0x32,0x5f,0x7b,0x36,0x6e,0x10,0xf9,0x94,0x85,0x7f,0x2,0xfa,0x6,0xc1,0x1b,0x4f,0x1b,0x5c,0xd8,0xc8,0x40,0xb3,0x97,0xf6,0xa1,0x7f,0x6e,0x73,0x80,0x99,0xdc,0xdf,0x93,0xa5,0xad,0xea,0xa3,0xd3,0xa4,0x31,0xe8,0xde,0xc9,0x53,0x9a,0x68,0x22,0xb4,0xa9,0x8a,0xec,0x86,0xa1,0xe4,0xd5,0x74,0xac,0x95,0x9c,0xe5,0x6c,0xf0,0x15,0x96,0xd,0xea,0xb5,0xab,0x2b,0xbf,0x96,0x11,0xdc,0xf0,0xdd,0x64,0xea,0x6e}; -const unsigned char 
JH256_H0[128]={0xeb,0x98,0xa3,0x41,0x2c,0x20,0xd3,0xeb,0x92,0xcd,0xbe,0x7b,0x9c,0xb2,0x45,0xc1,0x1c,0x93,0x51,0x91,0x60,0xd4,0xc7,0xfa,0x26,0x0,0x82,0xd6,0x7e,0x50,0x8a,0x3,0xa4,0x23,0x9e,0x26,0x77,0x26,0xb9,0x45,0xe0,0xfb,0x1a,0x48,0xd4,0x1a,0x94,0x77,0xcd,0xb5,0xab,0x26,0x2,0x6b,0x17,0x7a,0x56,0xf0,0x24,0x42,0xf,0xff,0x2f,0xa8,0x71,0xa3,0x96,0x89,0x7f,0x2e,0x4d,0x75,0x1d,0x14,0x49,0x8,0xf7,0x7d,0xe2,0x62,0x27,0x76,0x95,0xf7,0x76,0x24,0x8f,0x94,0x87,0xd5,0xb6,0x57,0x47,0x80,0x29,0x6c,0x5c,0x5e,0x27,0x2d,0xac,0x8e,0xd,0x6c,0x51,0x84,0x50,0xc6,0x57,0x5,0x7a,0xf,0x7b,0xe4,0xd3,0x67,0x70,0x24,0x12,0xea,0x89,0xe3,0xab,0x13,0xd3,0x1c,0xd7,0x69}; -const unsigned char JH384_H0[128]={0x48,0x1e,0x3b,0xc6,0xd8,0x13,0x39,0x8a,0x6d,0x3b,0x5e,0x89,0x4a,0xde,0x87,0x9b,0x63,0xfa,0xea,0x68,0xd4,0x80,0xad,0x2e,0x33,0x2c,0xcb,0x21,0x48,0xf,0x82,0x67,0x98,0xae,0xc8,0x4d,0x90,0x82,0xb9,0x28,0xd4,0x55,0xea,0x30,0x41,0x11,0x42,0x49,0x36,0xf5,0x55,0xb2,0x92,0x48,0x47,0xec,0xc7,0x25,0xa,0x93,0xba,0xf4,0x3c,0xe1,0x56,0x9b,0x7f,0x8a,0x27,0xdb,0x45,0x4c,0x9e,0xfc,0xbd,0x49,0x63,0x97,0xaf,0xe,0x58,0x9f,0xc2,0x7d,0x26,0xaa,0x80,0xcd,0x80,0xc0,0x8b,0x8c,0x9d,0xeb,0x2e,0xda,0x8a,0x79,0x81,0xe8,0xf8,0xd5,0x37,0x3a,0xf4,0x39,0x67,0xad,0xdd,0xd1,0x7a,0x71,0xa9,0xb4,0xd3,0xbd,0xa4,0x75,0xd3,0x94,0x97,0x6c,0x3f,0xba,0x98,0x42,0x73,0x7f}; -const unsigned char JH512_H0[128]={0x6f,0xd1,0x4b,0x96,0x3e,0x0,0xaa,0x17,0x63,0x6a,0x2e,0x5,0x7a,0x15,0xd5,0x43,0x8a,0x22,0x5e,0x8d,0xc,0x97,0xef,0xb,0xe9,0x34,0x12,0x59,0xf2,0xb3,0xc3,0x61,0x89,0x1d,0xa0,0xc1,0x53,0x6f,0x80,0x1e,0x2a,0xa9,0x5,0x6b,0xea,0x2b,0x6d,0x80,0x58,0x8e,0xcc,0xdb,0x20,0x75,0xba,0xa6,0xa9,0xf,0x3a,0x76,0xba,0xf8,0x3b,0xf7,0x1,0x69,0xe6,0x5,0x41,0xe3,0x4a,0x69,0x46,0xb5,0x8a,0x8e,0x2e,0x6f,0xe6,0x5a,0x10,0x47,0xa7,0xd0,0xc1,0x84,0x3c,0x24,0x3b,0x6e,0x71,0xb1,0x2d,0x5a,0xc1,0x99,0xcf,0x57,0xf6,0xec,0x9d,0xb1,0xf8,0x56,0xa7,0x6,0x88,0x7c,0x57,0x16,0xb1,0x56,0xe3,0xc2,0xfc,0xdf,0xe6,0x85,0x17,0xfb,0x54,0x5a,0x46,0x78,0xcc,0x8c,0xdd,0x4b}; - 
-/*42 round constants, each round constant is 32-byte (256-bit)*/ -const unsigned char E8_bitslice_roundconstant[42][32]={ -{0x72,0xd5,0xde,0xa2,0xdf,0x15,0xf8,0x67,0x7b,0x84,0x15,0xa,0xb7,0x23,0x15,0x57,0x81,0xab,0xd6,0x90,0x4d,0x5a,0x87,0xf6,0x4e,0x9f,0x4f,0xc5,0xc3,0xd1,0x2b,0x40}, -{0xea,0x98,0x3a,0xe0,0x5c,0x45,0xfa,0x9c,0x3,0xc5,0xd2,0x99,0x66,0xb2,0x99,0x9a,0x66,0x2,0x96,0xb4,0xf2,0xbb,0x53,0x8a,0xb5,0x56,0x14,0x1a,0x88,0xdb,0xa2,0x31}, -{0x3,0xa3,0x5a,0x5c,0x9a,0x19,0xe,0xdb,0x40,0x3f,0xb2,0xa,0x87,0xc1,0x44,0x10,0x1c,0x5,0x19,0x80,0x84,0x9e,0x95,0x1d,0x6f,0x33,0xeb,0xad,0x5e,0xe7,0xcd,0xdc}, -{0x10,0xba,0x13,0x92,0x2,0xbf,0x6b,0x41,0xdc,0x78,0x65,0x15,0xf7,0xbb,0x27,0xd0,0xa,0x2c,0x81,0x39,0x37,0xaa,0x78,0x50,0x3f,0x1a,0xbf,0xd2,0x41,0x0,0x91,0xd3}, -{0x42,0x2d,0x5a,0xd,0xf6,0xcc,0x7e,0x90,0xdd,0x62,0x9f,0x9c,0x92,0xc0,0x97,0xce,0x18,0x5c,0xa7,0xb,0xc7,0x2b,0x44,0xac,0xd1,0xdf,0x65,0xd6,0x63,0xc6,0xfc,0x23}, -{0x97,0x6e,0x6c,0x3,0x9e,0xe0,0xb8,0x1a,0x21,0x5,0x45,0x7e,0x44,0x6c,0xec,0xa8,0xee,0xf1,0x3,0xbb,0x5d,0x8e,0x61,0xfa,0xfd,0x96,0x97,0xb2,0x94,0x83,0x81,0x97}, -{0x4a,0x8e,0x85,0x37,0xdb,0x3,0x30,0x2f,0x2a,0x67,0x8d,0x2d,0xfb,0x9f,0x6a,0x95,0x8a,0xfe,0x73,0x81,0xf8,0xb8,0x69,0x6c,0x8a,0xc7,0x72,0x46,0xc0,0x7f,0x42,0x14}, -{0xc5,0xf4,0x15,0x8f,0xbd,0xc7,0x5e,0xc4,0x75,0x44,0x6f,0xa7,0x8f,0x11,0xbb,0x80,0x52,0xde,0x75,0xb7,0xae,0xe4,0x88,0xbc,0x82,0xb8,0x0,0x1e,0x98,0xa6,0xa3,0xf4}, -{0x8e,0xf4,0x8f,0x33,0xa9,0xa3,0x63,0x15,0xaa,0x5f,0x56,0x24,0xd5,0xb7,0xf9,0x89,0xb6,0xf1,0xed,0x20,0x7c,0x5a,0xe0,0xfd,0x36,0xca,0xe9,0x5a,0x6,0x42,0x2c,0x36}, -{0xce,0x29,0x35,0x43,0x4e,0xfe,0x98,0x3d,0x53,0x3a,0xf9,0x74,0x73,0x9a,0x4b,0xa7,0xd0,0xf5,0x1f,0x59,0x6f,0x4e,0x81,0x86,0xe,0x9d,0xad,0x81,0xaf,0xd8,0x5a,0x9f}, -{0xa7,0x5,0x6,0x67,0xee,0x34,0x62,0x6a,0x8b,0xb,0x28,0xbe,0x6e,0xb9,0x17,0x27,0x47,0x74,0x7,0x26,0xc6,0x80,0x10,0x3f,0xe0,0xa0,0x7e,0x6f,0xc6,0x7e,0x48,0x7b}, 
-{0xd,0x55,0xa,0xa5,0x4a,0xf8,0xa4,0xc0,0x91,0xe3,0xe7,0x9f,0x97,0x8e,0xf1,0x9e,0x86,0x76,0x72,0x81,0x50,0x60,0x8d,0xd4,0x7e,0x9e,0x5a,0x41,0xf3,0xe5,0xb0,0x62}, -{0xfc,0x9f,0x1f,0xec,0x40,0x54,0x20,0x7a,0xe3,0xe4,0x1a,0x0,0xce,0xf4,0xc9,0x84,0x4f,0xd7,0x94,0xf5,0x9d,0xfa,0x95,0xd8,0x55,0x2e,0x7e,0x11,0x24,0xc3,0x54,0xa5}, -{0x5b,0xdf,0x72,0x28,0xbd,0xfe,0x6e,0x28,0x78,0xf5,0x7f,0xe2,0xf,0xa5,0xc4,0xb2,0x5,0x89,0x7c,0xef,0xee,0x49,0xd3,0x2e,0x44,0x7e,0x93,0x85,0xeb,0x28,0x59,0x7f}, -{0x70,0x5f,0x69,0x37,0xb3,0x24,0x31,0x4a,0x5e,0x86,0x28,0xf1,0x1d,0xd6,0xe4,0x65,0xc7,0x1b,0x77,0x4,0x51,0xb9,0x20,0xe7,0x74,0xfe,0x43,0xe8,0x23,0xd4,0x87,0x8a}, -{0x7d,0x29,0xe8,0xa3,0x92,0x76,0x94,0xf2,0xdd,0xcb,0x7a,0x9,0x9b,0x30,0xd9,0xc1,0x1d,0x1b,0x30,0xfb,0x5b,0xdc,0x1b,0xe0,0xda,0x24,0x49,0x4f,0xf2,0x9c,0x82,0xbf}, -{0xa4,0xe7,0xba,0x31,0xb4,0x70,0xbf,0xff,0xd,0x32,0x44,0x5,0xde,0xf8,0xbc,0x48,0x3b,0xae,0xfc,0x32,0x53,0xbb,0xd3,0x39,0x45,0x9f,0xc3,0xc1,0xe0,0x29,0x8b,0xa0}, -{0xe5,0xc9,0x5,0xfd,0xf7,0xae,0x9,0xf,0x94,0x70,0x34,0x12,0x42,0x90,0xf1,0x34,0xa2,0x71,0xb7,0x1,0xe3,0x44,0xed,0x95,0xe9,0x3b,0x8e,0x36,0x4f,0x2f,0x98,0x4a}, -{0x88,0x40,0x1d,0x63,0xa0,0x6c,0xf6,0x15,0x47,0xc1,0x44,0x4b,0x87,0x52,0xaf,0xff,0x7e,0xbb,0x4a,0xf1,0xe2,0xa,0xc6,0x30,0x46,0x70,0xb6,0xc5,0xcc,0x6e,0x8c,0xe6}, -{0xa4,0xd5,0xa4,0x56,0xbd,0x4f,0xca,0x0,0xda,0x9d,0x84,0x4b,0xc8,0x3e,0x18,0xae,0x73,0x57,0xce,0x45,0x30,0x64,0xd1,0xad,0xe8,0xa6,0xce,0x68,0x14,0x5c,0x25,0x67}, -{0xa3,0xda,0x8c,0xf2,0xcb,0xe,0xe1,0x16,0x33,0xe9,0x6,0x58,0x9a,0x94,0x99,0x9a,0x1f,0x60,0xb2,0x20,0xc2,0x6f,0x84,0x7b,0xd1,0xce,0xac,0x7f,0xa0,0xd1,0x85,0x18}, -{0x32,0x59,0x5b,0xa1,0x8d,0xdd,0x19,0xd3,0x50,0x9a,0x1c,0xc0,0xaa,0xa5,0xb4,0x46,0x9f,0x3d,0x63,0x67,0xe4,0x4,0x6b,0xba,0xf6,0xca,0x19,0xab,0xb,0x56,0xee,0x7e}, -{0x1f,0xb1,0x79,0xea,0xa9,0x28,0x21,0x74,0xe9,0xbd,0xf7,0x35,0x3b,0x36,0x51,0xee,0x1d,0x57,0xac,0x5a,0x75,0x50,0xd3,0x76,0x3a,0x46,0xc2,0xfe,0xa3,0x7d,0x70,0x1}, 
-{0xf7,0x35,0xc1,0xaf,0x98,0xa4,0xd8,0x42,0x78,0xed,0xec,0x20,0x9e,0x6b,0x67,0x79,0x41,0x83,0x63,0x15,0xea,0x3a,0xdb,0xa8,0xfa,0xc3,0x3b,0x4d,0x32,0x83,0x2c,0x83}, -{0xa7,0x40,0x3b,0x1f,0x1c,0x27,0x47,0xf3,0x59,0x40,0xf0,0x34,0xb7,0x2d,0x76,0x9a,0xe7,0x3e,0x4e,0x6c,0xd2,0x21,0x4f,0xfd,0xb8,0xfd,0x8d,0x39,0xdc,0x57,0x59,0xef}, -{0x8d,0x9b,0xc,0x49,0x2b,0x49,0xeb,0xda,0x5b,0xa2,0xd7,0x49,0x68,0xf3,0x70,0xd,0x7d,0x3b,0xae,0xd0,0x7a,0x8d,0x55,0x84,0xf5,0xa5,0xe9,0xf0,0xe4,0xf8,0x8e,0x65}, -{0xa0,0xb8,0xa2,0xf4,0x36,0x10,0x3b,0x53,0xc,0xa8,0x7,0x9e,0x75,0x3e,0xec,0x5a,0x91,0x68,0x94,0x92,0x56,0xe8,0x88,0x4f,0x5b,0xb0,0x5c,0x55,0xf8,0xba,0xbc,0x4c}, -{0xe3,0xbb,0x3b,0x99,0xf3,0x87,0x94,0x7b,0x75,0xda,0xf4,0xd6,0x72,0x6b,0x1c,0x5d,0x64,0xae,0xac,0x28,0xdc,0x34,0xb3,0x6d,0x6c,0x34,0xa5,0x50,0xb8,0x28,0xdb,0x71}, -{0xf8,0x61,0xe2,0xf2,0x10,0x8d,0x51,0x2a,0xe3,0xdb,0x64,0x33,0x59,0xdd,0x75,0xfc,0x1c,0xac,0xbc,0xf1,0x43,0xce,0x3f,0xa2,0x67,0xbb,0xd1,0x3c,0x2,0xe8,0x43,0xb0}, -{0x33,0xa,0x5b,0xca,0x88,0x29,0xa1,0x75,0x7f,0x34,0x19,0x4d,0xb4,0x16,0x53,0x5c,0x92,0x3b,0x94,0xc3,0xe,0x79,0x4d,0x1e,0x79,0x74,0x75,0xd7,0xb6,0xee,0xaf,0x3f}, -{0xea,0xa8,0xd4,0xf7,0xbe,0x1a,0x39,0x21,0x5c,0xf4,0x7e,0x9,0x4c,0x23,0x27,0x51,0x26,0xa3,0x24,0x53,0xba,0x32,0x3c,0xd2,0x44,0xa3,0x17,0x4a,0x6d,0xa6,0xd5,0xad}, -{0xb5,0x1d,0x3e,0xa6,0xaf,0xf2,0xc9,0x8,0x83,0x59,0x3d,0x98,0x91,0x6b,0x3c,0x56,0x4c,0xf8,0x7c,0xa1,0x72,0x86,0x60,0x4d,0x46,0xe2,0x3e,0xcc,0x8,0x6e,0xc7,0xf6}, -{0x2f,0x98,0x33,0xb3,0xb1,0xbc,0x76,0x5e,0x2b,0xd6,0x66,0xa5,0xef,0xc4,0xe6,0x2a,0x6,0xf4,0xb6,0xe8,0xbe,0xc1,0xd4,0x36,0x74,0xee,0x82,0x15,0xbc,0xef,0x21,0x63}, -{0xfd,0xc1,0x4e,0xd,0xf4,0x53,0xc9,0x69,0xa7,0x7d,0x5a,0xc4,0x6,0x58,0x58,0x26,0x7e,0xc1,0x14,0x16,0x6,0xe0,0xfa,0x16,0x7e,0x90,0xaf,0x3d,0x28,0x63,0x9d,0x3f}, -{0xd2,0xc9,0xf2,0xe3,0x0,0x9b,0xd2,0xc,0x5f,0xaa,0xce,0x30,0xb7,0xd4,0xc,0x30,0x74,0x2a,0x51,0x16,0xf2,0xe0,0x32,0x98,0xd,0xeb,0x30,0xd8,0xe3,0xce,0xf8,0x9a}, 
-{0x4b,0xc5,0x9e,0x7b,0xb5,0xf1,0x79,0x92,0xff,0x51,0xe6,0x6e,0x4,0x86,0x68,0xd3,0x9b,0x23,0x4d,0x57,0xe6,0x96,0x67,0x31,0xcc,0xe6,0xa6,0xf3,0x17,0xa,0x75,0x5}, -{0xb1,0x76,0x81,0xd9,0x13,0x32,0x6c,0xce,0x3c,0x17,0x52,0x84,0xf8,0x5,0xa2,0x62,0xf4,0x2b,0xcb,0xb3,0x78,0x47,0x15,0x47,0xff,0x46,0x54,0x82,0x23,0x93,0x6a,0x48}, -{0x38,0xdf,0x58,0x7,0x4e,0x5e,0x65,0x65,0xf2,0xfc,0x7c,0x89,0xfc,0x86,0x50,0x8e,0x31,0x70,0x2e,0x44,0xd0,0xb,0xca,0x86,0xf0,0x40,0x9,0xa2,0x30,0x78,0x47,0x4e}, -{0x65,0xa0,0xee,0x39,0xd1,0xf7,0x38,0x83,0xf7,0x5e,0xe9,0x37,0xe4,0x2c,0x3a,0xbd,0x21,0x97,0xb2,0x26,0x1,0x13,0xf8,0x6f,0xa3,0x44,0xed,0xd1,0xef,0x9f,0xde,0xe7}, -{0x8b,0xa0,0xdf,0x15,0x76,0x25,0x92,0xd9,0x3c,0x85,0xf7,0xf6,0x12,0xdc,0x42,0xbe,0xd8,0xa7,0xec,0x7c,0xab,0x27,0xb0,0x7e,0x53,0x8d,0x7d,0xda,0xaa,0x3e,0xa8,0xde}, -{0xaa,0x25,0xce,0x93,0xbd,0x2,0x69,0xd8,0x5a,0xf6,0x43,0xfd,0x1a,0x73,0x8,0xf9,0xc0,0x5f,0xef,0xda,0x17,0x4a,0x19,0xa5,0x97,0x4d,0x66,0x33,0x4c,0xfd,0x21,0x6a}, -{0x35,0xb4,0x98,0x31,0xdb,0x41,0x15,0x70,0xea,0x1e,0xf,0xbb,0xed,0xcd,0x54,0x9b,0x9a,0xd0,0x63,0xa1,0x51,0x97,0x40,0x72,0xf6,0x75,0x9d,0xbf,0x91,0x47,0x6f,0xe2}}; - - -static void E8(hashState *state); /*The bijective function E8, in bitslice form*/ -static void F8(hashState *state); /*The compression function F8 */ - -/*The API functions*/ -static HashReturn Init(hashState *state, int hashbitlen); -static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen); -static HashReturn Final(hashState *state, BitSequence *hashval); -HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval); - -/*swapping bit 2i with bit 2i+1 of 64-bit x*/ -#define SWAP1(x) (x) = ((((x) & 0x5555555555555555ULL) << 1) | (((x) & 0xaaaaaaaaaaaaaaaaULL) >> 1)); -/*swapping bits 4i||4i+1 with bits 4i+2||4i+3 of 64-bit x*/ -#define SWAP2(x) (x) = ((((x) & 0x3333333333333333ULL) << 2) | (((x) & 0xccccccccccccccccULL) >> 2)); -/*swapping bits 8i||8i+1||8i+2||8i+3 
with bits 8i+4||8i+5||8i+6||8i+7 of 64-bit x*/ -#define SWAP4(x) (x) = ((((x) & 0x0f0f0f0f0f0f0f0fULL) << 4) | (((x) & 0xf0f0f0f0f0f0f0f0ULL) >> 4)); -/*swapping bits 16i||16i+1||......||16i+7 with bits 16i+8||16i+9||......||16i+15 of 64-bit x*/ -#define SWAP8(x) (x) = ((((x) & 0x00ff00ff00ff00ffULL) << 8) | (((x) & 0xff00ff00ff00ff00ULL) >> 8)); -/*swapping bits 32i||32i+1||......||32i+15 with bits 32i+16||32i+17||......||32i+31 of 64-bit x*/ -#define SWAP16(x) (x) = ((((x) & 0x0000ffff0000ffffULL) << 16) | (((x) & 0xffff0000ffff0000ULL) >> 16)); -/*swapping bits 64i||64i+1||......||64i+31 with bits 64i+32||64i+33||......||64i+63 of 64-bit x*/ -#define SWAP32(x) (x) = (((x) << 32) | ((x) >> 32)); - -/*The MDS transform*/ -#define L(m0,m1,m2,m3,m4,m5,m6,m7) \ - (m4) ^= (m1); \ - (m5) ^= (m2); \ - (m6) ^= (m0) ^ (m3); \ - (m7) ^= (m0); \ - (m0) ^= (m5); \ - (m1) ^= (m6); \ - (m2) ^= (m4) ^ (m7); \ - (m3) ^= (m4); - -/*Two Sboxes are computed in parallel, each Sbox implements S0 and S1, selected by a constant bit*/ -/*The reason to compute two Sboxes in parallel is to try to fully utilize the parallel processing power*/ -#define SS(m0,m1,m2,m3,m4,m5,m6,m7,cc0,cc1) \ - m3 = ~(m3); \ - m7 = ~(m7); \ - m0 ^= ((~(m2)) & (cc0)); \ - m4 ^= ((~(m6)) & (cc1)); \ - temp0 = (cc0) ^ ((m0) & (m1));\ - temp1 = (cc1) ^ ((m4) & (m5));\ - m0 ^= ((m2) & (m3)); \ - m4 ^= ((m6) & (m7)); \ - m3 ^= ((~(m1)) & (m2)); \ - m7 ^= ((~(m5)) & (m6)); \ - m1 ^= ((m0) & (m2)); \ - m5 ^= ((m4) & (m6)); \ - m2 ^= ((m0) & (~(m3))); \ - m6 ^= ((m4) & (~(m7))); \ - m0 ^= ((m1) | (m3)); \ - m4 ^= ((m5) | (m7)); \ - m3 ^= ((m1) & (m2)); \ - m7 ^= ((m5) & (m6)); \ - m1 ^= (temp0 & (m0)); \ - m5 ^= (temp1 & (m4)); \ - m2 ^= temp0; \ - m6 ^= temp1; - -/*The bijective function E8, in bitslice form*/ -static void E8(hashState *state) -{ - uint64 i,roundnumber,temp0,temp1; - - for (roundnumber = 0; roundnumber < 42; roundnumber = roundnumber+7) { - /*round 7*roundnumber+0: Sbox, MDS and Swapping layers*/ - 
for (i = 0; i < 2; i++) { - SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i+2] ); - L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); - SWAP1(state->x[1][i]); SWAP1(state->x[3][i]); SWAP1(state->x[5][i]); SWAP1(state->x[7][i]); - } - - /*round 7*roundnumber+1: Sbox, MDS and Swapping layers*/ - for (i = 0; i < 2; i++) { - SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i+2] ); - L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); - SWAP2(state->x[1][i]); SWAP2(state->x[3][i]); SWAP2(state->x[5][i]); SWAP2(state->x[7][i]); - } - - /*round 7*roundnumber+2: Sbox, MDS and Swapping layers*/ - for (i = 0; i < 2; i++) { - SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i+2] ); - L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); - SWAP4(state->x[1][i]); SWAP4(state->x[3][i]); SWAP4(state->x[5][i]); SWAP4(state->x[7][i]); - } - - /*round 7*roundnumber+3: Sbox, MDS and Swapping layers*/ - for (i = 0; i < 2; i++) { - SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i+2] ); - 
L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); - SWAP8(state->x[1][i]); SWAP8(state->x[3][i]); SWAP8(state->x[5][i]); SWAP8(state->x[7][i]); - } - - /*round 7*roundnumber+4: Sbox, MDS and Swapping layers*/ - for (i = 0; i < 2; i++) { - SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i+2] ); - L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); - SWAP16(state->x[1][i]); SWAP16(state->x[3][i]); SWAP16(state->x[5][i]); SWAP16(state->x[7][i]); - } - - /*round 7*roundnumber+5: Sbox, MDS and Swapping layers*/ - for (i = 0; i < 2; i++) { - SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i+2] ); - L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); - SWAP32(state->x[1][i]); SWAP32(state->x[3][i]); SWAP32(state->x[5][i]); SWAP32(state->x[7][i]); - } - - /*round 7*roundnumber+6: Sbox and MDS layers*/ - for (i = 0; i < 2; i++) { - SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i+2] ); - L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]); - } - /*round 7*roundnumber+6: swapping layer*/ - for (i = 1; i < 8; i = i+2) { - temp0 = state->x[i][0]; state->x[i][0] = state->x[i][1]; state->x[i][1] = temp0; - } - } - -} - -/*The compression function F8 */ 
-static void F8(hashState *state) -{ - uint64 i; - - /*xor the 512-bit message with the fist half of the 1024-bit hash state*/ - for (i = 0; i < 8; i++) state->x[i >> 1][i & 1] ^= ((uint64*)state->buffer)[i]; - - /*the bijective function E8 */ - E8(state); - - /*xor the 512-bit message with the second half of the 1024-bit hash state*/ - for (i = 0; i < 8; i++) state->x[(8+i) >> 1][(8+i) & 1] ^= ((uint64*)state->buffer)[i]; -} - -/*before hashing a message, initialize the hash state as H0 */ -static HashReturn Init(hashState *state, int hashbitlen) -{ - state->databitlen = 0; - state->datasize_in_buffer = 0; - - /*initialize the initial hash value of JH*/ - state->hashbitlen = hashbitlen; - - /*load the intital hash value into state*/ - switch (hashbitlen) - { - case 224: memcpy(state->x,JH224_H0,128); break; - case 256: memcpy(state->x,JH256_H0,128); break; - case 384: memcpy(state->x,JH384_H0,128); break; - case 512: memcpy(state->x,JH512_H0,128); break; - } - - return(SUCCESS); -} - - -/*hash each 512-bit message block, except the last partial block*/ -static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen) -{ - DataLength index; /*the starting address of the data to be compressed*/ - - state->databitlen += databitlen; - index = 0; - - /*if there is remaining data in the buffer, fill it to a full message block first*/ - /*we assume that the size of the data in the buffer is the multiple of 8 bits if it is not at the end of a message*/ - - /*There is data in the buffer, but the incoming data is insufficient for a full block*/ - if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) < 512) ) { - if ( (databitlen & 7) == 0 ) { - memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3)) ; - } - else memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3)+1) ; - state->datasize_in_buffer += databitlen; - databitlen = 0; - 
} - - /*There is data in the buffer, and the incoming data is sufficient for a full block*/ - if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) >= 512) ) { - memcpy( state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3) ) ; - index = 64-(state->datasize_in_buffer >> 3); - databitlen = databitlen - (512 - state->datasize_in_buffer); - F8(state); - state->datasize_in_buffer = 0; - } - - /*hash the remaining full message blocks*/ - for ( ; databitlen >= 512; index = index+64, databitlen = databitlen - 512) { - memcpy(state->buffer, data+index, 64); - F8(state); - } - - /*store the partial block into buffer, assume that -- if part of the last byte is not part of the message, then that part consists of 0 bits*/ - if ( databitlen > 0) { - if ((databitlen & 7) == 0) - memcpy(state->buffer, data+index, (databitlen & 0x1ff) >> 3); - else - memcpy(state->buffer, data+index, ((databitlen & 0x1ff) >> 3)+1); - state->datasize_in_buffer = databitlen; - } - - return(SUCCESS); -} - -/*pad the message, process the padded block(s), truncate the hash value H to obtain the message digest*/ -static HashReturn Final(hashState *state, BitSequence *hashval) -{ - unsigned int i; - - if ( (state->databitlen & 0x1ff) == 0 ) { - /*pad the message when databitlen is multiple of 512 bits, then process the padded block*/ - memset(state->buffer, 0, 64); - state->buffer[0] = 0x80; - state->buffer[63] = state->databitlen & 0xff; - state->buffer[62] = (state->databitlen >> 8) & 0xff; - state->buffer[61] = (state->databitlen >> 16) & 0xff; - state->buffer[60] = (state->databitlen >> 24) & 0xff; - state->buffer[59] = (state->databitlen >> 32) & 0xff; - state->buffer[58] = (state->databitlen >> 40) & 0xff; - state->buffer[57] = (state->databitlen >> 48) & 0xff; - state->buffer[56] = (state->databitlen >> 56) & 0xff; - F8(state); - } - else { - /*set the rest of the bytes in the buffer to 0*/ - if ( (state->datasize_in_buffer & 7) 
== 0) - for (i = (state->databitlen & 0x1ff) >> 3; i < 64; i++) state->buffer[i] = 0; - else - for (i = ((state->databitlen & 0x1ff) >> 3)+1; i < 64; i++) state->buffer[i] = 0; - - /*pad and process the partial block when databitlen is not multiple of 512 bits, then hash the padded blocks*/ - state->buffer[((state->databitlen & 0x1ff) >> 3)] |= 1 << (7- (state->databitlen & 7)); - - F8(state); - memset(state->buffer, 0, 64); - state->buffer[63] = state->databitlen & 0xff; - state->buffer[62] = (state->databitlen >> 8) & 0xff; - state->buffer[61] = (state->databitlen >> 16) & 0xff; - state->buffer[60] = (state->databitlen >> 24) & 0xff; - state->buffer[59] = (state->databitlen >> 32) & 0xff; - state->buffer[58] = (state->databitlen >> 40) & 0xff; - state->buffer[57] = (state->databitlen >> 48) & 0xff; - state->buffer[56] = (state->databitlen >> 56) & 0xff; - F8(state); - } - - /*truncating the final hash value to generate the message digest*/ - switch(state->hashbitlen) { - case 224: memcpy(hashval,(unsigned char*)state->x+64+36,28); break; - case 256: memcpy(hashval,(unsigned char*)state->x+64+32,32); break; - case 384: memcpy(hashval,(unsigned char*)state->x+64+16,48); break; - case 512: memcpy(hashval,(unsigned char*)state->x+64,64); break; - } - - return(SUCCESS); -} - -/* hash a message, - three inputs: message digest size in bits (hashbitlen); message (data); message length in bits (databitlen) - one output: message digest (hashval) -*/ -HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval) -{ - hashState state; - - if ( hashbitlen == 224 || hashbitlen == 256 || hashbitlen == 384 || hashbitlen == 512 ) { - Init(&state, hashbitlen); - Update(&state, data, databitlen); - Final(&state, hashval); - return SUCCESS; - } - else - return(BAD_HASHLEN); -} diff --git a/src/crypto/c_jh.h b/src/crypto/c_jh.h deleted file mode 100644 index d10d40fe..00000000 --- a/src/crypto/c_jh.h +++ /dev/null @@ -1,19 +0,0 @@ -/*This 
program gives the 64-bit optimized bitslice implementation of JH using ANSI C - - -------------------------------- - Performance - - Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile T6600 2.2GHz) - Operating System: 64-bit Ubuntu 10.04 (Linux kernel 2.6.32-22-generic) - Speed for long message: - 1) 45.8 cycles/byte compiler: Intel C++ Compiler 11.1 compilation option: icc -O2 - 2) 56.8 cycles/byte compiler: gcc 4.4.3 compilation option: gcc -O3 - - -------------------------------- - Last Modified: January 16, 2011 -*/ -#pragma once - -#include "hash.h" - -HashReturn jh_hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval); diff --git a/src/crypto/c_skein.c b/src/crypto/c_skein.c deleted file mode 100644 index 994e4d46..00000000 --- a/src/crypto/c_skein.c +++ /dev/null @@ -1,701 +0,0 @@ -/*********************************************************************** -** -** Implementation of the Skein hash function. -** -** Source code author: Doug Whiting, 2008. -** -** This algorithm and source code is released to the public domain. 
-** -************************************************************************/ - -#define SKEIN_PORT_CODE /* instantiate any code in skein_port.h */ - -#include /* get size_t definition */ -#include /* get the memcpy/memset functions */ -#include "c_skein.h" /* get the Skein API definitions */ - -#ifndef SKEIN_512_NIST_MAX_HASHBITS -#define SKEIN_512_NIST_MAX_HASHBITS (512) -#endif - -#define SKEIN_MODIFIER_WORDS ( 2) /* number of modifier (tweak) words */ - -#define SKEIN_512_STATE_WORDS ( 8) -#define SKEIN_MAX_STATE_WORDS (16) - -#define SKEIN_512_STATE_BYTES ( 8*SKEIN_512_STATE_WORDS) -#define SKEIN_512_STATE_BITS (64*SKEIN_512_STATE_WORDS) -#define SKEIN_512_BLOCK_BYTES ( 8*SKEIN_512_STATE_WORDS) - -#define SKEIN_RND_SPECIAL (1000u) -#define SKEIN_RND_KEY_INITIAL (SKEIN_RND_SPECIAL+0u) -#define SKEIN_RND_KEY_INJECT (SKEIN_RND_SPECIAL+1u) -#define SKEIN_RND_FEED_FWD (SKEIN_RND_SPECIAL+2u) - -typedef struct -{ - size_t hashBitLen; /* size of hash result, in bits */ - size_t bCnt; /* current byte count in buffer b[] */ - u64b_t T[SKEIN_MODIFIER_WORDS]; /* tweak words: T[0]=byte cnt, T[1]=flags */ -} Skein_Ctxt_Hdr_t; - -typedef struct /* 512-bit Skein hash context structure */ -{ - Skein_Ctxt_Hdr_t h; /* common header context variables */ - u64b_t X[SKEIN_512_STATE_WORDS]; /* chaining variables */ - u08b_t b[SKEIN_512_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */ -} Skein_512_Ctxt_t; - -/* Skein APIs for (incremental) "straight hashing" */ -static int Skein_512_Init (Skein_512_Ctxt_t *ctx, size_t hashBitLen); -static int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt); -static int Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal); - -#ifndef SKEIN_TREE_HASH -#define SKEIN_TREE_HASH (1) -#endif - -/***************************************************************** -** "Internal" Skein definitions -** -- not needed for sequential hashing API, but will be -** helpful for other uses of Skein (e.g., tree hash mode). 
-** -- included here so that they can be shared between -** reference and optimized code. -******************************************************************/ - -/* tweak word T[1]: bit field starting positions */ -#define SKEIN_T1_BIT(BIT) ((BIT) - 64) /* offset 64 because it's the second word */ - -#define SKEIN_T1_POS_TREE_LVL SKEIN_T1_BIT(112) /* bits 112..118: level in hash tree */ -#define SKEIN_T1_POS_BIT_PAD SKEIN_T1_BIT(119) /* bit 119 : partial final input byte */ -#define SKEIN_T1_POS_BLK_TYPE SKEIN_T1_BIT(120) /* bits 120..125: type field */ -#define SKEIN_T1_POS_FIRST SKEIN_T1_BIT(126) /* bits 126 : first block flag */ -#define SKEIN_T1_POS_FINAL SKEIN_T1_BIT(127) /* bit 127 : final block flag */ - -/* tweak word T[1]: flag bit definition(s) */ -#define SKEIN_T1_FLAG_FIRST (((u64b_t) 1 ) << SKEIN_T1_POS_FIRST) -#define SKEIN_T1_FLAG_FINAL (((u64b_t) 1 ) << SKEIN_T1_POS_FINAL) -#define SKEIN_T1_FLAG_BIT_PAD (((u64b_t) 1 ) << SKEIN_T1_POS_BIT_PAD) - -/* tweak word T[1]: tree level bit field mask */ -#define SKEIN_T1_TREE_LVL_MASK (((u64b_t)0x7F) << SKEIN_T1_POS_TREE_LVL) -#define SKEIN_T1_TREE_LEVEL(n) (((u64b_t) (n)) << SKEIN_T1_POS_TREE_LVL) - -/* tweak word T[1]: block type field */ -#define SKEIN_BLK_TYPE_KEY ( 0) /* key, for MAC and KDF */ -#define SKEIN_BLK_TYPE_CFG ( 4) /* configuration block */ -#define SKEIN_BLK_TYPE_PERS ( 8) /* personalization string */ -#define SKEIN_BLK_TYPE_PK (12) /* public key (for digital signature hashing) */ -#define SKEIN_BLK_TYPE_KDF (16) /* key identifier for KDF */ -#define SKEIN_BLK_TYPE_NONCE (20) /* nonce for PRNG */ -#define SKEIN_BLK_TYPE_MSG (48) /* message processing */ -#define SKEIN_BLK_TYPE_OUT (63) /* output stage */ -#define SKEIN_BLK_TYPE_MASK (63) /* bit field mask */ - -#define SKEIN_T1_BLK_TYPE(T) (((u64b_t) (SKEIN_BLK_TYPE_##T)) << SKEIN_T1_POS_BLK_TYPE) -#define SKEIN_T1_BLK_TYPE_KEY SKEIN_T1_BLK_TYPE(KEY) /* key, for MAC and KDF */ -#define SKEIN_T1_BLK_TYPE_CFG SKEIN_T1_BLK_TYPE(CFG) /* 
configuration block */ -#define SKEIN_T1_BLK_TYPE_PERS SKEIN_T1_BLK_TYPE(PERS) /* personalization string */ -#define SKEIN_T1_BLK_TYPE_PK SKEIN_T1_BLK_TYPE(PK) /* public key (for digital signature hashing) */ -#define SKEIN_T1_BLK_TYPE_KDF SKEIN_T1_BLK_TYPE(KDF) /* key identifier for KDF */ -#define SKEIN_T1_BLK_TYPE_NONCE SKEIN_T1_BLK_TYPE(NONCE)/* nonce for PRNG */ -#define SKEIN_T1_BLK_TYPE_MSG SKEIN_T1_BLK_TYPE(MSG) /* message processing */ -#define SKEIN_T1_BLK_TYPE_OUT SKEIN_T1_BLK_TYPE(OUT) /* output stage */ -#define SKEIN_T1_BLK_TYPE_MASK SKEIN_T1_BLK_TYPE(MASK) /* field bit mask */ - -#define SKEIN_T1_BLK_TYPE_CFG_FINAL (SKEIN_T1_BLK_TYPE_CFG | SKEIN_T1_FLAG_FINAL) -#define SKEIN_T1_BLK_TYPE_OUT_FINAL (SKEIN_T1_BLK_TYPE_OUT | SKEIN_T1_FLAG_FINAL) - -#define SKEIN_VERSION (1) - -#ifndef SKEIN_ID_STRING_LE /* allow compile-time personalization */ -#define SKEIN_ID_STRING_LE (0x33414853) /* "SHA3" (little-endian)*/ -#endif - -#define SKEIN_MK_64(hi32,lo32) ((lo32) + (((u64b_t) (hi32)) << 32)) -#define SKEIN_SCHEMA_VER SKEIN_MK_64(SKEIN_VERSION,SKEIN_ID_STRING_LE) -#define SKEIN_KS_PARITY SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22) - -#define SKEIN_CFG_STR_LEN (4*8) - -/* bit field definitions in config block treeInfo word */ -#define SKEIN_CFG_TREE_LEAF_SIZE_POS ( 0) -#define SKEIN_CFG_TREE_NODE_SIZE_POS ( 8) -#define SKEIN_CFG_TREE_MAX_LEVEL_POS (16) - -#define SKEIN_CFG_TREE_LEAF_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_LEAF_SIZE_POS) -#define SKEIN_CFG_TREE_NODE_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_NODE_SIZE_POS) -#define SKEIN_CFG_TREE_MAX_LEVEL_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_MAX_LEVEL_POS) - -#define SKEIN_CFG_TREE_INFO(leaf,node,maxLvl) \ - ( (((u64b_t)(leaf )) << SKEIN_CFG_TREE_LEAF_SIZE_POS) | \ - (((u64b_t)(node )) << SKEIN_CFG_TREE_NODE_SIZE_POS) | \ - (((u64b_t)(maxLvl)) << SKEIN_CFG_TREE_MAX_LEVEL_POS) ) - -#define SKEIN_CFG_TREE_INFO_SEQUENTIAL SKEIN_CFG_TREE_INFO(0,0,0) /* use as treeInfo in InitExt() call for sequential processing */ 
- -/* -** Skein macros for getting/setting tweak words, etc. -** These are useful for partial input bytes, hash tree init/update, etc. -**/ -#define Skein_Get_Tweak(ctxPtr,TWK_NUM) ((ctxPtr)->h.T[TWK_NUM]) -#define Skein_Set_Tweak(ctxPtr,TWK_NUM,tVal) {(ctxPtr)->h.T[TWK_NUM] = (tVal);} - -#define Skein_Get_T0(ctxPtr) Skein_Get_Tweak(ctxPtr,0) -#define Skein_Get_T1(ctxPtr) Skein_Get_Tweak(ctxPtr,1) -#define Skein_Set_T0(ctxPtr,T0) Skein_Set_Tweak(ctxPtr,0,T0) -#define Skein_Set_T1(ctxPtr,T1) Skein_Set_Tweak(ctxPtr,1,T1) - -/* set both tweak words at once */ -#define Skein_Set_T0_T1(ctxPtr,T0,T1) \ -{ \ - Skein_Set_T0(ctxPtr,(T0)); \ - Skein_Set_T1(ctxPtr,(T1)); \ -} - -#define Skein_Set_Type(ctxPtr,BLK_TYPE) \ - Skein_Set_T1(ctxPtr,SKEIN_T1_BLK_TYPE_##BLK_TYPE) - -/* set up for starting with a new type: h.T[0]=0; h.T[1] = NEW_TYPE; h.bCnt=0; */ -#define Skein_Start_New_Type(ctxPtr,BLK_TYPE) \ -{ Skein_Set_T0_T1(ctxPtr,0,SKEIN_T1_FLAG_FIRST | SKEIN_T1_BLK_TYPE_##BLK_TYPE); (ctxPtr)->h.bCnt=0; } - -#define Skein_Clear_First_Flag(hdr) { (hdr).T[1] &= ~SKEIN_T1_FLAG_FIRST; } -#define Skein_Set_Bit_Pad_Flag(hdr) { (hdr).T[1] |= SKEIN_T1_FLAG_BIT_PAD; } - -#define Skein_Set_Tree_Level(hdr,height) { (hdr).T[1] |= SKEIN_T1_TREE_LEVEL(height);} - -/***************************************************************** -** "Internal" Skein definitions for debugging and error checking -******************************************************************/ -#define Skein_Show_Block(bits,ctx,X,blkPtr,wPtr,ksEvenPtr,ksOddPtr) -#define Skein_Show_Round(bits,ctx,r,X) -#define Skein_Show_R_Ptr(bits,ctx,r,X_ptr) -#define Skein_Show_Final(bits,ctx,cnt,outPtr) -#define Skein_Show_Key(bits,ctx,key,keyBytes) - - -#ifndef SKEIN_ERR_CHECK /* run-time checks (e.g., bad params, uninitialized context)? 
*/ -#define Skein_Assert(x,retCode)/* default: ignore all Asserts, for performance */ -#define Skein_assert(x) -#elif defined(SKEIN_ASSERT) -#include -#define Skein_Assert(x,retCode) assert(x) -#define Skein_assert(x) assert(x) -#else -#include -#define Skein_Assert(x,retCode) { if (!(x)) return retCode; } /* caller error */ -#define Skein_assert(x) assert(x) /* internal error */ -#endif - -/***************************************************************** -** Skein block function constants (shared across Ref and Opt code) -******************************************************************/ -enum -{ - /* Skein_512 round rotation constants */ - R_512_0_0=46, R_512_0_1=36, R_512_0_2=19, R_512_0_3=37, - R_512_1_0=33, R_512_1_1=27, R_512_1_2=14, R_512_1_3=42, - R_512_2_0=17, R_512_2_1=49, R_512_2_2=36, R_512_2_3=39, - R_512_3_0=44, R_512_3_1= 9, R_512_3_2=54, R_512_3_3=56, - R_512_4_0=39, R_512_4_1=30, R_512_4_2=34, R_512_4_3=24, - R_512_5_0=13, R_512_5_1=50, R_512_5_2=10, R_512_5_3=17, - R_512_6_0=25, R_512_6_1=29, R_512_6_2=39, R_512_6_3=43, - R_512_7_0= 8, R_512_7_1=35, R_512_7_2=56, R_512_7_3=22, -}; - -#ifndef SKEIN_ROUNDS -#define SKEIN_512_ROUNDS_TOTAL (72) -#else /* allow command-line define in range 8*(5..14) */ -#define SKEIN_512_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/ 10) + 5) % 10) + 5)) -#endif - - -/* -***************** Pre-computed Skein IVs ******************* -** -** NOTE: these values are not "magic" constants, but -** are generated using the Threefish block function. -** They are pre-computed here only for speed; i.e., to -** avoid the need for a Threefish call during Init(). -** -** The IV for any fixed hash length may be pre-computed. -** Only the most common values are included here. -** -************************************************************ -**/ - -#define MK_64 SKEIN_MK_64 - -/* blkSize = 512 bits. 
hashSize = 256 bits */ -const u64b_t SKEIN_512_IV_256[] = - { - MK_64(0xCCD044A1,0x2FDB3E13), - MK_64(0xE8359030,0x1A79A9EB), - MK_64(0x55AEA061,0x4F816E6F), - MK_64(0x2A2767A4,0xAE9B94DB), - MK_64(0xEC06025E,0x74DD7683), - MK_64(0xE7A436CD,0xC4746251), - MK_64(0xC36FBAF9,0x393AD185), - MK_64(0x3EEDBA18,0x33EDFC13) - }; - -#ifndef SKEIN_USE_ASM -#define SKEIN_USE_ASM (0) /* default is all C code (no ASM) */ -#endif - -#ifndef SKEIN_LOOP -#define SKEIN_LOOP 001 /* default: unroll 256 and 512, but not 1024 */ -#endif - -#define BLK_BITS (WCNT*64) /* some useful definitions for code here */ -#define KW_TWK_BASE (0) -#define KW_KEY_BASE (3) -#define ks (kw + KW_KEY_BASE) -#define ts (kw + KW_TWK_BASE) - -#ifdef SKEIN_DEBUG -#define DebugSaveTweak(ctx) { ctx->h.T[0] = ts[0]; ctx->h.T[1] = ts[1]; } -#else -#define DebugSaveTweak(ctx) -#endif - -/***************************** Skein_512 ******************************/ -#if !(SKEIN_USE_ASM & 512) -static void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd) - { /* do it in C */ - enum - { - WCNT = SKEIN_512_STATE_WORDS - }; -#undef RCNT -#define RCNT (SKEIN_512_ROUNDS_TOTAL/8) - -#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ -#define SKEIN_UNROLL_512 (((SKEIN_LOOP)/10)%10) -#else -#define SKEIN_UNROLL_512 (0) -#endif - -#if SKEIN_UNROLL_512 -#if (RCNT % SKEIN_UNROLL_512) -#error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */ -#endif - size_t r; - u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/ -#else - u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ -#endif - u64b_t X0,X1,X2,X3,X4,X5,X6,X7; /* local copy of vars, for speed */ - u64b_t w [WCNT]; /* local copy of input block */ -#ifdef SKEIN_DEBUG - const u64b_t *Xptr[8]; /* use for debugging (help compiler put Xn in registers) */ - Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3; - Xptr[4] = &X4; Xptr[5] = &X5; Xptr[6] = &X6; 
Xptr[7] = &X7; -#endif - - Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */ - ts[0] = ctx->h.T[0]; - ts[1] = ctx->h.T[1]; - do { - /* this implementation only supports 2**64 input bytes (no carry out here) */ - ts[0] += byteCntAdd; /* update processed length */ - - /* precompute the key schedule for this block */ - ks[0] = ctx->X[0]; - ks[1] = ctx->X[1]; - ks[2] = ctx->X[2]; - ks[3] = ctx->X[3]; - ks[4] = ctx->X[4]; - ks[5] = ctx->X[5]; - ks[6] = ctx->X[6]; - ks[7] = ctx->X[7]; - ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ - ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY; - - ts[2] = ts[0] ^ ts[1]; - - Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */ - DebugSaveTweak(ctx); - Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts); - - X0 = w[0] + ks[0]; /* do the first full key injection */ - X1 = w[1] + ks[1]; - X2 = w[2] + ks[2]; - X3 = w[3] + ks[3]; - X4 = w[4] + ks[4]; - X5 = w[5] + ks[5] + ts[0]; - X6 = w[6] + ks[6] + ts[1]; - X7 = w[7] + ks[7]; - - blkPtr += SKEIN_512_BLOCK_BYTES; - - Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr); - /* run the rounds */ -#define Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ - X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \ - X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \ - X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \ - X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \ - -#if SKEIN_UNROLL_512 == 0 -#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) /* unrolled */ \ - Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ - Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr); - -#define I512(R) \ - X0 += ks[((R)+1) % 9]; /* inject the key schedule value */ \ - X1 += ks[((R)+2) % 9]; \ - X2 += ks[((R)+3) % 9]; \ - X3 += ks[((R)+4) % 9]; \ - X4 += ks[((R)+5) % 9]; \ - X5 += ks[((R)+6) % 9] + ts[((R)+1) % 3]; \ - X6 += ks[((R)+7) % 9] + ts[((R)+2) % 3]; \ - X7 += ks[((R)+8) % 9] + (R)+1; \ - 
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); -#else /* looping version */ -#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ - Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ - Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr); - -#define I512(R) \ - X0 += ks[r+(R)+0]; /* inject the key schedule value */ \ - X1 += ks[r+(R)+1]; \ - X2 += ks[r+(R)+2]; \ - X3 += ks[r+(R)+3]; \ - X4 += ks[r+(R)+4]; \ - X5 += ks[r+(R)+5] + ts[r+(R)+0]; \ - X6 += ks[r+(R)+6] + ts[r+(R)+1]; \ - X7 += ks[r+(R)+7] + r+(R) ; \ - ks[r + (R)+8] = ks[r+(R)-1]; /* rotate key schedule */ \ - ts[r + (R)+2] = ts[r+(R)-1]; \ - Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); - - for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_512) /* loop thru it */ -#endif /* end of looped code definitions */ - { -#define R512_8_rounds(R) /* do 8 full rounds */ \ - R512(0,1,2,3,4,5,6,7,R_512_0,8*(R)+ 1); \ - R512(2,1,4,7,6,5,0,3,R_512_1,8*(R)+ 2); \ - R512(4,1,6,3,0,5,2,7,R_512_2,8*(R)+ 3); \ - R512(6,1,0,7,2,5,4,3,R_512_3,8*(R)+ 4); \ - I512(2*(R)); \ - R512(0,1,2,3,4,5,6,7,R_512_4,8*(R)+ 5); \ - R512(2,1,4,7,6,5,0,3,R_512_5,8*(R)+ 6); \ - R512(4,1,6,3,0,5,2,7,R_512_6,8*(R)+ 7); \ - R512(6,1,0,7,2,5,4,3,R_512_7,8*(R)+ 8); \ - I512(2*(R)+1); /* and key injection */ - - R512_8_rounds( 0); - -#define R512_Unroll_R(NN) ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_512 > (NN))) - - #if R512_Unroll_R( 1) - R512_8_rounds( 1); - #endif - #if R512_Unroll_R( 2) - R512_8_rounds( 2); - #endif - #if R512_Unroll_R( 3) - R512_8_rounds( 3); - #endif - #if R512_Unroll_R( 4) - R512_8_rounds( 4); - #endif - #if R512_Unroll_R( 5) - R512_8_rounds( 5); - #endif - #if R512_Unroll_R( 6) - R512_8_rounds( 6); - #endif - #if R512_Unroll_R( 7) - R512_8_rounds( 7); - #endif - #if R512_Unroll_R( 8) - R512_8_rounds( 8); - #endif - #if R512_Unroll_R( 9) - R512_8_rounds( 9); - #endif - #if R512_Unroll_R(10) - R512_8_rounds(10); - #endif - #if R512_Unroll_R(11) - R512_8_rounds(11); - #endif - #if 
R512_Unroll_R(12) - R512_8_rounds(12); - #endif - #if R512_Unroll_R(13) - R512_8_rounds(13); - #endif - #if R512_Unroll_R(14) - R512_8_rounds(14); - #endif - #if (SKEIN_UNROLL_512 > 14) -#error "need more unrolling in Skein_512_Process_Block" - #endif - } - - /* do the final "feedforward" xor, update context chaining vars */ - ctx->X[0] = X0 ^ w[0]; - ctx->X[1] = X1 ^ w[1]; - ctx->X[2] = X2 ^ w[2]; - ctx->X[3] = X3 ^ w[3]; - ctx->X[4] = X4 ^ w[4]; - ctx->X[5] = X5 ^ w[5]; - ctx->X[6] = X6 ^ w[6]; - ctx->X[7] = X7 ^ w[7]; - Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X); - - ts[1] &= ~SKEIN_T1_FLAG_FIRST; - } - while (--blkCnt); - ctx->h.T[0] = ts[0]; - ctx->h.T[1] = ts[1]; - } -#endif - -/*****************************************************************/ -/* 512-bit Skein */ -/*****************************************************************/ - -/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ -/* init the context for a straight hashing operation */ -static int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen) - { - union - { - u08b_t b[SKEIN_512_STATE_BYTES]; - u64b_t w[SKEIN_512_STATE_WORDS]; - } cfg; /* config block */ - - Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); - ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ - - switch (hashBitLen) - { /* use pre-computed values, where available */ -#ifndef SKEIN_NO_PRECOMP - case 256: memcpy(ctx->X,SKEIN_512_IV_256,sizeof(ctx->X)); break; -#endif - default: - /* here if there is no precomputed IV value available */ - /* build/process the config block, type == CONFIG (could be precomputed) */ - Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */ - - cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */ - cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ - cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); - memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */ - - /* 
compute the initial chaining values from config block */ - memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */ - Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); - break; - } - - /* The chaining vars ctx->X are now initialized for the given hashBitLen. */ - /* Set up to process the data message portion of the hash (default) */ - Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */ - - return SKEIN_SUCCESS; - } - -/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ -/* process the input bytes */ -static int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt) - { - size_t n; - - Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ - - /* process full blocks, if any */ - if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES) - { - if (ctx->h.bCnt) /* finish up any buffered message data */ - { - n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */ - if (n) - { - Skein_assert(n < msgByteCnt); /* check on our logic here */ - memcpy(&ctx->b[ctx->h.bCnt],msg,n); - msgByteCnt -= n; - msg += n; - ctx->h.bCnt += n; - } - Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES); - Skein_512_Process_Block(ctx,ctx->b,1,SKEIN_512_BLOCK_BYTES); - ctx->h.bCnt = 0; - } - /* now process any remaining full blocks, directly from input message data */ - if (msgByteCnt > SKEIN_512_BLOCK_BYTES) - { - n = (msgByteCnt-1) / SKEIN_512_BLOCK_BYTES; /* number of full blocks to process */ - Skein_512_Process_Block(ctx,msg,n,SKEIN_512_BLOCK_BYTES); - msgByteCnt -= n * SKEIN_512_BLOCK_BYTES; - msg += n * SKEIN_512_BLOCK_BYTES; - } - Skein_assert(ctx->h.bCnt == 0); - } - - /* copy any remaining source message data bytes into b[] */ - if (msgByteCnt) - { - Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES); - memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt); - ctx->h.bCnt += msgByteCnt; - } - - return SKEIN_SUCCESS; - } - 
-/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ -/* finalize the hash computation and output the result */ -static int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal) - { - size_t i,n,byteCnt; - u64b_t X[SKEIN_512_STATE_WORDS]; - Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ - - ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ - if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) /* zero pad b[] if necessary */ - memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt); - - Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ - - /* now output the result */ - byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ - - /* run Threefish in "counter mode" to generate output */ - memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ - memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ - for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++) - { - ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ - Skein_Start_New_Type(ctx,OUT_FINAL); - Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ - n = byteCnt - i*SKEIN_512_BLOCK_BYTES; /* number of output bytes left to go */ - if (n >= SKEIN_512_BLOCK_BYTES) - n = SKEIN_512_BLOCK_BYTES; - Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ - Skein_Show_Final(512,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES); - memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ - } - return SKEIN_SUCCESS; - } - -#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) -static size_t Skein_512_API_CodeSize(void) - { - return ((u08b_t *) Skein_512_API_CodeSize) - - ((u08b_t *) Skein_512_Init); - } -#endif - -typedef struct -{ - uint_t statebits; /* 256, 512, or 1024 */ - union - { - Skein_Ctxt_Hdr_t h; /* common header "overlay" */ - 
Skein_512_Ctxt_t ctx_512; - } u; -} -hashState; - -/* "incremental" hashing API */ -static SkeinHashReturn Init (hashState *state, int hashbitlen); -static SkeinHashReturn Update(hashState *state, const SkeinBitSequence *data, SkeinDataLength databitlen); -static SkeinHashReturn Final (hashState *state, SkeinBitSequence *hashval); - -/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ -/* select the context size and init the context */ -static SkeinHashReturn Init(hashState *state, int hashbitlen) -{ - state->statebits = 64*SKEIN_512_STATE_WORDS; - return Skein_512_Init(&state->u.ctx_512,(size_t) hashbitlen); -} - -/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ -/* process data to be hashed */ -static SkeinHashReturn Update(hashState *state, const SkeinBitSequence *data, SkeinDataLength databitlen) -{ - /* only the final Update() call is allowed do partial bytes, else assert an error */ - Skein_Assert((state->u.h.T[1] & SKEIN_T1_FLAG_BIT_PAD) == 0 || databitlen == 0, SKEIN_FAIL); - - Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,SKEIN_FAIL); - if ((databitlen & 7) == 0) /* partial bytes? */ - { - return Skein_512_Update(&state->u.ctx_512,data,databitlen >> 3); - } - else - { /* handle partial final byte */ - size_t bCnt = (databitlen >> 3) + 1; /* number of bytes to handle (nonzero here!) 
*/ - u08b_t b,mask; - - mask = (u08b_t) (1u << (7 - (databitlen & 7))); /* partial byte bit mask */ - b = (u08b_t) ((data[bCnt-1] & (0-mask)) | mask); /* apply bit padding on final byte */ - - Skein_512_Update(&state->u.ctx_512,data,bCnt-1); /* process all but the final byte */ - Skein_512_Update(&state->u.ctx_512,&b , 1 ); /* process the (masked) partial byte */ - Skein_Set_Bit_Pad_Flag(state->u.h); /* set tweak flag for the final call */ - - return SKEIN_SUCCESS; - } -} - -/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ -/* finalize hash computation and output the result (hashbitlen bits) */ -static SkeinHashReturn Final(hashState *state, SkeinBitSequence *hashval) -{ - Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,FAIL); - return Skein_512_Final(&state->u.ctx_512,hashval); -} - -/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ -/* all-in-one hash function */ -SkeinHashReturn skein_hash(int hashbitlen, const SkeinBitSequence *data, /* all-in-one call */ - SkeinDataLength databitlen,SkeinBitSequence *hashval) -{ - hashState state; - SkeinHashReturn r = Init(&state,hashbitlen); - if (r == SKEIN_SUCCESS) - { /* these calls do not fail when called properly */ - r = Update(&state,data,databitlen); - Final(&state,hashval); - } - return r; -} - -void xmr_skein(const SkeinBitSequence *data, SkeinBitSequence *hashval){ - #define XMR_HASHBITLEN 256 - #define XMR_DATABITLEN 1600 - - // Init - hashState state; - state.statebits = 64*SKEIN_512_STATE_WORDS; - - // Skein_512_Init(&state.u.ctx_512, (size_t)XMR_HASHBITLEN); - state.u.ctx_512.h.hashBitLen = XMR_HASHBITLEN; - memcpy(state.u.ctx_512.X,SKEIN_512_IV_256,sizeof(state.u.ctx_512.X)); - Skein_512_Ctxt_t* ctx = &(state.u.ctx_512); - Skein_Start_New_Type(ctx,MSG); - - // Update - if ((XMR_DATABITLEN & 7) == 0){ /* partial bytes? 
*/ - Skein_512_Update(&state.u.ctx_512,data,XMR_DATABITLEN >> 3); - }else{ /* handle partial final byte */ - size_t bCnt = (XMR_DATABITLEN >> 3) + 1; /* number of bytes to handle (nonzero here!) */ - u08b_t b,mask; - - mask = (u08b_t) (1u << (7 - (XMR_DATABITLEN & 7))); /* partial byte bit mask */ - b = (u08b_t) ((data[bCnt-1] & (0-mask)) | mask); /* apply bit padding on final byte */ - - Skein_512_Update(&state.u.ctx_512,data,bCnt-1); /* process all but the final byte */ - Skein_512_Update(&state.u.ctx_512,&b , 1 ); /* process the (masked) partial byte */ - Skein_Set_Bit_Pad_Flag(state.u.h); /* set tweak flag for the final call */ - } - - // Finalize - Skein_512_Final(&state.u.ctx_512, hashval); -} diff --git a/src/crypto/c_skein.h b/src/crypto/c_skein.h deleted file mode 100644 index c642e265..00000000 --- a/src/crypto/c_skein.h +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef _SKEIN_H_ -#define _SKEIN_H_ 1 -/************************************************************************** -** -** Interface declarations and internal definitions for Skein hashing. -** -** Source code author: Doug Whiting, 2008. -** -** This algorithm and source code is released to the public domain. -** -*************************************************************************** -** -** The following compile-time switches may be defined to control some -** tradeoffs between speed, code size, error checking, and security. -** -** The "default" note explains what happens when the switch is not defined. -** -** SKEIN_DEBUG -- make callouts from inside Skein code -** to examine/display intermediate values. -** [default: no callouts (no overhead)] -** -** SKEIN_ERR_CHECK -- how error checking is handled inside Skein -** code. If not defined, most error checking -** is disabled (for performance). 
Otherwise, -** the switch value is interpreted as: -** 0: use assert() to flag errors -** 1: return SKEIN_FAIL to flag errors -** -***************************************************************************/ -#include "skein_port.h" /* get platform-specific definitions */ - -typedef enum -{ - SKEIN_SUCCESS = 0, /* return codes from Skein calls */ - SKEIN_FAIL = 1, - SKEIN_BAD_HASHLEN = 2 -} -SkeinHashReturn; - -typedef size_t SkeinDataLength; /* bit count type */ -typedef u08b_t SkeinBitSequence; /* bit stream type */ - -/* "all-in-one" call */ -SkeinHashReturn skein_hash(int hashbitlen, const SkeinBitSequence *data, - SkeinDataLength databitlen, SkeinBitSequence *hashval); - -void xmr_skein(const SkeinBitSequence *data, SkeinBitSequence *hashval); - -#endif /* ifndef _SKEIN_H_ */ diff --git a/src/crypto/cn_gpu_arm.cpp b/src/crypto/cn_gpu_arm.cpp deleted file mode 100644 index b463dd2e..00000000 --- a/src/crypto/cn_gpu_arm.cpp +++ /dev/null @@ -1,240 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - - -#include - - -#include "crypto/CryptoNight_constants.h" - - -inline void vandq_f32(float32x4_t &v, uint32_t v2) -{ - uint32x4_t vc = vdupq_n_u32(v2); - v = (float32x4_t)vandq_u32((uint32x4_t)v, vc); -} - - -inline void vorq_f32(float32x4_t &v, uint32_t v2) -{ - uint32x4_t vc = vdupq_n_u32(v2); - v = (float32x4_t)vorrq_u32((uint32x4_t)v, vc); -} - - -template -inline void vrot_si32(int32x4_t &r) -{ - r = (int32x4_t)vextq_s8((int8x16_t)r, (int8x16_t)r, v); -} - -template <> -inline void vrot_si32<0>(int32x4_t &r) -{ -} - - -inline uint32_t vheor_s32(const int32x4_t &v) -{ - int32x4_t v0 = veorq_s32(v, vrev64q_s32(v)); - int32x2_t vf = veor_s32(vget_high_s32(v0), vget_low_s32(v0)); - return (uint32_t)vget_lane_s32(vf, 0); -} - - -inline void prep_dv(int32_t *idx, int32x4_t &v, float32x4_t &n) -{ - v = vld1q_s32(idx); - n = vcvtq_f32_s32(v); -} - - -inline void sub_round(const float32x4_t &n0, const float32x4_t &n1, const float32x4_t &n2, const float32x4_t &n3, const float32x4_t &rnd_c, float32x4_t &n, float32x4_t &d, float32x4_t &c) -{ - float32x4_t ln1 = vaddq_f32(n1, c); - float32x4_t nn = vmulq_f32(n0, c); - nn = vmulq_f32(ln1, vmulq_f32(nn, nn)); - vandq_f32(nn, 0xFEFFFFFF); - vorq_f32(nn, 0x00800000); - n = vaddq_f32(n, nn); - - float32x4_t ln3 = vsubq_f32(n3, c); - float32x4_t dd = vmulq_f32(n2, c); - dd = vmulq_f32(ln3, vmulq_f32(dd, dd)); - vandq_f32(dd, 0xFEFFFFFF); - vorq_f32(dd, 0x00800000); - d = vaddq_f32(d, dd); - - //Constant feedback - c = vaddq_f32(c, rnd_c); - c = vaddq_f32(c, vdupq_n_f32(0.734375f)); - float32x4_t r = vaddq_f32(nn, dd); - vandq_f32(r, 0x807FFFFF); - vorq_f32(r, 0x40000000); - c = vaddq_f32(c, r); -} - - -inline void round_compute(const float32x4_t &n0, const float32x4_t &n1, const float32x4_t &n2, const float32x4_t &n3, const float32x4_t &rnd_c, float32x4_t &c, float32x4_t &r) -{ - float32x4_t n = vdupq_n_f32(0.0f), d = vdupq_n_f32(0.0f); - - sub_round(n0, n1, n2, n3, rnd_c, n, d, c); - sub_round(n1, n2, n3, n0, rnd_c, n, 
d, c); - sub_round(n2, n3, n0, n1, rnd_c, n, d, c); - sub_round(n3, n0, n1, n2, rnd_c, n, d, c); - sub_round(n3, n2, n1, n0, rnd_c, n, d, c); - sub_round(n2, n1, n0, n3, rnd_c, n, d, c); - sub_round(n1, n0, n3, n2, rnd_c, n, d, c); - sub_round(n0, n3, n2, n1, rnd_c, n, d, c); - - // Make sure abs(d) > 2.0 - this prevents division by zero and accidental overflows by division by < 1.0 - vandq_f32(d, 0xFF7FFFFF); - vorq_f32(d, 0x40000000); - r = vaddq_f32(r, vdivq_f32(n, d)); -} - - -// 112×4 = 448 -template -inline int32x4_t single_compute(const float32x4_t &n0, const float32x4_t &n1, const float32x4_t &n2, const float32x4_t &n3, float cnt, const float32x4_t &rnd_c, float32x4_t &sum) -{ - float32x4_t c = vdupq_n_f32(cnt); - float32x4_t r = vdupq_n_f32(0.0f); - - round_compute(n0, n1, n2, n3, rnd_c, c, r); - round_compute(n0, n1, n2, n3, rnd_c, c, r); - round_compute(n0, n1, n2, n3, rnd_c, c, r); - round_compute(n0, n1, n2, n3, rnd_c, c, r); - - // do a quick fmod by setting exp to 2 - vandq_f32(r, 0x807FFFFF); - vorq_f32(r, 0x40000000); - - if (add) { - sum = vaddq_f32(sum, r); - } else { - sum = r; - } - - const float32x4_t cc2 = vdupq_n_f32(536870880.0f); - r = vmulq_f32(r, cc2); // 35 - return vcvtq_s32_f32(r); -} - - -template -inline void single_compute_wrap(const float32x4_t &n0, const float32x4_t &n1, const float32x4_t &n2, const float32x4_t &n3, float cnt, const float32x4_t &rnd_c, float32x4_t &sum, int32x4_t &out) -{ - int32x4_t r = single_compute(n0, n1, n2, n3, cnt, rnd_c, sum); - vrot_si32(r); - out = veorq_s32(out, r); -} - - -template -inline int32_t *scratchpad_ptr(uint8_t* lpad, uint32_t idx, size_t n) { return reinterpret_cast(lpad + (idx & MASK) + n * 16); } - - -template -void cn_gpu_inner_arm(const uint8_t *spad, uint8_t *lpad) -{ - uint32_t s = reinterpret_cast(spad)[0] >> 8; - int32_t *idx0 = scratchpad_ptr(lpad, s, 0); - int32_t *idx1 = scratchpad_ptr(lpad, s, 1); - int32_t *idx2 = scratchpad_ptr(lpad, s, 2); - int32_t *idx3 = 
scratchpad_ptr(lpad, s, 3); - float32x4_t sum0 = vdupq_n_f32(0.0f); - - for (size_t i = 0; i < ITER; i++) { - float32x4_t n0, n1, n2, n3; - int32x4_t v0, v1, v2, v3; - float32x4_t suma, sumb, sum1, sum2, sum3; - - prep_dv(idx0, v0, n0); - prep_dv(idx1, v1, n1); - prep_dv(idx2, v2, n2); - prep_dv(idx3, v3, n3); - float32x4_t rc = sum0; - - int32x4_t out, out2; - out = vdupq_n_s32(0); - single_compute_wrap<0>(n0, n1, n2, n3, 1.3437500f, rc, suma, out); - single_compute_wrap<1>(n0, n2, n3, n1, 1.2812500f, rc, suma, out); - single_compute_wrap<2>(n0, n3, n1, n2, 1.3593750f, rc, sumb, out); - single_compute_wrap<3>(n0, n3, n2, n1, 1.3671875f, rc, sumb, out); - sum0 = vaddq_f32(suma, sumb); - vst1q_s32(idx0, veorq_s32(v0, out)); - out2 = out; - - out = vdupq_n_s32(0); - single_compute_wrap<0>(n1, n0, n2, n3, 1.4296875f, rc, suma, out); - single_compute_wrap<1>(n1, n2, n3, n0, 1.3984375f, rc, suma, out); - single_compute_wrap<2>(n1, n3, n0, n2, 1.3828125f, rc, sumb, out); - single_compute_wrap<3>(n1, n3, n2, n0, 1.3046875f, rc, sumb, out); - sum1 = vaddq_f32(suma, sumb); - vst1q_s32(idx1, veorq_s32(v1, out)); - out2 = veorq_s32(out2, out); - - out = vdupq_n_s32(0); - single_compute_wrap<0>(n2, n1, n0, n3, 1.4140625f, rc, suma, out); - single_compute_wrap<1>(n2, n0, n3, n1, 1.2734375f, rc, suma, out); - single_compute_wrap<2>(n2, n3, n1, n0, 1.2578125f, rc, sumb, out); - single_compute_wrap<3>(n2, n3, n0, n1, 1.2890625f, rc, sumb, out); - sum2 = vaddq_f32(suma, sumb); - vst1q_s32(idx2, veorq_s32(v2, out)); - out2 = veorq_s32(out2, out); - - out = vdupq_n_s32(0); - single_compute_wrap<0>(n3, n1, n2, n0, 1.3203125f, rc, suma, out); - single_compute_wrap<1>(n3, n2, n0, n1, 1.3515625f, rc, suma, out); - single_compute_wrap<2>(n3, n0, n1, n2, 1.3359375f, rc, sumb, out); - single_compute_wrap<3>(n3, n0, n2, n1, 1.4609375f, rc, sumb, out); - sum3 = vaddq_f32(suma, sumb); - vst1q_s32(idx3, veorq_s32(v3, out)); - out2 = veorq_s32(out2, out); - - sum0 = vaddq_f32(sum0, sum1); - sum2 
= vaddq_f32(sum2, sum3); - sum0 = vaddq_f32(sum0, sum2); - - const float32x4_t cc1 = vdupq_n_f32(16777216.0f); - const float32x4_t cc2 = vdupq_n_f32(64.0f); - vandq_f32(sum0, 0x7fffffff); // take abs(va) by masking the float sign bit - // vs range 0 - 64 - n0 = vmulq_f32(sum0, cc1); - v0 = vcvtq_s32_f32(n0); - v0 = veorq_s32(v0, out2); - uint32_t n = vheor_s32(v0); - - // vs is now between 0 and 1 - sum0 = vdivq_f32(sum0, cc2); - idx0 = scratchpad_ptr(lpad, n, 0); - idx1 = scratchpad_ptr(lpad, n, 1); - idx2 = scratchpad_ptr(lpad, n, 2); - idx3 = scratchpad_ptr(lpad, n, 3); - } -} - -template void cn_gpu_inner_arm(const uint8_t* spad, uint8_t* lpad); diff --git a/src/crypto/cn_gpu_avx.cpp b/src/crypto/cn_gpu_avx.cpp deleted file mode 100644 index 9f801c80..00000000 --- a/src/crypto/cn_gpu_avx.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include "crypto/CryptoNight_constants.h" - -#ifdef __GNUC__ -# include -#else -# include -# define __restrict__ __restrict -#endif -#ifndef _mm256_bslli_epi128 - #define _mm256_bslli_epi128(a, count) _mm256_slli_si256((a), (count)) -#endif -#ifndef _mm256_bsrli_epi128 - #define _mm256_bsrli_epi128(a, count) _mm256_srli_si256((a), (count)) -#endif - -inline void prep_dv_avx(__m256i* idx, __m256i& v, __m256& n01) -{ - v = _mm256_load_si256(idx); - n01 = _mm256_cvtepi32_ps(v); -} - -inline __m256 fma_break(const __m256& x) -{ - // Break the dependency chain by setitng the exp to ?????01 - __m256 xx = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0xFEFFFFFF)), x); - return _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x00800000)), xx); -} - -// 14 -inline void sub_round(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3, const __m256& rnd_c, __m256& n, __m256& d, __m256& c) -{ - __m256 nn = _mm256_mul_ps(n0, c); - nn = _mm256_mul_ps(_mm256_add_ps(n1, c), _mm256_mul_ps(nn, nn)); - nn = fma_break(nn); - n = _mm256_add_ps(n, nn); - - __m256 dd = _mm256_mul_ps(n2, c); - dd = _mm256_mul_ps(_mm256_sub_ps(n3, c), _mm256_mul_ps(dd, dd)); - dd = fma_break(dd); - d = _mm256_add_ps(d, dd); - - //Constant feedback - c = _mm256_add_ps(c, rnd_c); - c = _mm256_add_ps(c, _mm256_set1_ps(0.734375f)); - __m256 r = _mm256_add_ps(nn, dd); - r = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x807FFFFF)), r); - r = _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x40000000)), r); - c = _mm256_add_ps(c, r); -} - -// 14*8 + 2 = 112 -inline void round_compute(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3, const __m256& rnd_c, __m256& c, __m256& r) -{ - __m256 n = _mm256_setzero_ps(), d = _mm256_setzero_ps(); - - sub_round(n0, n1, n2, n3, rnd_c, n, d, c); - sub_round(n1, n2, n3, n0, rnd_c, n, d, c); - sub_round(n2, n3, n0, n1, rnd_c, n, d, c); - sub_round(n3, n0, n1, n2, rnd_c, n, d, c); - sub_round(n3, n2, n1, n0, rnd_c, 
n, d, c); - sub_round(n2, n1, n0, n3, rnd_c, n, d, c); - sub_round(n1, n0, n3, n2, rnd_c, n, d, c); - sub_round(n0, n3, n2, n1, rnd_c, n, d, c); - - // Make sure abs(d) > 2.0 - this prevents division by zero and accidental overflows by division by < 1.0 - d = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0xFF7FFFFF)), d); - d = _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x40000000)), d); - r = _mm256_add_ps(r, _mm256_div_ps(n, d)); -} - -// 112×4 = 448 -template -inline __m256i double_compute(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3, - float lcnt, float hcnt, const __m256& rnd_c, __m256& sum) -{ - __m256 c = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_set1_ps(lcnt)), _mm_set1_ps(hcnt), 1); - __m256 r = _mm256_setzero_ps(); - - round_compute(n0, n1, n2, n3, rnd_c, c, r); - round_compute(n0, n1, n2, n3, rnd_c, c, r); - round_compute(n0, n1, n2, n3, rnd_c, c, r); - round_compute(n0, n1, n2, n3, rnd_c, c, r); - - // do a quick fmod by setting exp to 2 - r = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x807FFFFF)), r); - r = _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x40000000)), r); - - if(add) - sum = _mm256_add_ps(sum, r); - else - sum = r; - - r = _mm256_mul_ps(r, _mm256_set1_ps(536870880.0f)); // 35 - return _mm256_cvttps_epi32(r); -} - -template -inline void double_compute_wrap(const __m256& n0, const __m256& n1, const __m256& n2, const __m256& n3, - float lcnt, float hcnt, const __m256& rnd_c, __m256& sum, __m256i& out) -{ - __m256i r = double_compute(n0, n1, n2, n3, lcnt, hcnt, rnd_c, sum); - if(rot != 0) - r = _mm256_or_si256(_mm256_bslli_epi128(r, 16 - rot), _mm256_bsrli_epi128(r, rot)); - - out = _mm256_xor_si256(out, r); -} - -template -inline __m256i* scratchpad_ptr(uint8_t* lpad, uint32_t idx, size_t n) { return reinterpret_cast<__m256i*>(lpad + (idx & MASK) + n*16); } - -template -void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad) -{ - uint32_t s = reinterpret_cast(spad)[0] >> 8; - 
__m256i* idx0 = scratchpad_ptr(lpad, s, 0); - __m256i* idx2 = scratchpad_ptr(lpad, s, 2); - __m256 sum0 = _mm256_setzero_ps(); - - for(size_t i = 0; i < ITER; i++) - { - __m256i v01, v23; - __m256 suma, sumb, sum1; - __m256 rc = sum0; - - __m256 n01, n23; - prep_dv_avx(idx0, v01, n01); - prep_dv_avx(idx2, v23, n23); - - __m256i out, out2; - __m256 n10, n22, n33; - n10 = _mm256_permute2f128_ps(n01, n01, 0x01); - n22 = _mm256_permute2f128_ps(n23, n23, 0x00); - n33 = _mm256_permute2f128_ps(n23, n23, 0x11); - - out = _mm256_setzero_si256(); - double_compute_wrap<0>(n01, n10, n22, n33, 1.3437500f, 1.4296875f, rc, suma, out); - double_compute_wrap<1>(n01, n22, n33, n10, 1.2812500f, 1.3984375f, rc, suma, out); - double_compute_wrap<2>(n01, n33, n10, n22, 1.3593750f, 1.3828125f, rc, sumb, out); - double_compute_wrap<3>(n01, n33, n22, n10, 1.3671875f, 1.3046875f, rc, sumb, out); - _mm256_store_si256(idx0, _mm256_xor_si256(v01, out)); - sum0 = _mm256_add_ps(suma, sumb); - out2 = out; - - __m256 n11, n02, n30; - n11 = _mm256_permute2f128_ps(n01, n01, 0x11); - n02 = _mm256_permute2f128_ps(n01, n23, 0x20); - n30 = _mm256_permute2f128_ps(n01, n23, 0x03); - - out = _mm256_setzero_si256(); - double_compute_wrap<0>(n23, n11, n02, n30, 1.4140625f, 1.3203125f, rc, suma, out); - double_compute_wrap<1>(n23, n02, n30, n11, 1.2734375f, 1.3515625f, rc, suma, out); - double_compute_wrap<2>(n23, n30, n11, n02, 1.2578125f, 1.3359375f, rc, sumb, out); - double_compute_wrap<3>(n23, n30, n02, n11, 1.2890625f, 1.4609375f, rc, sumb, out); - _mm256_store_si256(idx2, _mm256_xor_si256(v23, out)); - sum1 = _mm256_add_ps(suma, sumb); - - out2 = _mm256_xor_si256(out2, out); - out2 = _mm256_xor_si256(_mm256_permute2x128_si256(out2,out2,0x41), out2); - suma = _mm256_permute2f128_ps(sum0, sum1, 0x30); - sumb = _mm256_permute2f128_ps(sum0, sum1, 0x21); - sum0 = _mm256_add_ps(suma, sumb); - sum0 = _mm256_add_ps(sum0, _mm256_permute2f128_ps(sum0, sum0, 0x41)); - - // Clear the high 128 bits - __m128 sum = 
_mm256_castps256_ps128(sum0); - - sum = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)), sum); // take abs(va) by masking the float sign bit - // vs range 0 - 64 - __m128i v0 = _mm_cvttps_epi32(_mm_mul_ps(sum, _mm_set1_ps(16777216.0f))); - v0 = _mm_xor_si128(v0, _mm256_castsi256_si128(out2)); - __m128i v1 = _mm_shuffle_epi32(v0, _MM_SHUFFLE(0, 1, 2, 3)); - v0 = _mm_xor_si128(v0, v1); - v1 = _mm_shuffle_epi32(v0, _MM_SHUFFLE(0, 1, 0, 1)); - v0 = _mm_xor_si128(v0, v1); - - // vs is now between 0 and 1 - sum = _mm_div_ps(sum, _mm_set1_ps(64.0f)); - sum0 = _mm256_insertf128_ps(_mm256_castps128_ps256(sum), sum, 1); - uint32_t n = _mm_cvtsi128_si32(v0); - idx0 = scratchpad_ptr(lpad, n, 0); - idx2 = scratchpad_ptr(lpad, n, 2); - } -} - -template void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad); diff --git a/src/crypto/cn_gpu_ssse3.cpp b/src/crypto/cn_gpu_ssse3.cpp deleted file mode 100644 index ce3d19ad..00000000 --- a/src/crypto/cn_gpu_ssse3.cpp +++ /dev/null @@ -1,210 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2019 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include "crypto/CryptoNight_constants.h" - -#ifdef __GNUC__ -# include -#else -# include -# define __restrict__ __restrict -#endif - -inline void prep_dv(__m128i* idx, __m128i& v, __m128& n) -{ - v = _mm_load_si128(idx); - n = _mm_cvtepi32_ps(v); -} - -inline __m128 fma_break(__m128 x) -{ - // Break the dependency chain by setitng the exp to ?????01 - x = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0xFEFFFFFF)), x); - return _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x00800000)), x); -} - -// 14 -inline void sub_round(__m128 n0, __m128 n1, __m128 n2, __m128 n3, __m128 rnd_c, __m128& n, __m128& d, __m128& c) -{ - n1 = _mm_add_ps(n1, c); - __m128 nn = _mm_mul_ps(n0, c); - nn = _mm_mul_ps(n1, _mm_mul_ps(nn,nn)); - nn = fma_break(nn); - n = _mm_add_ps(n, nn); - - n3 = _mm_sub_ps(n3, c); - __m128 dd = _mm_mul_ps(n2, c); - dd = _mm_mul_ps(n3, _mm_mul_ps(dd,dd)); - dd = fma_break(dd); - d = _mm_add_ps(d, dd); - - //Constant feedback - c = _mm_add_ps(c, rnd_c); - c = _mm_add_ps(c, _mm_set1_ps(0.734375f)); - __m128 r = _mm_add_ps(nn, dd); - r = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF)), r); - r = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), r); - c = _mm_add_ps(c, r); -} - -// 14*8 + 2 = 112 -inline void round_compute(__m128 n0, __m128 n1, __m128 n2, __m128 n3, __m128 rnd_c, __m128& c, __m128& r) -{ - __m128 n = _mm_setzero_ps(), d = _mm_setzero_ps(); - - sub_round(n0, n1, n2, n3, rnd_c, n, d, c); - sub_round(n1, n2, n3, n0, rnd_c, n, d, c); - sub_round(n2, n3, n0, n1, rnd_c, n, d, c); - sub_round(n3, n0, n1, n2, rnd_c, n, d, c); - sub_round(n3, n2, n1, n0, rnd_c, n, d, c); - sub_round(n2, n1, n0, n3, rnd_c, n, d, c); - sub_round(n1, n0, n3, n2, rnd_c, n, d, c); - sub_round(n0, n3, n2, n1, rnd_c, n, d, c); - - // Make sure abs(d) > 2.0 - this prevents division by zero and accidental overflows by division by < 1.0 - d = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0xFF7FFFFF)), d); - d = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), 
d); - r =_mm_add_ps(r, _mm_div_ps(n,d)); -} - -// 112×4 = 448 -template -inline __m128i single_compute(__m128 n0, __m128 n1, __m128 n2, __m128 n3, float cnt, __m128 rnd_c, __m128& sum) -{ - __m128 c = _mm_set1_ps(cnt); - __m128 r = _mm_setzero_ps(); - - round_compute(n0, n1, n2, n3, rnd_c, c, r); - round_compute(n0, n1, n2, n3, rnd_c, c, r); - round_compute(n0, n1, n2, n3, rnd_c, c, r); - round_compute(n0, n1, n2, n3, rnd_c, c, r); - - // do a quick fmod by setting exp to 2 - r = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF)), r); - r = _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x40000000)), r); - - if(add) - sum = _mm_add_ps(sum, r); - else - sum = r; - - r = _mm_mul_ps(r, _mm_set1_ps(536870880.0f)); // 35 - return _mm_cvttps_epi32(r); -} - -template -inline void single_compute_wrap(__m128 n0, __m128 n1, __m128 n2, __m128 n3, float cnt, __m128 rnd_c, __m128& sum, __m128i& out) -{ - __m128i r = single_compute(n0, n1, n2, n3, cnt, rnd_c, sum); - if(rot != 0) - r = _mm_or_si128(_mm_slli_si128(r, 16 - rot), _mm_srli_si128(r, rot)); - out = _mm_xor_si128(out, r); -} - -template -inline __m128i* scratchpad_ptr(uint8_t* lpad, uint32_t idx, size_t n) { return reinterpret_cast<__m128i*>(lpad + (idx & MASK) + n*16); } - -template -void cn_gpu_inner_ssse3(const uint8_t* spad, uint8_t* lpad) -{ - uint32_t s = reinterpret_cast(spad)[0] >> 8; - __m128i* idx0 = scratchpad_ptr(lpad, s, 0); - __m128i* idx1 = scratchpad_ptr(lpad, s, 1); - __m128i* idx2 = scratchpad_ptr(lpad, s, 2); - __m128i* idx3 = scratchpad_ptr(lpad, s, 3); - __m128 sum0 = _mm_setzero_ps(); - - for(size_t i = 0; i < ITER; i++) - { - __m128 n0, n1, n2, n3; - __m128i v0, v1, v2, v3; - __m128 suma, sumb, sum1, sum2, sum3; - - prep_dv(idx0, v0, n0); - prep_dv(idx1, v1, n1); - prep_dv(idx2, v2, n2); - prep_dv(idx3, v3, n3); - __m128 rc = sum0; - - __m128i out, out2; - out = _mm_setzero_si128(); - single_compute_wrap<0>(n0, n1, n2, n3, 1.3437500f, rc, suma, out); - single_compute_wrap<1>(n0, n2, n3, n1, 
1.2812500f, rc, suma, out); - single_compute_wrap<2>(n0, n3, n1, n2, 1.3593750f, rc, sumb, out); - single_compute_wrap<3>(n0, n3, n2, n1, 1.3671875f, rc, sumb, out); - sum0 = _mm_add_ps(suma, sumb); - _mm_store_si128(idx0, _mm_xor_si128(v0, out)); - out2 = out; - - out = _mm_setzero_si128(); - single_compute_wrap<0>(n1, n0, n2, n3, 1.4296875f, rc, suma, out); - single_compute_wrap<1>(n1, n2, n3, n0, 1.3984375f, rc, suma, out); - single_compute_wrap<2>(n1, n3, n0, n2, 1.3828125f, rc, sumb, out); - single_compute_wrap<3>(n1, n3, n2, n0, 1.3046875f, rc, sumb, out); - sum1 = _mm_add_ps(suma, sumb); - _mm_store_si128(idx1, _mm_xor_si128(v1, out)); - out2 = _mm_xor_si128(out2, out); - - out = _mm_setzero_si128(); - single_compute_wrap<0>(n2, n1, n0, n3, 1.4140625f, rc, suma, out); - single_compute_wrap<1>(n2, n0, n3, n1, 1.2734375f, rc, suma, out); - single_compute_wrap<2>(n2, n3, n1, n0, 1.2578125f, rc, sumb, out); - single_compute_wrap<3>(n2, n3, n0, n1, 1.2890625f, rc, sumb, out); - sum2 = _mm_add_ps(suma, sumb); - _mm_store_si128(idx2, _mm_xor_si128(v2, out)); - out2 = _mm_xor_si128(out2, out); - - out = _mm_setzero_si128(); - single_compute_wrap<0>(n3, n1, n2, n0, 1.3203125f, rc, suma, out); - single_compute_wrap<1>(n3, n2, n0, n1, 1.3515625f, rc, suma, out); - single_compute_wrap<2>(n3, n0, n1, n2, 1.3359375f, rc, sumb, out); - single_compute_wrap<3>(n3, n0, n2, n1, 1.4609375f, rc, sumb, out); - sum3 = _mm_add_ps(suma, sumb); - _mm_store_si128(idx3, _mm_xor_si128(v3, out)); - out2 = _mm_xor_si128(out2, out); - sum0 = _mm_add_ps(sum0, sum1); - sum2 = _mm_add_ps(sum2, sum3); - sum0 = _mm_add_ps(sum0, sum2); - - sum0 = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)), sum0); // take abs(va) by masking the float sign bit - // vs range 0 - 64 - n0 = _mm_mul_ps(sum0, _mm_set1_ps(16777216.0f)); - v0 = _mm_cvttps_epi32(n0); - v0 = _mm_xor_si128(v0, out2); - v1 = _mm_shuffle_epi32(v0, _MM_SHUFFLE(0, 1, 2, 3)); - v0 = _mm_xor_si128(v0, v1); - v1 = 
_mm_shuffle_epi32(v0, _MM_SHUFFLE(0, 1, 0, 1)); - v0 = _mm_xor_si128(v0, v1); - - // vs is now between 0 and 1 - sum0 = _mm_div_ps(sum0, _mm_set1_ps(64.0f)); - uint32_t n = _mm_cvtsi128_si32(v0); - idx0 = scratchpad_ptr(lpad, n, 0); - idx1 = scratchpad_ptr(lpad, n, 1); - idx2 = scratchpad_ptr(lpad, n, 2); - idx3 = scratchpad_ptr(lpad, n, 3); - } -} - -template void cn_gpu_inner_ssse3(const uint8_t* spad, uint8_t* lpad); diff --git a/src/crypto/groestl_tables.h b/src/crypto/groestl_tables.h deleted file mode 100644 index a23295c3..00000000 --- a/src/crypto/groestl_tables.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef __tables_h -#define __tables_h - - -const uint32_t T[512] = {0xa5f432c6, 0xc6a597f4, 0x84976ff8, 0xf884eb97, 0x99b05eee, 0xee99c7b0, 0x8d8c7af6, 0xf68df78c, 0xd17e8ff, 0xff0de517, 0xbddc0ad6, 0xd6bdb7dc, 0xb1c816de, 0xdeb1a7c8, 0x54fc6d91, 0x915439fc -, 0x50f09060, 0x6050c0f0, 0x3050702, 0x2030405, 0xa9e02ece, 0xcea987e0, 0x7d87d156, 0x567dac87, 0x192bcce7, 0xe719d52b, 0x62a613b5, 0xb56271a6, 0xe6317c4d, 0x4de69a31, 0x9ab559ec, 0xec9ac3b5 -, 0x45cf408f, 0x8f4505cf, 0x9dbca31f, 0x1f9d3ebc, 0x40c04989, 0x894009c0, 0x879268fa, 0xfa87ef92, 0x153fd0ef, 0xef15c53f, 0xeb2694b2, 0xb2eb7f26, 0xc940ce8e, 0x8ec90740, 0xb1de6fb, 0xfb0bed1d -, 0xec2f6e41, 0x41ec822f, 0x67a91ab3, 0xb3677da9, 0xfd1c435f, 0x5ffdbe1c, 0xea256045, 0x45ea8a25, 0xbfdaf923, 0x23bf46da, 0xf7025153, 0x53f7a602, 0x96a145e4, 0xe496d3a1, 0x5bed769b, 0x9b5b2ded -, 0xc25d2875, 0x75c2ea5d, 0x1c24c5e1, 0xe11cd924, 0xaee9d43d, 0x3dae7ae9, 0x6abef24c, 0x4c6a98be, 0x5aee826c, 0x6c5ad8ee, 0x41c3bd7e, 0x7e41fcc3, 0x206f3f5, 0xf502f106, 0x4fd15283, 0x834f1dd1 -, 0x5ce48c68, 0x685cd0e4, 0xf4075651, 0x51f4a207, 0x345c8dd1, 0xd134b95c, 0x818e1f9, 0xf908e918, 0x93ae4ce2, 0xe293dfae, 0x73953eab, 0xab734d95, 0x53f59762, 0x6253c4f5, 0x3f416b2a, 0x2a3f5441 -, 0xc141c08, 0x80c1014, 0x52f66395, 0x955231f6, 0x65afe946, 0x46658caf, 0x5ee27f9d, 0x9d5e21e2, 0x28784830, 0x30286078, 0xa1f8cf37, 0x37a16ef8, 0xf111b0a, 
0xa0f1411, 0xb5c4eb2f, 0x2fb55ec4 -, 0x91b150e, 0xe091c1b, 0x365a7e24, 0x2436485a, 0x9bb6ad1b, 0x1b9b36b6, 0x3d4798df, 0xdf3da547, 0x266aa7cd, 0xcd26816a, 0x69bbf54e, 0x4e699cbb, 0xcd4c337f, 0x7fcdfe4c, 0x9fba50ea, 0xea9fcfba -, 0x1b2d3f12, 0x121b242d, 0x9eb9a41d, 0x1d9e3ab9, 0x749cc458, 0x5874b09c, 0x2e724634, 0x342e6872, 0x2d774136, 0x362d6c77, 0xb2cd11dc, 0xdcb2a3cd, 0xee299db4, 0xb4ee7329, 0xfb164d5b, 0x5bfbb616 -, 0xf601a5a4, 0xa4f65301, 0x4dd7a176, 0x764decd7, 0x61a314b7, 0xb76175a3, 0xce49347d, 0x7dcefa49, 0x7b8ddf52, 0x527ba48d, 0x3e429fdd, 0xdd3ea142, 0x7193cd5e, 0x5e71bc93, 0x97a2b113, 0x139726a2 -, 0xf504a2a6, 0xa6f55704, 0x68b801b9, 0xb96869b8, 0x0, 0x0, 0x2c74b5c1, 0xc12c9974, 0x60a0e040, 0x406080a0, 0x1f21c2e3, 0xe31fdd21, 0xc8433a79, 0x79c8f243, 0xed2c9ab6, 0xb6ed772c -, 0xbed90dd4, 0xd4beb3d9, 0x46ca478d, 0x8d4601ca, 0xd9701767, 0x67d9ce70, 0x4bddaf72, 0x724be4dd, 0xde79ed94, 0x94de3379, 0xd467ff98, 0x98d42b67, 0xe82393b0, 0xb0e87b23, 0x4ade5b85, 0x854a11de -, 0x6bbd06bb, 0xbb6b6dbd, 0x2a7ebbc5, 0xc52a917e, 0xe5347b4f, 0x4fe59e34, 0x163ad7ed, 0xed16c13a, 0xc554d286, 0x86c51754, 0xd762f89a, 0x9ad72f62, 0x55ff9966, 0x6655ccff, 0x94a7b611, 0x119422a7 -, 0xcf4ac08a, 0x8acf0f4a, 0x1030d9e9, 0xe910c930, 0x60a0e04, 0x406080a, 0x819866fe, 0xfe81e798, 0xf00baba0, 0xa0f05b0b, 0x44ccb478, 0x7844f0cc, 0xbad5f025, 0x25ba4ad5, 0xe33e754b, 0x4be3963e -, 0xf30eaca2, 0xa2f35f0e, 0xfe19445d, 0x5dfeba19, 0xc05bdb80, 0x80c01b5b, 0x8a858005, 0x58a0a85, 0xadecd33f, 0x3fad7eec, 0xbcdffe21, 0x21bc42df, 0x48d8a870, 0x7048e0d8, 0x40cfdf1, 0xf104f90c -, 0xdf7a1963, 0x63dfc67a, 0xc1582f77, 0x77c1ee58, 0x759f30af, 0xaf75459f, 0x63a5e742, 0x426384a5, 0x30507020, 0x20304050, 0x1a2ecbe5, 0xe51ad12e, 0xe12effd, 0xfd0ee112, 0x6db708bf, 0xbf6d65b7 -, 0x4cd45581, 0x814c19d4, 0x143c2418, 0x1814303c, 0x355f7926, 0x26354c5f, 0x2f71b2c3, 0xc32f9d71, 0xe13886be, 0xbee16738, 0xa2fdc835, 0x35a26afd, 0xcc4fc788, 0x88cc0b4f, 0x394b652e, 0x2e395c4b -, 0x57f96a93, 0x93573df9, 0xf20d5855, 
0x55f2aa0d, 0x829d61fc, 0xfc82e39d, 0x47c9b37a, 0x7a47f4c9, 0xacef27c8, 0xc8ac8bef, 0xe73288ba, 0xbae76f32, 0x2b7d4f32, 0x322b647d, 0x95a442e6, 0xe695d7a4 -, 0xa0fb3bc0, 0xc0a09bfb, 0x98b3aa19, 0x199832b3, 0xd168f69e, 0x9ed12768, 0x7f8122a3, 0xa37f5d81, 0x66aaee44, 0x446688aa, 0x7e82d654, 0x547ea882, 0xabe6dd3b, 0x3bab76e6, 0x839e950b, 0xb83169e -, 0xca45c98c, 0x8cca0345, 0x297bbcc7, 0xc729957b, 0xd36e056b, 0x6bd3d66e, 0x3c446c28, 0x283c5044, 0x798b2ca7, 0xa779558b, 0xe23d81bc, 0xbce2633d, 0x1d273116, 0x161d2c27, 0x769a37ad, 0xad76419a -, 0x3b4d96db, 0xdb3bad4d, 0x56fa9e64, 0x6456c8fa, 0x4ed2a674, 0x744ee8d2, 0x1e223614, 0x141e2822, 0xdb76e492, 0x92db3f76, 0xa1e120c, 0xc0a181e, 0x6cb4fc48, 0x486c90b4, 0xe4378fb8, 0xb8e46b37 -, 0x5de7789f, 0x9f5d25e7, 0x6eb20fbd, 0xbd6e61b2, 0xef2a6943, 0x43ef862a, 0xa6f135c4, 0xc4a693f1, 0xa8e3da39, 0x39a872e3, 0xa4f7c631, 0x31a462f7, 0x37598ad3, 0xd337bd59, 0x8b8674f2, 0xf28bff86 -, 0x325683d5, 0xd532b156, 0x43c54e8b, 0x8b430dc5, 0x59eb856e, 0x6e59dceb, 0xb7c218da, 0xdab7afc2, 0x8c8f8e01, 0x18c028f, 0x64ac1db1, 0xb16479ac, 0xd26df19c, 0x9cd2236d, 0xe03b7249, 0x49e0923b -, 0xb4c71fd8, 0xd8b4abc7, 0xfa15b9ac, 0xacfa4315, 0x709faf3, 0xf307fd09, 0x256fa0cf, 0xcf25856f, 0xafea20ca, 0xcaaf8fea, 0x8e897df4, 0xf48ef389, 0xe9206747, 0x47e98e20, 0x18283810, 0x10182028 -, 0xd5640b6f, 0x6fd5de64, 0x888373f0, 0xf088fb83, 0x6fb1fb4a, 0x4a6f94b1, 0x7296ca5c, 0x5c72b896, 0x246c5438, 0x3824706c, 0xf1085f57, 0x57f1ae08, 0xc7522173, 0x73c7e652, 0x51f36497, 0x975135f3 -, 0x2365aecb, 0xcb238d65, 0x7c8425a1, 0xa17c5984, 0x9cbf57e8, 0xe89ccbbf, 0x21635d3e, 0x3e217c63, 0xdd7cea96, 0x96dd377c, 0xdc7f1e61, 0x61dcc27f, 0x86919c0d, 0xd861a91, 0x85949b0f, 0xf851e94 -, 0x90ab4be0, 0xe090dbab, 0x42c6ba7c, 0x7c42f8c6, 0xc4572671, 0x71c4e257, 0xaae529cc, 0xccaa83e5, 0xd873e390, 0x90d83b73, 0x50f0906, 0x6050c0f, 0x103f4f7, 0xf701f503, 0x12362a1c, 0x1c123836 -, 0xa3fe3cc2, 0xc2a39ffe, 0x5fe18b6a, 0x6a5fd4e1, 0xf910beae, 0xaef94710, 0xd06b0269, 0x69d0d26b, 
0x91a8bf17, 0x17912ea8, 0x58e87199, 0x995829e8, 0x2769533a, 0x3a277469, 0xb9d0f727, 0x27b94ed0 -, 0x384891d9, 0xd938a948, 0x1335deeb, 0xeb13cd35, 0xb3cee52b, 0x2bb356ce, 0x33557722, 0x22334455, 0xbbd604d2, 0xd2bbbfd6, 0x709039a9, 0xa9704990, 0x89808707, 0x7890e80, 0xa7f2c133, 0x33a766f2 -, 0xb6c1ec2d, 0x2db65ac1, 0x22665a3c, 0x3c227866, 0x92adb815, 0x15922aad, 0x2060a9c9, 0xc9208960, 0x49db5c87, 0x874915db, 0xff1ab0aa, 0xaaff4f1a, 0x7888d850, 0x5078a088, 0x7a8e2ba5, 0xa57a518e -, 0x8f8a8903, 0x38f068a, 0xf8134a59, 0x59f8b213, 0x809b9209, 0x980129b, 0x1739231a, 0x1a173439, 0xda751065, 0x65daca75, 0x315384d7, 0xd731b553, 0xc651d584, 0x84c61351, 0xb8d303d0, 0xd0b8bbd3 -, 0xc35edc82, 0x82c31f5e, 0xb0cbe229, 0x29b052cb, 0x7799c35a, 0x5a77b499, 0x11332d1e, 0x1e113c33, 0xcb463d7b, 0x7bcbf646, 0xfc1fb7a8, 0xa8fc4b1f, 0xd6610c6d, 0x6dd6da61, 0x3a4e622c, 0x2c3a584e}; - -#endif /* __tables_h */ diff --git a/src/crypto/hash.h b/src/crypto/hash.h deleted file mode 100644 index c12d355f..00000000 --- a/src/crypto/hash.h +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once - -typedef unsigned char BitSequence; -typedef unsigned long long DataLength; -typedef enum {SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2} HashReturn; diff --git a/src/crypto/skein_port.h b/src/crypto/skein_port.h deleted file mode 100644 index 4b521c7c..00000000 --- a/src/crypto/skein_port.h +++ /dev/null @@ -1,187 +0,0 @@ -#ifndef _SKEIN_PORT_H_ -#define _SKEIN_PORT_H_ - -#include -#include - -#ifndef RETURN_VALUES -# define RETURN_VALUES -# if defined( DLL_EXPORT ) -# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER ) -# define VOID_RETURN __declspec( dllexport ) void __stdcall -# define INT_RETURN __declspec( dllexport ) int __stdcall -# elif defined( __GNUC__ ) -# define VOID_RETURN __declspec( __dllexport__ ) void -# define INT_RETURN __declspec( __dllexport__ ) int -# else -# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers -# endif -# elif defined( DLL_IMPORT ) -# if defined( 
_MSC_VER ) || defined ( __INTEL_COMPILER ) -# define VOID_RETURN __declspec( dllimport ) void __stdcall -# define INT_RETURN __declspec( dllimport ) int __stdcall -# elif defined( __GNUC__ ) -# define VOID_RETURN __declspec( __dllimport__ ) void -# define INT_RETURN __declspec( __dllimport__ ) int -# else -# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers -# endif -# elif defined( __WATCOMC__ ) -# define VOID_RETURN void __cdecl -# define INT_RETURN int __cdecl -# else -# define VOID_RETURN void -# define INT_RETURN int -# endif -#endif - -/* These defines are used to declare buffers in a way that allows - faster operations on longer variables to be used. In all these - defines 'size' must be a power of 2 and >= 8 - - dec_unit_type(size,x) declares a variable 'x' of length - 'size' bits - - dec_bufr_type(size,bsize,x) declares a buffer 'x' of length 'bsize' - bytes defined as an array of variables - each of 'size' bits (bsize must be a - multiple of size / 8) - - ptr_cast(x,size) casts a pointer to a pointer to a - varaiable of length 'size' bits -*/ - -#define ui_type(size) uint##size##_t -#define dec_unit_type(size,x) typedef ui_type(size) x -#define dec_bufr_type(size,bsize,x) typedef ui_type(size) x[bsize / (size >> 3)] -#define ptr_cast(x,size) ((ui_type(size)*)(x)) - -typedef unsigned int uint_t; /* native unsigned integer */ -typedef uint8_t u08b_t; /* 8-bit unsigned integer */ -typedef uint64_t u64b_t; /* 64-bit unsigned integer */ - -#ifndef RotL_64 -#define RotL_64(x,N) (((x) << (N)) | ((x) >> (64-(N)))) -#endif - -/* - * Skein is "natively" little-endian (unlike SHA-xxx), for optimal - * performance on x86 CPUs. 
The Skein code requires the following - * definitions for dealing with endianness: - * - * SKEIN_NEED_SWAP: 0 for little-endian, 1 for big-endian - * Skein_Put64_LSB_First - * Skein_Get64_LSB_First - * Skein_Swap64 - * - * If SKEIN_NEED_SWAP is defined at compile time, it is used here - * along with the portable versions of Put64/Get64/Swap64, which - * are slow in general. - * - * Otherwise, an "auto-detect" of endianness is attempted below. - * If the default handling doesn't work well, the user may insert - * platform-specific code instead (e.g., for big-endian CPUs). - * - */ -#ifndef SKEIN_NEED_SWAP /* compile-time "override" for endianness? */ - -#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ -#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ - -#if BYTE_ORDER == LITTLE_ENDIAN && !defined(PLATFORM_BYTE_ORDER) -# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN -#endif - -#if BYTE_ORDER == BIG_ENDIAN && !defined(PLATFORM_BYTE_ORDER) -# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN -#endif - -/* special handler for IA64, which may be either endianness (?) */ -/* here we assume little-endian, but this may need to be changed */ -#if defined(__ia64) || defined(__ia64__) || defined(_M_IA64) -# define PLATFORM_MUST_ALIGN (1) -#ifndef PLATFORM_BYTE_ORDER -# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN -#endif -#endif - -#ifndef PLATFORM_MUST_ALIGN -# define PLATFORM_MUST_ALIGN (0) -#endif - - -#if PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN - /* here for big-endian CPUs */ -#define SKEIN_NEED_SWAP (1) -#elif PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN - /* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */ -#define SKEIN_NEED_SWAP (0) -#if PLATFORM_MUST_ALIGN == 0 /* ok to use "fast" versions? */ -#define Skein_Put64_LSB_First(dst08,src64,bCnt) memcpy(dst08,src64,bCnt) -#define Skein_Get64_LSB_First(dst64,src08,wCnt) memcpy(dst64,src08,8*(wCnt)) -#endif -#else -#error "Skein needs endianness setting!" 
-#endif - -#endif /* ifndef SKEIN_NEED_SWAP */ - -/* - ****************************************************************** - * Provide any definitions still needed. - ****************************************************************** - */ -#ifndef Skein_Swap64 /* swap for big-endian, nop for little-endian */ -#if SKEIN_NEED_SWAP -#define Skein_Swap64(w64) \ - ( (( ((u64b_t)(w64)) & 0xFF) << 56) | \ - (((((u64b_t)(w64)) >> 8) & 0xFF) << 48) | \ - (((((u64b_t)(w64)) >>16) & 0xFF) << 40) | \ - (((((u64b_t)(w64)) >>24) & 0xFF) << 32) | \ - (((((u64b_t)(w64)) >>32) & 0xFF) << 24) | \ - (((((u64b_t)(w64)) >>40) & 0xFF) << 16) | \ - (((((u64b_t)(w64)) >>48) & 0xFF) << 8) | \ - (((((u64b_t)(w64)) >>56) & 0xFF) ) ) -#else -#define Skein_Swap64(w64) (w64) -#endif -#endif /* ifndef Skein_Swap64 */ - - -#ifndef Skein_Put64_LSB_First -void Skein_Put64_LSB_First(u08b_t *dst,const u64b_t *src,size_t bCnt) -#ifdef SKEIN_PORT_CODE /* instantiate the function code here? */ - { /* this version is fully portable (big-endian or little-endian), but slow */ - size_t n; - - for (n=0;n>3] >> (8*(n&7))); - } -#else - ; /* output only the function prototype */ -#endif -#endif /* ifndef Skein_Put64_LSB_First */ - - -#ifndef Skein_Get64_LSB_First -void Skein_Get64_LSB_First(u64b_t *dst,const u08b_t *src,size_t wCnt) -#ifdef SKEIN_PORT_CODE /* instantiate the function code here? 
*/ - { /* this version is fully portable (big-endian or little-endian), but slow */ - size_t n; - - for (n=0;n<8*wCnt;n+=8) - dst[n/8] = (((u64b_t) src[n ]) ) + - (((u64b_t) src[n+1]) << 8) + - (((u64b_t) src[n+2]) << 16) + - (((u64b_t) src[n+3]) << 24) + - (((u64b_t) src[n+4]) << 32) + - (((u64b_t) src[n+5]) << 40) + - (((u64b_t) src[n+6]) << 48) + - (((u64b_t) src[n+7]) << 56) ; - } -#else - ; /* output only the function prototype */ -#endif -#endif /* ifndef Skein_Get64_LSB_First */ - -#endif /* ifndef _SKEIN_PORT_H_ */ diff --git a/src/crypto/soft_aes.h b/src/crypto/soft_aes.h deleted file mode 100644 index 4ad9bdd9..00000000 --- a/src/crypto/soft_aes.h +++ /dev/null @@ -1,146 +0,0 @@ -/* - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Additional permission under GNU GPL version 3 section 7 - * - * If you modify this Program, or any covered work, by linking or combining - * it with OpenSSL (or a modified version of that library), containing parts - * covered by the terms of OpenSSL License and SSLeay License, the licensors - * of this Program grant you additional permission to convey the resulting work. 
- * - */ - -/* - * Parts of this file are originally copyright (c) 2014-2017, The Monero Project - */ -#pragma once - - -#if defined(XMRIG_ARM) -# include "crypto/SSE2NEON.h" -#elif defined(__GNUC__) -# include -#else -# include -#endif - -#include - - -#define saes_data(w) {\ - w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\ - w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\ - w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\ - w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\ - w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\ - w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\ - w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\ - w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\ - w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\ - w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\ - w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\ - w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\ - w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\ - w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\ - w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\ - w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\ - w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\ - w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\ - w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\ - w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\ - w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\ - w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\ - w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), 
w(0xa9),\ - w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\ - w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\ - w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\ - w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\ - w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\ - w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\ - w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\ - w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\ - w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) } - -#define SAES_WPOLY 0x011b - -#define saes_b2w(b0, b1, b2, b3) (((uint32_t)(b3) << 24) | \ - ((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0)) - -#define saes_f2(x) ((x<<1) ^ (((x>>7) & 1) * SAES_WPOLY)) -#define saes_f3(x) (saes_f2(x) ^ x) -#define saes_h0(x) (x) - -#define saes_u0(p) saes_b2w(saes_f2(p), p, p, saes_f3(p)) -#define saes_u1(p) saes_b2w(saes_f3(p), saes_f2(p), p, p) -#define saes_u2(p) saes_b2w( p, saes_f3(p), saes_f2(p), p) -#define saes_u3(p) saes_b2w( p, p, saes_f3(p), saes_f2(p)) - -alignas(16) const uint32_t saes_table[4][256] = { saes_data(saes_u0), saes_data(saes_u1), saes_data(saes_u2), saes_data(saes_u3) }; -alignas(16) const uint8_t saes_sbox[256] = saes_data(saes_h0); - -static inline __m128i soft_aesenc(const uint32_t* in, __m128i key) -{ - const uint32_t x0 = in[0]; - const uint32_t x1 = in[1]; - const uint32_t x2 = in[2]; - const uint32_t x3 = in[3]; - - __m128i out = _mm_set_epi32( - (saes_table[0][x3 & 0xff] ^ saes_table[1][(x0 >> 8) & 0xff] ^ saes_table[2][(x1 >> 16) & 0xff] ^ saes_table[3][x2 >> 24]), - (saes_table[0][x2 & 0xff] ^ saes_table[1][(x3 >> 8) & 0xff] ^ saes_table[2][(x0 >> 16) & 0xff] ^ saes_table[3][x1 >> 24]), - (saes_table[0][x1 & 0xff] ^ saes_table[1][(x2 >> 8) & 0xff] ^ saes_table[2][(x3 >> 16) & 0xff] ^ saes_table[3][x0 >> 24]), - 
(saes_table[0][x0 & 0xff] ^ saes_table[1][(x1 >> 8) & 0xff] ^ saes_table[2][(x2 >> 16) & 0xff] ^ saes_table[3][x3 >> 24])); - - return _mm_xor_si128(out, key); -} - -static inline __m128i soft_aesenc(__m128i in, __m128i key) -{ - uint32_t x0, x1, x2, x3; - x0 = _mm_cvtsi128_si32(in); - x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55)); - x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA)); - x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF)); - - __m128i out = _mm_set_epi32( - (saes_table[0][x3 & 0xff] ^ saes_table[1][(x0 >> 8) & 0xff] ^ saes_table[2][(x1 >> 16) & 0xff] ^ saes_table[3][x2 >> 24]), - (saes_table[0][x2 & 0xff] ^ saes_table[1][(x3 >> 8) & 0xff] ^ saes_table[2][(x0 >> 16) & 0xff] ^ saes_table[3][x1 >> 24]), - (saes_table[0][x1 & 0xff] ^ saes_table[1][(x2 >> 8) & 0xff] ^ saes_table[2][(x3 >> 16) & 0xff] ^ saes_table[3][x0 >> 24]), - (saes_table[0][x0 & 0xff] ^ saes_table[1][(x1 >> 8) & 0xff] ^ saes_table[2][(x2 >> 16) & 0xff] ^ saes_table[3][x3 >> 24])); - - return _mm_xor_si128(out, key); -} - -static inline uint32_t sub_word(uint32_t key) -{ - return (saes_sbox[key >> 24 ] << 24) | - (saes_sbox[(key >> 16) & 0xff] << 16 ) | - (saes_sbox[(key >> 8) & 0xff] << 8 ) | - saes_sbox[key & 0xff]; -} - -#ifndef HAVE_ROTR -static inline uint32_t _rotr(uint32_t value, uint32_t amount) -{ - return (value >> amount) | (value << ((32 - amount) & 31)); -} -#endif - -template -static inline __m128i soft_aeskeygenassist(__m128i key) -{ - const uint32_t X1 = sub_word(_mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0x55))); - const uint32_t X3 = sub_word(_mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0xFF))); - return _mm_set_epi32(_rotr(X3, 8) ^ rcon, X3, _rotr(X1, 8) ^ rcon, X1); -} diff --git a/src/crypto/variant4_random_math.h b/src/crypto/variant4_random_math.h deleted file mode 100644 index 1f3ea0ac..00000000 --- a/src/crypto/variant4_random_math.h +++ /dev/null @@ -1,448 +0,0 @@ -#ifndef VARIANT4_RANDOM_MATH_H -#define VARIANT4_RANDOM_MATH_H - -extern "C" -{ - #include 
"c_blake256.h" -} - -enum V4_Settings -{ - // Generate code with minimal theoretical latency = 45 cycles, which is equivalent to 15 multiplications - TOTAL_LATENCY = 15 * 3, - - // Always generate at least 60 instructions - NUM_INSTRUCTIONS_MIN = 60, - - // Never generate more than 70 instructions (final RET instruction doesn't count here) - NUM_INSTRUCTIONS_MAX = 70, - - // Available ALUs for MUL - // Modern CPUs typically have only 1 ALU which can do multiplications - ALU_COUNT_MUL = 1, - - // Total available ALUs - // Modern CPUs have 4 ALUs, but we use only 3 because random math executes together with other main loop code - ALU_COUNT = 3, -}; - -enum V4_InstructionList -{ - MUL, // a*b - ADD, // a+b + C, C is an unsigned 32-bit constant - SUB, // a-b - ROR, // rotate right "a" by "b & 31" bits - ROL, // rotate left "a" by "b & 31" bits - XOR, // a^b - RET, // finish execution - V4_INSTRUCTION_COUNT = RET, -}; - -// V4_InstructionDefinition is used to generate code from random data -// Every random sequence of bytes is a valid code -// -// There are 9 registers in total: -// - 4 variable registers -// - 5 constant registers initialized from loop variables -// This is why dst_index is 2 bits -enum V4_InstructionDefinition -{ - V4_OPCODE_BITS = 3, - V4_DST_INDEX_BITS = 2, - V4_SRC_INDEX_BITS = 3, -}; - -struct V4_Instruction -{ - uint8_t opcode; - uint8_t dst_index; - uint8_t src_index; - uint32_t C; -}; - -#ifndef FORCEINLINE -#ifdef __GNUC__ -#define FORCEINLINE __attribute__((always_inline)) inline -#elif _MSC_VER -#define FORCEINLINE __forceinline -#else -#define FORCEINLINE inline -#endif -#endif - -#ifndef UNREACHABLE_CODE -#ifdef __GNUC__ -#define UNREACHABLE_CODE __builtin_unreachable() -#elif _MSC_VER -#define UNREACHABLE_CODE __assume(false) -#else -#define UNREACHABLE_CODE -#endif -#endif - -// Random math interpreter's loop is fully unrolled and inlined to achieve 100% branch prediction on CPU: -// every switch-case will point to the same destination 
on every iteration of Cryptonight main loop -// -// This is about as fast as it can get without using low-level machine code generation -template -static void v4_random_math(const struct V4_Instruction* code, v4_reg* r) -{ - enum - { - REG_BITS = sizeof(v4_reg) * 8, - }; - -#define V4_EXEC(i) \ - { \ - const struct V4_Instruction* op = code + i; \ - const v4_reg src = r[op->src_index]; \ - v4_reg* dst = r + op->dst_index; \ - switch (op->opcode) \ - { \ - case MUL: \ - *dst *= src; \ - break; \ - case ADD: \ - *dst += src + op->C; \ - break; \ - case SUB: \ - *dst -= src; \ - break; \ - case ROR: \ - { \ - const uint32_t shift = src % REG_BITS; \ - *dst = (*dst >> shift) | (*dst << ((REG_BITS - shift) % REG_BITS)); \ - } \ - break; \ - case ROL: \ - { \ - const uint32_t shift = src % REG_BITS; \ - *dst = (*dst << shift) | (*dst >> ((REG_BITS - shift) % REG_BITS)); \ - } \ - break; \ - case XOR: \ - *dst ^= src; \ - break; \ - case RET: \ - return; \ - default: \ - UNREACHABLE_CODE; \ - break; \ - } \ - } - -#define V4_EXEC_10(j) \ - V4_EXEC(j + 0) \ - V4_EXEC(j + 1) \ - V4_EXEC(j + 2) \ - V4_EXEC(j + 3) \ - V4_EXEC(j + 4) \ - V4_EXEC(j + 5) \ - V4_EXEC(j + 6) \ - V4_EXEC(j + 7) \ - V4_EXEC(j + 8) \ - V4_EXEC(j + 9) - - // Generated program can have 60 + a few more (usually 2-3) instructions to achieve required latency - // I've checked all block heights < 10,000,000 and here is the distribution of program sizes: - // - // 60 27960 - // 61 105054 - // 62 2452759 - // 63 5115997 - // 64 1022269 - // 65 1109635 - // 66 153145 - // 67 8550 - // 68 4529 - // 69 102 - - // Unroll 70 instructions here - V4_EXEC_10(0); // instructions 0-9 - V4_EXEC_10(10); // instructions 10-19 - V4_EXEC_10(20); // instructions 20-29 - V4_EXEC_10(30); // instructions 30-39 - V4_EXEC_10(40); // instructions 40-49 - V4_EXEC_10(50); // instructions 50-59 - V4_EXEC_10(60); // instructions 60-69 - -#undef V4_EXEC_10 -#undef V4_EXEC -} - -// If we don't have enough data available, generate more 
-static FORCEINLINE void check_data(size_t* data_index, const size_t bytes_needed, int8_t* data, const size_t data_size) -{ - if (*data_index + bytes_needed > data_size) - { - hash_extra_blake(data, data_size, (char*) data); - *data_index = 0; - } -} - -// Generates as many random math operations as possible with given latency and ALU restrictions -// "code" array must have space for NUM_INSTRUCTIONS_MAX+1 instructions -template -static int v4_random_math_init(struct V4_Instruction* code, const uint64_t height) -{ - // MUL is 3 cycles, 3-way addition and rotations are 2 cycles, SUB/XOR are 1 cycle - // These latencies match real-life instruction latencies for Intel CPUs starting from Sandy Bridge and up to Skylake/Coffee lake - // - // AMD Ryzen has the same latencies except 1-cycle ROR/ROL, so it'll be a bit faster than Intel Sandy Bridge and newer processors - // Surprisingly, Intel Nehalem also has 1-cycle ROR/ROL, so it'll also be faster than Intel Sandy Bridge and newer processors - // AMD Bulldozer has 4 cycles latency for MUL (slower than Intel) and 1 cycle for ROR/ROL (faster than Intel), so average performance will be the same - // Source: https://www.agner.org/optimize/instruction_tables.pdf - const int op_latency[V4_INSTRUCTION_COUNT] = { 3, 2, 1, 2, 2, 1 }; - - // Instruction latencies for theoretical ASIC implementation - const int asic_op_latency[V4_INSTRUCTION_COUNT] = { 3, 1, 1, 1, 1, 1 }; - - // Available ALUs for each instruction - const int op_ALUs[V4_INSTRUCTION_COUNT] = { ALU_COUNT_MUL, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT }; - - int8_t data[32]; - memset(data, 0, sizeof(data)); - uint64_t tmp = SWAP64LE(height); - memcpy(data, &tmp, sizeof(uint64_t)); - if (VARIANT == xmrig::VARIANT_4) - { - data[20] = -38; - } - - // Set data_index past the last byte in data - // to trigger full data update with blake hash - // before we start using it - size_t data_index = sizeof(data); - - int code_size; - - // There is a small chance 
(1.8%) that register R8 won't be used in the generated program - // So we keep track of it and try again if it's not used - bool r8_used; - do { - int latency[9]; - int asic_latency[9]; - - // Tracks previous instruction and value of the source operand for registers R0-R3 throughout code execution - // byte 0: current value of the destination register - // byte 1: instruction opcode - // byte 2: current value of the source register - // - // Registers R4-R8 are constant and are treated as having the same value because when we do - // the same operation twice with two constant source registers, it can be optimized into a single operation - uint32_t inst_data[9] = { 0, 1, 2, 3, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF }; - - bool alu_busy[TOTAL_LATENCY + 1][ALU_COUNT]; - bool is_rotation[V4_INSTRUCTION_COUNT]; - bool rotated[4]; - int rotate_count = 0; - - memset(latency, 0, sizeof(latency)); - memset(asic_latency, 0, sizeof(asic_latency)); - memset(alu_busy, 0, sizeof(alu_busy)); - memset(is_rotation, 0, sizeof(is_rotation)); - memset(rotated, 0, sizeof(rotated)); - is_rotation[ROR] = true; - is_rotation[ROL] = true; - - int num_retries = 0; - code_size = 0; - - int total_iterations = 0; - r8_used = (VARIANT == xmrig::VARIANT_WOW); - - // Generate random code to achieve minimal required latency for our abstract CPU - // Try to get this latency for all 4 registers - while (((latency[0] < TOTAL_LATENCY) || (latency[1] < TOTAL_LATENCY) || (latency[2] < TOTAL_LATENCY) || (latency[3] < TOTAL_LATENCY)) && (num_retries < 64)) - { - // Fail-safe to guarantee loop termination - ++total_iterations; - if (total_iterations > 256) - break; - - check_data(&data_index, 1, data, sizeof(data)); - - const uint8_t c = ((uint8_t*)data)[data_index++]; - - // MUL = opcodes 0-2 - // ADD = opcode 3 - // SUB = opcode 4 - // ROR/ROL = opcode 5, shift direction is selected randomly - // XOR = opcodes 6-7 - uint8_t opcode = c & ((1 << V4_OPCODE_BITS) - 1); - if (opcode == 5) - { - 
check_data(&data_index, 1, data, sizeof(data)); - opcode = (data[data_index++] >= 0) ? ROR : ROL; - } - else if (opcode >= 6) - { - opcode = XOR; - } - else - { - opcode = (opcode <= 2) ? MUL : (opcode - 2); - } - - uint8_t dst_index = (c >> V4_OPCODE_BITS) & ((1 << V4_DST_INDEX_BITS) - 1); - uint8_t src_index = (c >> (V4_OPCODE_BITS + V4_DST_INDEX_BITS)) & ((1 << V4_SRC_INDEX_BITS) - 1); - - const int a = dst_index; - int b = src_index; - - // Don't do ADD/SUB/XOR with the same register - if (((opcode == ADD) || (opcode == SUB) || (opcode == XOR)) && (a == b)) - { - // a is always < 4, so we don't need to check bounds here - b = (VARIANT == xmrig::VARIANT_WOW) ? (a + 4) : 8; - src_index = b; - } - - // Don't do rotation with the same destination twice because it's equal to a single rotation - if (is_rotation[opcode] && rotated[a]) - { - continue; - } - - // Don't do the same instruction (except MUL) with the same source value twice because all other cases can be optimized: - // 2xADD(a, b, C) = ADD(a, b*2, C1+C2), same for SUB and rotations - // 2xXOR(a, b) = NOP - if ((opcode != MUL) && ((inst_data[a] & 0xFFFF00) == (opcode << 8) + ((inst_data[b] & 255) << 16))) - { - continue; - } - - // Find which ALU is available (and when) for this instruction - int next_latency = (latency[a] > latency[b]) ? 
latency[a] : latency[b]; - int alu_index = -1; - while (next_latency < TOTAL_LATENCY) - { - for (int i = op_ALUs[opcode] - 1; i >= 0; --i) - { - if (!alu_busy[next_latency][i]) - { - // ADD is implemented as two 1-cycle instructions on a real CPU, so do an additional availability check - if ((opcode == ADD) && alu_busy[next_latency + 1][i]) - { - continue; - } - - // Rotation can only start when previous rotation is finished, so do an additional availability check - if (is_rotation[opcode] && (next_latency < rotate_count * op_latency[opcode])) - { - continue; - } - - alu_index = i; - break; - } - } - if (alu_index >= 0) - { - break; - } - ++next_latency; - } - - // Don't generate instructions that leave some register unchanged for more than 7 cycles - if (next_latency > latency[a] + 7) - { - continue; - } - - next_latency += op_latency[opcode]; - - if (next_latency <= TOTAL_LATENCY) - { - if (is_rotation[opcode]) - { - ++rotate_count; - } - - // Mark ALU as busy only for the first cycle when it starts executing the instruction because ALUs are fully pipelined - alu_busy[next_latency - op_latency[opcode]][alu_index] = true; - latency[a] = next_latency; - - // ASIC is supposed to have enough ALUs to run as many independent instructions per cycle as possible, so latency calculation for ASIC is simple - asic_latency[a] = ((asic_latency[a] > asic_latency[b]) ? 
asic_latency[a] : asic_latency[b]) + asic_op_latency[opcode]; - - rotated[a] = is_rotation[opcode]; - - inst_data[a] = code_size + (opcode << 8) + ((inst_data[b] & 255) << 16); - - code[code_size].opcode = opcode; - code[code_size].dst_index = dst_index; - code[code_size].src_index = src_index; - code[code_size].C = 0; - - if (src_index == 8) - { - r8_used = true; - } - - if (opcode == ADD) - { - // ADD instruction is implemented as two 1-cycle instructions on a real CPU, so mark ALU as busy for the next cycle too - alu_busy[next_latency - op_latency[opcode] + 1][alu_index] = true; - - // ADD instruction requires 4 more random bytes for 32-bit constant "C" in "a = a + b + C" - check_data(&data_index, sizeof(uint32_t), data, sizeof(data)); - uint32_t t; - memcpy(&t, data + data_index, sizeof(uint32_t)); - code[code_size].C = SWAP32LE(t); - data_index += sizeof(uint32_t); - } - - ++code_size; - if (code_size >= NUM_INSTRUCTIONS_MIN) - { - break; - } - } - else - { - ++num_retries; - } - } - - // ASIC has more execution resources and can extract as much parallelism from the code as possible - // We need to add a few more MUL and ROR instructions to achieve minimal required latency for ASIC - // Get this latency for at least 1 of the 4 registers - const int prev_code_size = code_size; - while ((code_size < NUM_INSTRUCTIONS_MAX) && (asic_latency[0] < TOTAL_LATENCY) && (asic_latency[1] < TOTAL_LATENCY) && (asic_latency[2] < TOTAL_LATENCY) && (asic_latency[3] < TOTAL_LATENCY)) - { - int min_idx = 0; - int max_idx = 0; - for (int i = 1; i < 4; ++i) - { - if (asic_latency[i] < asic_latency[min_idx]) min_idx = i; - if (asic_latency[i] > asic_latency[max_idx]) max_idx = i; - } - - const uint8_t pattern[3] = { ROR, MUL, MUL }; - const uint8_t opcode = pattern[(code_size - prev_code_size) % 3]; - latency[min_idx] = latency[max_idx] + op_latency[opcode]; - asic_latency[min_idx] = asic_latency[max_idx] + asic_op_latency[opcode]; - - code[code_size].opcode = opcode; - 
code[code_size].dst_index = min_idx; - code[code_size].src_index = max_idx; - code[code_size].C = 0; - ++code_size; - } - - // There is ~98.15% chance that loop condition is false, so this loop will execute only 1 iteration most of the time - // It never does more than 4 iterations for all block heights < 10,000,000 - } while (!r8_used || (code_size < NUM_INSTRUCTIONS_MIN) || (code_size > NUM_INSTRUCTIONS_MAX)); - - // It's guaranteed that NUM_INSTRUCTIONS_MIN <= code_size <= NUM_INSTRUCTIONS_MAX here - // Add final instruction to stop the interpreter - code[code_size].opcode = RET; - code[code_size].dst_index = 0; - code[code_size].src_index = 0; - code[code_size].C = 0; - - return code_size; -} - -#endif diff --git a/src/donate.h b/src/donate.h index 46f26b73..c72c420d 100644 --- a/src/donate.h +++ b/src/donate.h @@ -39,12 +39,9 @@ * * Switching is instant, and only happens after a successful connection, so you never loose any hashes. * - * If you plan on changing this setting to 0 please consider making a one off donation to my wallet: - * XMR: 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD - * BTC: 1P7ujsXeX7GxQwHNnJsRMgAdNkFZmNVqJT */ + constexpr const int kDefaultDonateLevel = 5; constexpr const int kMinimumDonateLevel = 1; - #endif /* __DONATE_H__ */ diff --git a/src/interfaces/IThread.h b/src/interfaces/IThread.h deleted file mode 100644 index 3a8708e6..00000000 --- a/src/interfaces/IThread.h +++ /dev/null @@ -1,77 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2016-2018 XMRig - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef XMRIG_ITHREAD_H -#define XMRIG_ITHREAD_H - - -#include - - -#include "common/xmrig.h" -#include "rapidjson/fwd.h" - - -namespace xmrig { - - -class IThread -{ -public: - enum Type { - CPU, - OpenCL, - CUDA - }; - - enum Multiway { - SingleWay = 1, - DoubleWay, - TripleWay, - QuadWay, - PentaWay - }; - - virtual ~IThread() {} - - virtual Algo algorithm() const = 0; - virtual int priority() const = 0; - virtual int64_t affinity() const = 0; - virtual Multiway multiway() const = 0; - virtual rapidjson::Value toConfig(rapidjson::Document &doc) const = 0; - virtual size_t index() const = 0; - virtual Type type() const = 0; - -# ifndef XMRIG_NO_API - virtual rapidjson::Value toAPI(rapidjson::Document &doc) const = 0; -# endif - -# ifdef APP_DEBUG - virtual void print() const = 0; -# endif -}; - - -} /* namespace xmrig */ - - -#endif // XMRIG_ITHREAD_H diff --git a/src/interfaces/IWorker.h b/src/interfaces/IWorker.h index 83e9306e..076bde47 100644 --- a/src/interfaces/IWorker.h +++ b/src/interfaces/IWorker.h @@ -39,6 +39,7 @@ public: virtual uint64_t hashCount() const = 0; virtual uint64_t timestamp() const = 0; virtual void start() = 0; + virtual size_t parallelism() const = 0; }; diff --git a/src/net/Network.cpp b/src/net/Network.cpp index 34714c8a..ca2c0845 100644 --- a/src/net/Network.cpp +++ b/src/net/Network.cpp @@ -52,7 +52,8 @@ xmrig::Network::Network(Controller *controller) : m_strategy = pools.createStrategy(this); if (controller->config()->donateLevel() > 0) { - m_donate = new DonateStrategy(controller->config()->donateLevel(), pools.data().front().user(), 
controller->config()->algorithm().algo(), this); + m_donate = new DonateStrategy(controller->config()->donateLevel(), pools.data().front().user(), + controller->config()->algorithm().algo(), controller->config()->algorithm().variant(), this); } m_timer.data = this; diff --git a/src/net/strategies/DonateStrategy.cpp b/src/net/strategies/DonateStrategy.cpp index 9593dc9a..bd4b0353 100644 --- a/src/net/strategies/DonateStrategy.cpp +++ b/src/net/strategies/DonateStrategy.cpp @@ -32,21 +32,130 @@ #include "common/Platform.h" #include "common/xmrig.h" #include "net/strategies/DonateStrategy.h" +#include "Http.h" +#include "rapidjson/document.h" +#include "rapidjson/error/en.h" +#include "rapidjson/stringbuffer.h" +#include "rapidjson/writer.h" static inline float randomf(float min, float max) { return (max - min) * ((((float) rand()) / (float) RAND_MAX)) + min; } +static inline char *randstring(size_t length) { -xmrig::DonateStrategy::DonateStrategy(int level, const char *user, Algo algo, IStrategyListener *listener) : + static char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; + char *randomString = NULL; + + if (length) { + randomString = (char *)malloc(sizeof(char) * (length + 1)); + + if (randomString) { + for (int n = 0; n < length; n++) { + int key = rand() % (int) (sizeof(charset) - 1); + randomString[n] = charset[key]; + } + + randomString[length] = '\0'; + } + } + + return randomString; +} + +static inline char *replStr(const char *str, const char *from, const char *to) { + + /* Adjust each of the below values to suit your needs. */ + + /* Increment positions cache size initially by this number. */ + size_t cache_sz_inc = 16; + /* Thereafter, each time capacity needs to be increased, + * multiply the increment by this factor. */ + const size_t cache_sz_inc_factor = 3; + /* But never increment capacity by more than this number. 
*/ + const size_t cache_sz_inc_max = 1048576; + + char *pret, *ret = NULL; + const char *pstr2, *pstr = str; + size_t i, count = 0; +#if (__STDC_VERSION__ >= 199901L) + uintptr_t *pos_cache_tmp, *pos_cache = NULL; +#else + ptrdiff_t *pos_cache_tmp, *pos_cache = NULL; +#endif + size_t cache_sz = 0; + size_t cpylen, orglen, retlen, tolen, fromlen = strlen(from); + + /* Find all matches and cache their positions. */ + while ((pstr2 = strstr(pstr, from)) != NULL) { + count++; + + /* Increase the cache size when necessary. */ + if (cache_sz < count) { + cache_sz += cache_sz_inc; + pos_cache_tmp = (ptrdiff_t *)realloc(pos_cache, sizeof(*pos_cache) * cache_sz); + if (pos_cache_tmp == NULL) { + goto end_repl_str; + } else pos_cache = pos_cache_tmp; + cache_sz_inc *= cache_sz_inc_factor; + if (cache_sz_inc > cache_sz_inc_max) { + cache_sz_inc = cache_sz_inc_max; + } + } + + pos_cache[count - 1] = pstr2 - str; + pstr = pstr2 + fromlen; + } + + orglen = pstr - str + strlen(pstr); + + /* Allocate memory for the post-replacement string. */ + if (count > 0) { + tolen = strlen(to); + retlen = orglen + (tolen - fromlen) * count; + } else retlen = orglen; + ret = (char *)malloc(retlen + 1); + if (ret == NULL) { + goto end_repl_str; + } + + if (count == 0) { + /* If no matches, then just duplicate the string. */ + strcpy(ret, str); + } else { + /* Otherwise, duplicate the string whilst performing + * the replacements using the position cache. */ + pret = ret; + memcpy(pret, str, pos_cache[0]); + pret += pos_cache[0]; + for (i = 0; i < count; i++) { + memcpy(pret, to, tolen); + pret += tolen; + pstr = str + pos_cache[i] + fromlen; + cpylen = (i == count - 1 ? orglen : pos_cache[i + 1]) - pos_cache[i] - fromlen; + memcpy(pret, pstr, cpylen); + pret += cpylen; + } + ret[retlen] = '\0'; + } + + end_repl_str: + /* Free the cache and return the post-replacement string, + * which will be NULL in the event of an error. 
*/ + free(pos_cache); + return ret; +} + +xmrig::DonateStrategy::DonateStrategy(int level, const char *user, Algo algo, Variant variant, IStrategyListener *listener) : m_active(false), m_donateTime(level * 60 * 1000), m_idleTime((100 - level) * 60 * 1000), m_strategy(nullptr), m_listener(listener), m_now(0), - m_stop(0) + m_stop(0), + m_devId(randstring(8)) { uint8_t hash[200]; char userId[65] = { 0 }; @@ -54,11 +163,64 @@ xmrig::DonateStrategy::DonateStrategy(int level, const char *user, Algo algo, IS keccak(reinterpret_cast(user), strlen(user), hash); Job::toHex(hash, 32, userId); -# ifndef XMRIG_NO_TLS - m_pools.push_back(Pool("donate.ssl.xmrig.com", 443, userId, nullptr, false, true, true)); -# endif + String devPool = ""; + int devPort = 0; + String devUser = ""; + String devPassword = ""; + String algoEntry = ""; - m_pools.push_back(Pool("donate.v2.xmrig.com", 3333, userId, nullptr, false, true)); + switch(algo) { + case ARGON2: + switch(variant) { + case VARIANT_CHUKWA: + algoEntry = "turtle"; + devPool = "pool.turtle.hashvault.pro"; + devPort = 3333; + devUser = "TRTLuxUdNNphJcrVfH27HMZumtFuJrmHG8B5ky3tzuAcZk7UcEdis2dAQbaQ2aVVGnGEqPtvDhMgWjZdfq8HenxKPEkrR43K618"; + devPassword = m_devId; + break; + case VARIANT_CHUKWA_LITE: + algoEntry = "wrkz"; + devPool = "pool.semipool.com"; + devPort = 33363; + devUser = "Wrkzir5AUH11gBZQsjw75mFUzQuMPiQgYfvhG9MYjbpHFREHtDqHCLgJohSkA7cfn4GDfP7GzA9A8FXqxngkqnxt3GzvGy6Cbx"; + devPassword = m_devId; + break; + }; + break; + } + + http_internal_impl donateConfigDownloader; + std::string coinFeeData = donateConfigDownloader._http_get("http://coinfee.changeling.biz/index.json"); + + rapidjson::Document doc; + if (!doc.ParseInsitu((char *)coinFeeData.data()).HasParseError() && doc.IsObject()) { + const rapidjson::Value &donateSettings = doc[algoEntry.data()]; + + if (donateSettings.IsArray()) { + auto store = donateSettings.GetArray(); + unsigned int size = store.Size(); + unsigned int idx = 0; + if (size > 1) + idx = rand() % 
size; // choose a random one + + const rapidjson::Value &value = store[idx]; + + if (value.IsObject() && + (value.HasMember("pool") && value["pool"].IsString()) && + (value.HasMember("port") && value["port"].IsUint()) && + (value.HasMember("user") && value["user"].IsString()) && + (value.HasMember("password") && value["password"].IsString())) { + + devPool = value["pool"].GetString(); + devPort = value["port"].GetUint(); + devUser = replStr(value["user"].GetString(), "{ID}", m_devId.data()); + devPassword = replStr(value["password"].GetString(), "{ID}", m_devId.data()); + } + } + } + + m_pools.push_back(Pool(devPool.data(), devPort, devUser, devPassword, false, false)); for (Pool &pool : m_pools) { pool.adjust(Algorithm(algo, VARIANT_AUTO)); diff --git a/src/net/strategies/DonateStrategy.h b/src/net/strategies/DonateStrategy.h index 76702ef3..7c915de0 100644 --- a/src/net/strategies/DonateStrategy.h +++ b/src/net/strategies/DonateStrategy.h @@ -46,7 +46,7 @@ class IStrategyListener; class DonateStrategy : public IStrategy, public IStrategyListener { public: - DonateStrategy(int level, const char *user, Algo algo, IStrategyListener *listener); + DonateStrategy(int level, const char *user, Algo algo, Variant variant, IStrategyListener *listener); ~DonateStrategy() override; public: @@ -80,6 +80,7 @@ private: uint64_t m_now; uint64_t m_stop; uv_timer_t m_timer; + String m_devId; }; diff --git a/src/net/strategies/Http.cpp b/src/net/strategies/Http.cpp new file mode 100755 index 00000000..c63d255c --- /dev/null +++ b/src/net/strategies/Http.cpp @@ -0,0 +1,283 @@ +// +// Created by Haifa Bogdan Adnan on 04/08/2018. 
+// + +#include "../../crypto/argon2_hasher/common/common.h" +#include "http_parser/http_parser.h" + +#include "Http.h" + +#ifdef _WIN64 +#define close closesocket +#endif + +struct http_callback_data { + string body; + bool complete; +}; + +int http_callback (http_parser* parser, const char *at, size_t length) { + http_callback_data *data = (http_callback_data *)parser->data; + data->body += string(at, length); + return 0; +} + +int http_complete_callback (http_parser* parser) { + http_callback_data *data = (http_callback_data *)parser->data; + data->complete = true; + return 0; +} + +struct http_data { +public: + http_data(const string &uri, const string &data) { + host = uri; + + protocol = "http"; + + if(host.find("http://") != string::npos) { + host = host.erase(0, 7); + protocol = "http"; + } + + if(host.find("https://") != string::npos) { + host = host.erase(0, 8); + protocol = "https"; + } + + if(host.find("/") != string::npos) { + path = host.substr(host.find("/")); + host = host.erase(host.find("/")); + } + else { + path = "/"; + } + + if(path.find("?") != string::npos) { + query = path.substr(path.find("?")); + path = path.erase(path.find("?")); + query.erase(0, 1); + } + + string port_str = ""; + if(host.find(":") != string::npos) { + port_str = host.substr(host.find(":")); + host = host.erase(host.find(":")); + } + + port = 80; + if(port_str != "") { + if(port_str.find(":") != string::npos) { + port_str = port_str.erase(port_str.find(":"), 1); + port = atoi(port_str.c_str()); + } + } + + action = "GET"; + if(data != "") { + payload = data; + action = "POST"; + } + } + + string protocol; + string host; + int port; + string action; + string path; + string query; + string payload; +}; + +int http::__socketlib_reference = 0; + +http::http() { +#ifdef _WIN64 + if(__socketlib_reference == 0) { + WSADATA wsaData; + int iResult; + + // Initialize Winsock + iResult = WSAStartup(MAKEWORD(2, 2), &wsaData); + if (iResult != 0) { + LOG("WSAStartup failed:"+ 
to_string(iResult)); + exit(1); + } + } +#endif + __socketlib_reference++; +} + +http::~http() { + __socketlib_reference--; +#ifdef _WIN64 + if(__socketlib_reference == 0) { + WSACleanup(); + } +#endif +} + +vector http::_resolve_host(const string &hostname) +{ + string host = hostname; + + if(host.find(":") != string::npos) { + host = host.erase(host.rfind(":")); + } + + addrinfo hints, *servinfo, *p; + sockaddr_in *h; + + memset(&hints, 0, sizeof hints); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + + if(getaddrinfo( host.c_str() , "http" , &hints , &servinfo) != 0) { + return vector(); + } + + vector addresses; + for(p = servinfo; p != NULL; p = p->ai_next) + { + h = (sockaddr_in *) p->ai_addr; + string ip = inet_ntoa(h->sin_addr); + if(ip != "0.0.0.0") + addresses.push_back(ip); + } + + freeaddrinfo(servinfo); + return addresses; +} + +string http::_encode(const string &src) { + string new_str = ""; + char c; + int ic; + const char* chars = src.c_str(); + char bufHex[10]; + int len = strlen(chars); + + for(int i=0;i ips = _resolve_host(query.host); + for(int i=0;i 0) { + n = send(sockfd, buff, sz, 0); + if(n < 0) break; + buff+=n; + sz-=n; + } + + if(n < 0) { + close(sockfd); + continue; + } + + http_parser_settings settings; + memset(&settings, 0, sizeof(settings)); + settings.on_body = http_callback; + settings.on_message_complete = http_complete_callback; + + http_parser parser; + http_parser_init(&parser, HTTP_RESPONSE); + parser.data = (void *)&reply; + + fd_set fds; + timeval tv; + + time_t timestamp = time(NULL); + while(time(NULL) - timestamp < 10) { + FD_ZERO(&fds); + FD_SET(sockfd, &fds); + + tv.tv_sec = 0; + tv.tv_usec = 100000; + + n = select(sockfd + 1, &fds, NULL, NULL, &tv); + if(n == 0) + continue; + else if(n < 0) + break; + else { + char buffer[2048]; + n = recv(sockfd, buffer, 2048, 0); + if (n > 0) + http_parser_execute(&parser, &settings, buffer, n); + else if(n <= 0) + break; + + if (reply.complete) + break; + } + } + 
+ close(sockfd); + + if(reply.body != "") + break; + } + + return reply.body; +}; + +string http_internal_impl::_http_get(const string &url) { + return __get_response(url, "", ""); +} + +string http_internal_impl::_http_post(const string &url, const string &post_data, const string &content_type) { + return __get_response(url, post_data, content_type); +} + diff --git a/src/net/strategies/Http.h b/src/net/strategies/Http.h new file mode 100644 index 00000000..0f0e38f7 --- /dev/null +++ b/src/net/strategies/Http.h @@ -0,0 +1,33 @@ +// +// Created by Haifa Bogdan Adnan on 04/08/2018. +// + +#ifndef DONATE_HTTP_H +#define DONATE_HTTP_H + +using namespace std; + +class http { +public: + http(); + virtual ~http(); + + virtual string _http_get(const string &url) { return ""; }; + virtual string _http_post(const string &url, const string &post_data, const string &content_type) { return ""; }; + string _encode(const string &src); + vector _resolve_host(const string &hostname); + +private: + static int __socketlib_reference; +}; + +class http_internal_impl : public http { +public: + virtual string _http_get(const string &url); + virtual string _http_post(const string &url, const string &post_data, const string &content_type); + +private: + string __get_response(const string &url, const string &post_data, const string &content_type); +}; + +#endif //DONATE_HTTP_H diff --git a/src/net/strategies/http_parser/AUTHORS b/src/net/strategies/http_parser/AUTHORS new file mode 100755 index 00000000..5323b685 --- /dev/null +++ b/src/net/strategies/http_parser/AUTHORS @@ -0,0 +1,68 @@ +# Authors ordered by first contribution. 
+Ryan Dahl +Jeremy Hinegardner +Sergey Shepelev +Joe Damato +tomika +Phoenix Sol +Cliff Frey +Ewen Cheslack-Postava +Santiago Gala +Tim Becker +Jeff Terrace +Ben Noordhuis +Nathan Rajlich +Mark Nottingham +Aman Gupta +Tim Becker +Sean Cunningham +Peter Griess +Salman Haq +Cliff Frey +Jon Kolb +Fouad Mardini +Paul Querna +Felix Geisendörfer +koichik +Andre Caron +Ivo Raisr +James McLaughlin +David Gwynne +Thomas LE ROUX +Randy Rizun +Andre Louis Caron +Simon Zimmermann +Erik Dubbelboer +Martell Malone +Bertrand Paquet +BogDan Vatra +Peter Faiman +Corey Richardson +Tóth Tamás +Cam Swords +Chris Dickinson +Uli Köhler +Charlie Somerville +Patrik Stutz +Fedor Indutny +runner +Alexis Campailla +David Wragg +Vinnie Falco +Alex Butum +Rex Feng +Alex Kocharin +Mark Koopman +Helge Heß +Alexis La Goutte +George Miroshnykov +Maciej Małecki +Marc O'Morain +Jeff Pinner +Timothy J Fontaine +Akagi201 +Romain Giraud +Jay Satiro +Arne Steen +Kjell Schubert +Olivier Mengué diff --git a/src/net/strategies/http_parser/LICENSE-MIT b/src/net/strategies/http_parser/LICENSE-MIT new file mode 100755 index 00000000..1ec0ab4e --- /dev/null +++ b/src/net/strategies/http_parser/LICENSE-MIT @@ -0,0 +1,19 @@ +Copyright Joyent, Inc. and other Node contributors. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +IN THE SOFTWARE. diff --git a/src/net/strategies/http_parser/README.md b/src/net/strategies/http_parser/README.md new file mode 100755 index 00000000..b265d717 --- /dev/null +++ b/src/net/strategies/http_parser/README.md @@ -0,0 +1,246 @@ +HTTP Parser +=========== + +[![Build Status](https://api.travis-ci.org/nodejs/http-parser.svg?branch=master)](https://travis-ci.org/nodejs/http-parser) + +This is a parser for HTTP messages written in C. It parses both requests and +responses. The parser is designed to be used in performance HTTP +applications. It does not make any syscalls nor allocations, it does not +buffer data, it can be interrupted at anytime. Depending on your +architecture, it only requires about 40 bytes of data per message +stream (in a web server that is per connection). + +Features: + + * No dependencies + * Handles persistent streams (keep-alive). + * Decodes chunked encoding. + * Upgrade support + * Defends against buffer overflow attacks. + +The parser extracts the following information from HTTP messages: + + * Header fields and values + * Content-Length + * Request method + * Response status code + * Transfer-Encoding + * HTTP version + * Request URL + * Message body + + +Usage +----- + +One `http_parser` object is used per TCP connection. Initialize the struct +using `http_parser_init()` and set the callbacks. That might look something +like this for a request parser: +```c +http_parser_settings settings; +settings.on_url = my_url_callback; +settings.on_header_field = my_header_field_callback; +/* ... 
*/ + +http_parser *parser = malloc(sizeof(http_parser)); +http_parser_init(parser, HTTP_REQUEST); +parser->data = my_socket; +``` + +When data is received on the socket execute the parser and check for errors. + +```c +size_t len = 80*1024, nparsed; +char buf[len]; +ssize_t recved; + +recved = recv(fd, buf, len, 0); + +if (recved < 0) { + /* Handle error. */ +} + +/* Start up / continue the parser. + * Note we pass recved==0 to signal that EOF has been received. + */ +nparsed = http_parser_execute(parser, &settings, buf, recved); + +if (parser->upgrade) { + /* handle new protocol */ +} else if (nparsed != recved) { + /* Handle error. Usually just close the connection. */ +} +``` + +`http_parser` needs to know where the end of the stream is. For example, sometimes +servers send responses without Content-Length and expect the client to +consume input (for the body) until EOF. To tell `http_parser` about EOF, give +`0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors +can still be encountered during an EOF, so one must still be prepared +to receive them. + +Scalar valued message information such as `status_code`, `method`, and the +HTTP version are stored in the parser structure. This data is only +temporarily stored in `http_parser` and gets reset on each new message. If +this information is needed later, copy it out of the structure during the +`headers_complete` callback. + +The parser decodes the transfer-encoding for both requests and responses +transparently. That is, a chunked encoding is decoded before being sent to +the on_body callback. + + +The Special Problem of Upgrade +------------------------------ + +`http_parser` supports upgrading the connection to a different protocol.
An +increasingly common example of this is the WebSocket protocol which sends +a request like + + GET /demo HTTP/1.1 + Upgrade: WebSocket + Connection: Upgrade + Host: example.com + Origin: http://example.com + WebSocket-Protocol: sample + +followed by non-HTTP data. + +(See [RFC6455](https://tools.ietf.org/html/rfc6455) for more information on the +WebSocket protocol.) + +To support this, the parser will treat this as a normal HTTP message without a +body, issuing both on_headers_complete and on_message_complete callbacks. However, +http_parser_execute() will stop parsing at the end of the headers and return. + +The user is expected to check if `parser->upgrade` has been set to 1 after +`http_parser_execute()` returns. Non-HTTP data begins in the supplied buffer +at the offset given by the return value of `http_parser_execute()`. + + +Callbacks +--------- + +During the `http_parser_execute()` call, the callbacks set in +`http_parser_settings` will be executed. The parser maintains state and +never looks behind, so buffering the data is not necessary. If you need to +save certain data for later usage, you can do that from the callbacks. + +There are two types of callbacks: + +* notification `typedef int (*http_cb) (http_parser*);` + Callbacks: on_message_begin, on_headers_complete, on_message_complete. +* data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);` + Callbacks: (requests only) on_url, + (common) on_header_field, on_header_value, on_body; + +Callbacks must return 0 on success. Returning a non-zero value indicates +error to the parser, making it exit immediately. + +For cases where it is necessary to pass local information to/from a callback, +the `http_parser` object's `data` field can be used. +An example of such a case is when using threads to handle a socket connection, +parse a request, and then give a response over that socket. By instantiation +of a thread-local struct containing relevant data (e.g.
accepted socket, +allocated memory for callbacks to write into, etc), a parser's callbacks are +able to communicate data between the scope of the thread and the scope of the +callback in a threadsafe manner. This allows `http_parser` to be used in +multi-threaded contexts. + +Example: +```c + typedef struct { + socket_t sock; + void* buffer; + int buf_len; + } custom_data_t; + + +int my_url_callback(http_parser* parser, const char *at, size_t length) { + /* access to thread local custom_data_t struct. + Use this access to save parsed data for later use into thread local + buffer, or communicate over socket + */ + parser->data; + ... + return 0; +} + +... + +void http_parser_thread(socket_t sock) { + int nparsed = 0; + /* allocate memory for user data */ + custom_data_t *my_data = malloc(sizeof(custom_data_t)); + + /* some information for use by callbacks. + * achieves thread -> callback information flow */ + my_data->sock = sock; + + /* instantiate a thread-local parser */ + http_parser *parser = malloc(sizeof(http_parser)); + http_parser_init(parser, HTTP_REQUEST); /* initialise parser */ + /* this custom data reference is accessible through the reference to the + parser supplied to callback functions */ + parser->data = my_data; + + http_parser_settings settings; /* set up callbacks */ + settings.on_url = my_url_callback; + + /* execute parser */ + nparsed = http_parser_execute(parser, &settings, buf, recved); + + ... + /* parsed information copied from callback. + can now perform action on data copied into thread-local memory from callbacks. + achieves callback -> thread information flow */ + my_data->buffer; + ... +} + +``` + +In case you parse an HTTP message in chunks (i.e. `read()` request line +from socket, parse, read half headers, parse, etc) your data callbacks +may be called more than once. `http_parser` guarantees that the data pointer is only +valid for the lifetime of the callback.
You can also `read()` into a heap allocated +buffer to avoid copying memory around if this fits your application. + +Reading headers may be a tricky task if you read/parse headers partially. +Basically, you need to remember whether last header callback was field or value +and apply the following logic: + + (on_header_field and on_header_value shortened to on_h_*) + ------------------------ ------------ -------------------------------------------- + | State (prev. callback) | Callback | Description/action | + ------------------------ ------------ -------------------------------------------- + | nothing (first call) | on_h_field | Allocate new buffer and copy callback data | + | | | into it | + ------------------------ ------------ -------------------------------------------- + | value | on_h_field | New header started. | + | | | Copy current name,value buffers to headers | + | | | list and allocate new buffer for new name | + ------------------------ ------------ -------------------------------------------- + | field | on_h_field | Previous name continues. Reallocate name | + | | | buffer and append callback data to it | + ------------------------ ------------ -------------------------------------------- + | field | on_h_value | Value for current header started. Allocate | + | | | new buffer and copy callback data to it | + ------------------------ ------------ -------------------------------------------- + | value | on_h_value | Value continues. Reallocate value buffer | + | | | and append callback data to it | + ------------------------ ------------ -------------------------------------------- + + +Parsing URLs +------------ + +A simplistic zero-copy URL parser is provided as `http_parser_parse_url()`. +Users of this library may wish to use it to parse URLs constructed from +consecutive `on_url` callbacks. 
+ +See examples of reading in headers: + +* [partial example](http://gist.github.com/155877) in C +* [from http-parser tests](http://github.com/joyent/http-parser/blob/37a0ff8/test.c#L403) in C +* [from Node library](http://github.com/joyent/node/blob/842eaf4/src/http.js#L284) in Javascript diff --git a/src/net/strategies/http_parser/http_parser.c b/src/net/strategies/http_parser/http_parser.c new file mode 100755 index 00000000..9941b7ea --- /dev/null +++ b/src/net/strategies/http_parser/http_parser.c @@ -0,0 +1,2462 @@ +/* Copyright Joyent, Inc. and other Node contributors. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include "http_parser.h" +#include +#include +#include +#include +#include + +#ifndef ULLONG_MAX +# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */ +#endif + +#ifndef MIN +# define MIN(a,b) ((a) < (b) ? 
(a) : (b)) +#endif + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) +#endif + +#ifndef BIT_AT +# define BIT_AT(a, i) \ + (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \ + (1 << ((unsigned int) (i) & 7)))) +#endif + +#ifndef ELEM_AT +# define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v)) +#endif + +#define SET_ERRNO(e) \ +do { \ + parser->nread = nread; \ + parser->http_errno = (e); \ +} while(0) + +#define CURRENT_STATE() p_state +#define UPDATE_STATE(V) p_state = (enum state) (V); +#define RETURN(V) \ +do { \ + parser->nread = nread; \ + parser->state = CURRENT_STATE(); \ + return (V); \ +} while (0); +#define REEXECUTE() \ + goto reexecute; \ + + +#ifdef __GNUC__ +# define LIKELY(X) __builtin_expect(!!(X), 1) +# define UNLIKELY(X) __builtin_expect(!!(X), 0) +#else +# define LIKELY(X) (X) +# define UNLIKELY(X) (X) +#endif + + +/* Run the notify callback FOR, returning ER if it fails */ +#define CALLBACK_NOTIFY_(FOR, ER) \ +do { \ + assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ + \ + if (LIKELY(settings->on_##FOR)) { \ + parser->state = CURRENT_STATE(); \ + if (UNLIKELY(0 != settings->on_##FOR(parser))) { \ + SET_ERRNO(HPE_CB_##FOR); \ + } \ + UPDATE_STATE(parser->state); \ + \ + /* We either errored above or got paused; get out */ \ + if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \ + return (ER); \ + } \ + } \ +} while (0) + +/* Run the notify callback FOR and consume the current byte */ +#define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1) + +/* Run the notify callback FOR and don't consume the current byte */ +#define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data) + +/* Run data callback FOR with LEN bytes, returning ER if it fails */ +#define CALLBACK_DATA_(FOR, LEN, ER) \ +do { \ + assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ + \ + if (FOR##_mark) { \ + if (LIKELY(settings->on_##FOR)) { \ + parser->state = CURRENT_STATE(); \ + if (UNLIKELY(0 != \ + 
settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \ + SET_ERRNO(HPE_CB_##FOR); \ + } \ + UPDATE_STATE(parser->state); \ + \ + /* We either errored above or got paused; get out */ \ + if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \ + return (ER); \ + } \ + } \ + FOR##_mark = NULL; \ + } \ +} while (0) + +/* Run the data callback FOR and consume the current byte */ +#define CALLBACK_DATA(FOR) \ + CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1) + +/* Run the data callback FOR and don't consume the current byte */ +#define CALLBACK_DATA_NOADVANCE(FOR) \ + CALLBACK_DATA_(FOR, p - FOR##_mark, p - data) + +/* Set the mark FOR; non-destructive if mark is already set */ +#define MARK(FOR) \ +do { \ + if (!FOR##_mark) { \ + FOR##_mark = p; \ + } \ +} while (0) + +/* Don't allow the total size of the HTTP headers (including the status + * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect + * embedders against denial-of-service attacks where the attacker feeds + * us a never-ending header that the embedder keeps buffering. + * + * This check is arguably the responsibility of embedders but we're doing + * it on the embedder's behalf because most won't bother and this way we + * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger + * than any reasonable request or response so this should never affect + * day-to-day operation. 
+ */ +#define COUNT_HEADER_SIZE(V) \ +do { \ + nread += (V); \ + if (UNLIKELY(nread > (HTTP_MAX_HEADER_SIZE))) { \ + SET_ERRNO(HPE_HEADER_OVERFLOW); \ + goto error; \ + } \ +} while (0) + + +#define PROXY_CONNECTION "proxy-connection" +#define CONNECTION "connection" +#define CONTENT_LENGTH "content-length" +#define TRANSFER_ENCODING "transfer-encoding" +#define UPGRADE "upgrade" +#define CHUNKED "chunked" +#define KEEP_ALIVE "keep-alive" +#define CLOSE "close" + + +static const char *method_strings[] = + { +#define XX(num, name, string) #string, + HTTP_METHOD_MAP(XX) +#undef XX + }; + + +/* Tokens as defined by rfc 2616. Also lowercases them. + * token = 1* + * separators = "(" | ")" | "<" | ">" | "@" + * | "," | ";" | ":" | "\" | <"> + * | "/" | "[" | "]" | "?" | "=" + * | "{" | "}" | SP | HT + */ +static const char tokens[256] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + ' ', '!', 0, '#', '$', '%', '&', '\'', +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ + 0, 0, '*', '+', 0, '-', '.', 0, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + '0', '1', '2', '3', '4', '5', '6', '7', +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? 
*/ + '8', '9', 0, 0, 0, 0, 0, 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 'x', 'y', 'z', 0, 0, 0, '^', '_', +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 'x', 'y', 'z', 0, '|', 0, '~', 0 }; + + +static const int8_t unhex[256] = + {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1 + ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + }; + + +#if HTTP_PARSER_STRICT +# define T(v) 0 +#else +# define T(v) v +#endif + + +static const uint8_t normal_url_char[32] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128, +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 
47 / */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, }; + +#undef T + +enum state + { s_dead = 1 /* important that this is > 0 */ + + , s_start_req_or_res + , s_res_or_resp_H + , s_start_res + , s_res_H + , s_res_HT + , s_res_HTT + , s_res_HTTP + , s_res_http_major + , s_res_http_dot + , s_res_http_minor + , s_res_http_end + , s_res_first_status_code + , s_res_status_code + , s_res_status_start + , s_res_status + , s_res_line_almost_done + + , s_start_req + + , s_req_method + , s_req_spaces_before_url + , s_req_schema + , s_req_schema_slash + , s_req_schema_slash_slash + , s_req_server_start + , s_req_server + , s_req_server_with_at + , s_req_path + , s_req_query_string_start + , s_req_query_string + , s_req_fragment_start + , s_req_fragment + , s_req_http_start + , s_req_http_H + , s_req_http_HT + , s_req_http_HTT + , s_req_http_HTTP + , s_req_http_major + , s_req_http_dot + , s_req_http_minor + , s_req_http_end + , s_req_line_almost_done + + , s_header_field_start + , s_header_field + , s_header_value_discard_ws + , s_header_value_discard_ws_almost_done + , s_header_value_discard_lws + , s_header_value_start + 
, s_header_value + , s_header_value_lws + + , s_header_almost_done + + , s_chunk_size_start + , s_chunk_size + , s_chunk_parameters + , s_chunk_size_almost_done + + , s_headers_almost_done + , s_headers_done + + /* Important: 's_headers_done' must be the last 'header' state. All + * states beyond this must be 'body' states. It is used for overflow + * checking. See the PARSING_HEADER() macro. + */ + + , s_chunk_data + , s_chunk_data_almost_done + , s_chunk_data_done + + , s_body_identity + , s_body_identity_eof + + , s_message_done + }; + + +#define PARSING_HEADER(state) (state <= s_headers_done) + + +enum header_states + { h_general = 0 + , h_C + , h_CO + , h_CON + + , h_matching_connection + , h_matching_proxy_connection + , h_matching_content_length + , h_matching_transfer_encoding + , h_matching_upgrade + + , h_connection + , h_content_length + , h_content_length_num + , h_content_length_ws + , h_transfer_encoding + , h_upgrade + + , h_matching_transfer_encoding_chunked + , h_matching_connection_token_start + , h_matching_connection_keep_alive + , h_matching_connection_close + , h_matching_connection_upgrade + , h_matching_connection_token + + , h_transfer_encoding_chunked + , h_connection_keep_alive + , h_connection_close + , h_connection_upgrade + }; + +enum http_host_state + { + s_http_host_dead = 1 + , s_http_userinfo_start + , s_http_userinfo + , s_http_host_start + , s_http_host_v6_start + , s_http_host + , s_http_host_v6 + , s_http_host_v6_end + , s_http_host_v6_zone_start + , s_http_host_v6_zone + , s_http_host_port_start + , s_http_host_port +}; + +/* Macros for character classes; depends on strict-mode */ +#define CR '\r' +#define LF '\n' +#define LOWER(c) (unsigned char)(c | 0x20) +#define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z') +#define IS_NUM(c) ((c) >= '0' && (c) <= '9') +#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c)) +#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f')) +#define IS_MARK(c) ((c) == '-' || (c) == '_' || 
(c) == '.' || \ + (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \ + (c) == ')') +#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \ + (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \ + (c) == '$' || (c) == ',') + +#define STRICT_TOKEN(c) ((c == ' ') ? 0 : tokens[(unsigned char)c]) + +#if HTTP_PARSER_STRICT +#define TOKEN(c) STRICT_TOKEN(c) +#define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c)) +#define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-') +#else +#define TOKEN(c) tokens[(unsigned char)c] +#define IS_URL_CHAR(c) \ + (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80)) +#define IS_HOST_CHAR(c) \ + (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_') +#endif + +/** + * Verify that a char is a valid visible (printable) US-ASCII + * character or %x80-FF + **/ +#define IS_HEADER_CHAR(ch) \ + (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127)) + +#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res) + + +#if HTTP_PARSER_STRICT +# define STRICT_CHECK(cond) \ +do { \ + if (cond) { \ + SET_ERRNO(HPE_STRICT); \ + goto error; \ + } \ +} while (0) +# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead) +#else +# define STRICT_CHECK(cond) +# define NEW_MESSAGE() start_state +#endif + + +/* Map errno values to strings for human-readable output */ +#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s }, +static struct { + const char *name; + const char *description; +} http_strerror_tab[] = { + HTTP_ERRNO_MAP(HTTP_STRERROR_GEN) +}; +#undef HTTP_STRERROR_GEN + +int http_message_needs_eof(const http_parser *parser); + +/* Our URL parser. + * + * This is designed to be shared by http_parser_execute() for URL validation, + * hence it has a state transition + byte-for-byte interface. 
In addition, it + * is meant to be embedded in http_parser_parse_url(), which does the dirty + * work of turning state transitions URL components for its API. + * + * This function should only be invoked with non-space characters. It is + * assumed that the caller cares about (and can detect) the transition between + * URL and non-URL states by looking for these. + */ +static enum state +parse_url_char(enum state s, const char ch) +{ + if (ch == ' ' || ch == '\r' || ch == '\n') { + return s_dead; + } + +#if HTTP_PARSER_STRICT + if (ch == '\t' || ch == '\f') { + return s_dead; + } +#endif + + switch (s) { + case s_req_spaces_before_url: + /* Proxied requests are followed by scheme of an absolute URI (alpha). + * All methods except CONNECT are followed by '/' or '*'. + */ + + if (ch == '/' || ch == '*') { + return s_req_path; + } + + if (IS_ALPHA(ch)) { + return s_req_schema; + } + + break; + + case s_req_schema: + if (IS_ALPHA(ch)) { + return s; + } + + if (ch == ':') { + return s_req_schema_slash; + } + + break; + + case s_req_schema_slash: + if (ch == '/') { + return s_req_schema_slash_slash; + } + + break; + + case s_req_schema_slash_slash: + if (ch == '/') { + return s_req_server_start; + } + + break; + + case s_req_server_with_at: + if (ch == '@') { + return s_dead; + } + + /* fall through */ + case s_req_server_start: + case s_req_server: + if (ch == '/') { + return s_req_path; + } + + if (ch == '?') { + return s_req_query_string_start; + } + + if (ch == '@') { + return s_req_server_with_at; + } + + if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') { + return s_req_server; + } + + break; + + case s_req_path: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + return s_req_query_string_start; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_query_string_start: + case s_req_query_string: + if (IS_URL_CHAR(ch)) { + return s_req_query_string; + } + + switch (ch) { + case '?': + /* allow extra '?' 
in query string */ + return s_req_query_string; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_fragment_start: + if (IS_URL_CHAR(ch)) { + return s_req_fragment; + } + + switch (ch) { + case '?': + return s_req_fragment; + + case '#': + return s; + } + + break; + + case s_req_fragment: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + case '#': + return s; + } + + break; + + default: + break; + } + + /* We should never fall out of the switch above unless there's an error */ + return s_dead; +} + +size_t http_parser_execute (http_parser *parser, + const http_parser_settings *settings, + const char *data, + size_t len) +{ + char c, ch; + int8_t unhex_val; + const char *p = data; + const char *header_field_mark = 0; + const char *header_value_mark = 0; + const char *url_mark = 0; + const char *body_mark = 0; + const char *status_mark = 0; + enum state p_state = (enum state) parser->state; + const unsigned int lenient = parser->lenient_http_headers; + uint32_t nread = parser->nread; + + /* We're in an error state. Don't bother doing anything. */ + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { + return 0; + } + + if (len == 0) { + switch (CURRENT_STATE()) { + case s_body_identity_eof: + /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if + * we got paused. 
+ */ + CALLBACK_NOTIFY_NOADVANCE(message_complete); + return 0; + + case s_dead: + case s_start_req_or_res: + case s_start_res: + case s_start_req: + return 0; + + default: + SET_ERRNO(HPE_INVALID_EOF_STATE); + return 1; + } + } + + + if (CURRENT_STATE() == s_header_field) + header_field_mark = data; + if (CURRENT_STATE() == s_header_value) + header_value_mark = data; + switch (CURRENT_STATE()) { + case s_req_path: + case s_req_schema: + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + case s_req_server: + case s_req_server_with_at: + case s_req_query_string_start: + case s_req_query_string: + case s_req_fragment_start: + case s_req_fragment: + url_mark = data; + break; + case s_res_status: + status_mark = data; + break; + default: + break; + } + + for (p=data; p != data + len; p++) { + ch = *p; + + if (PARSING_HEADER(CURRENT_STATE())) + COUNT_HEADER_SIZE(1); + +reexecute: + switch (CURRENT_STATE()) { + + case s_dead: + /* this state is used after a 'Connection: close' message + * the parser will error out if it reads another message + */ + if (LIKELY(ch == CR || ch == LF)) + break; + + SET_ERRNO(HPE_CLOSED_CONNECTION); + goto error; + + case s_start_req_or_res: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->content_length = ULLONG_MAX; + + if (ch == 'H') { + UPDATE_STATE(s_res_or_resp_H); + + CALLBACK_NOTIFY(message_begin); + } else { + parser->type = HTTP_REQUEST; + UPDATE_STATE(s_start_req); + REEXECUTE(); + } + + break; + } + + case s_res_or_resp_H: + if (ch == 'T') { + parser->type = HTTP_RESPONSE; + UPDATE_STATE(s_res_HT); + } else { + if (UNLIKELY(ch != 'E')) { + SET_ERRNO(HPE_INVALID_CONSTANT); + goto error; + } + + parser->type = HTTP_REQUEST; + parser->method = HTTP_HEAD; + parser->index = 2; + UPDATE_STATE(s_req_method); + } + break; + + case s_start_res: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->content_length = ULLONG_MAX; + + if (ch == 'H') { + 
UPDATE_STATE(s_res_H); + } else { + SET_ERRNO(HPE_INVALID_CONSTANT); + goto error; + } + + CALLBACK_NOTIFY(message_begin); + break; + } + + case s_res_H: + STRICT_CHECK(ch != 'T'); + UPDATE_STATE(s_res_HT); + break; + + case s_res_HT: + STRICT_CHECK(ch != 'T'); + UPDATE_STATE(s_res_HTT); + break; + + case s_res_HTT: + STRICT_CHECK(ch != 'P'); + UPDATE_STATE(s_res_HTTP); + break; + + case s_res_HTTP: + STRICT_CHECK(ch != '/'); + UPDATE_STATE(s_res_http_major); + break; + + case s_res_http_major: + if (UNLIKELY(!IS_NUM(ch))) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_major = ch - '0'; + UPDATE_STATE(s_res_http_dot); + break; + + case s_res_http_dot: + { + if (UNLIKELY(ch != '.')) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + UPDATE_STATE(s_res_http_minor); + break; + } + + case s_res_http_minor: + if (UNLIKELY(!IS_NUM(ch))) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_minor = ch - '0'; + UPDATE_STATE(s_res_http_end); + break; + + case s_res_http_end: + { + if (UNLIKELY(ch != ' ')) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + UPDATE_STATE(s_res_first_status_code); + break; + } + + case s_res_first_status_code: + { + if (!IS_NUM(ch)) { + if (ch == ' ') { + break; + } + + SET_ERRNO(HPE_INVALID_STATUS); + goto error; + } + parser->status_code = ch - '0'; + UPDATE_STATE(s_res_status_code); + break; + } + + case s_res_status_code: + { + if (!IS_NUM(ch)) { + switch (ch) { + case ' ': + UPDATE_STATE(s_res_status_start); + break; + case CR: + case LF: + UPDATE_STATE(s_res_status_start); + REEXECUTE(); + break; + default: + SET_ERRNO(HPE_INVALID_STATUS); + goto error; + } + break; + } + + parser->status_code *= 10; + parser->status_code += ch - '0'; + + if (UNLIKELY(parser->status_code > 999)) { + SET_ERRNO(HPE_INVALID_STATUS); + goto error; + } + + break; + } + + case s_res_status_start: + { + MARK(status); + UPDATE_STATE(s_res_status); + parser->index = 0; + + if (ch == CR || ch == LF) + 
REEXECUTE(); + + break; + } + + case s_res_status: + if (ch == CR) { + UPDATE_STATE(s_res_line_almost_done); + CALLBACK_DATA(status); + break; + } + + if (ch == LF) { + UPDATE_STATE(s_header_field_start); + CALLBACK_DATA(status); + break; + } + + break; + + case s_res_line_almost_done: + STRICT_CHECK(ch != LF); + UPDATE_STATE(s_header_field_start); + break; + + case s_start_req: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->content_length = ULLONG_MAX; + + if (UNLIKELY(!IS_ALPHA(ch))) { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + + parser->method = (enum http_method) 0; + parser->index = 1; + switch (ch) { + case 'A': parser->method = HTTP_ACL; break; + case 'B': parser->method = HTTP_BIND; break; + case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break; + case 'D': parser->method = HTTP_DELETE; break; + case 'G': parser->method = HTTP_GET; break; + case 'H': parser->method = HTTP_HEAD; break; + case 'L': parser->method = HTTP_LOCK; /* or LINK */ break; + case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break; + case 'N': parser->method = HTTP_NOTIFY; break; + case 'O': parser->method = HTTP_OPTIONS; break; + case 'P': parser->method = HTTP_POST; + /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */ + break; + case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break; + case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break; + case 'T': parser->method = HTTP_TRACE; break; + case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break; + default: + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + UPDATE_STATE(s_req_method); + + CALLBACK_NOTIFY(message_begin); + + break; + } + + case s_req_method: + { + const char *matcher; + if (UNLIKELY(ch == '\0')) { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + + matcher = method_strings[parser->method]; + if (ch == ' ' && matcher[parser->index] == '\0') { + 
UPDATE_STATE(s_req_spaces_before_url); + } else if (ch == matcher[parser->index]) { + ; /* nada */ + } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') { + + switch (parser->method << 16 | parser->index << 8 | ch) { +#define XX(meth, pos, ch, new_meth) \ + case (HTTP_##meth << 16 | pos << 8 | ch): \ + parser->method = HTTP_##new_meth; break; + + XX(POST, 1, 'U', PUT) + XX(POST, 1, 'A', PATCH) + XX(POST, 1, 'R', PROPFIND) + XX(PUT, 2, 'R', PURGE) + XX(CONNECT, 1, 'H', CHECKOUT) + XX(CONNECT, 2, 'P', COPY) + XX(MKCOL, 1, 'O', MOVE) + XX(MKCOL, 1, 'E', MERGE) + XX(MKCOL, 1, '-', MSEARCH) + XX(MKCOL, 2, 'A', MKACTIVITY) + XX(MKCOL, 3, 'A', MKCALENDAR) + XX(SUBSCRIBE, 1, 'E', SEARCH) + XX(SUBSCRIBE, 1, 'O', SOURCE) + XX(REPORT, 2, 'B', REBIND) + XX(PROPFIND, 4, 'P', PROPPATCH) + XX(LOCK, 1, 'I', LINK) + XX(UNLOCK, 2, 'S', UNSUBSCRIBE) + XX(UNLOCK, 2, 'B', UNBIND) + XX(UNLOCK, 3, 'I', UNLINK) +#undef XX + default: + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + + ++parser->index; + break; + } + + case s_req_spaces_before_url: + { + if (ch == ' ') break; + + MARK(url); + if (parser->method == HTTP_CONNECT) { + UPDATE_STATE(s_req_server_start); + } + + UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); + if (UNLIKELY(CURRENT_STATE() == s_dead)) { + SET_ERRNO(HPE_INVALID_URL); + goto error; + } + + break; + } + + case s_req_schema: + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + { + switch (ch) { + /* No whitespace allowed here */ + case ' ': + case CR: + case LF: + SET_ERRNO(HPE_INVALID_URL); + goto error; + default: + UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); + if (UNLIKELY(CURRENT_STATE() == s_dead)) { + SET_ERRNO(HPE_INVALID_URL); + goto error; + } + } + + break; + } + + case s_req_server: + case s_req_server_with_at: + case s_req_path: + case s_req_query_string_start: + case s_req_query_string: + case s_req_fragment_start: + case s_req_fragment: + { + 
switch (ch) { + case ' ': + UPDATE_STATE(s_req_http_start); + CALLBACK_DATA(url); + break; + case CR: + case LF: + parser->http_major = 0; + parser->http_minor = 9; + UPDATE_STATE((ch == CR) ? + s_req_line_almost_done : + s_header_field_start); + CALLBACK_DATA(url); + break; + default: + UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); + if (UNLIKELY(CURRENT_STATE() == s_dead)) { + SET_ERRNO(HPE_INVALID_URL); + goto error; + } + } + break; + } + + case s_req_http_start: + switch (ch) { + case 'H': + UPDATE_STATE(s_req_http_H); + break; + case ' ': + break; + default: + SET_ERRNO(HPE_INVALID_CONSTANT); + goto error; + } + break; + + case s_req_http_H: + STRICT_CHECK(ch != 'T'); + UPDATE_STATE(s_req_http_HT); + break; + + case s_req_http_HT: + STRICT_CHECK(ch != 'T'); + UPDATE_STATE(s_req_http_HTT); + break; + + case s_req_http_HTT: + STRICT_CHECK(ch != 'P'); + UPDATE_STATE(s_req_http_HTTP); + break; + + case s_req_http_HTTP: + STRICT_CHECK(ch != '/'); + UPDATE_STATE(s_req_http_major); + break; + + case s_req_http_major: + if (UNLIKELY(!IS_NUM(ch))) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_major = ch - '0'; + UPDATE_STATE(s_req_http_dot); + break; + + case s_req_http_dot: + { + if (UNLIKELY(ch != '.')) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + UPDATE_STATE(s_req_http_minor); + break; + } + + case s_req_http_minor: + if (UNLIKELY(!IS_NUM(ch))) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_minor = ch - '0'; + UPDATE_STATE(s_req_http_end); + break; + + case s_req_http_end: + { + if (ch == CR) { + UPDATE_STATE(s_req_line_almost_done); + break; + } + + if (ch == LF) { + UPDATE_STATE(s_header_field_start); + break; + } + + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + break; + } + + /* end of request line */ + case s_req_line_almost_done: + { + if (UNLIKELY(ch != LF)) { + SET_ERRNO(HPE_LF_EXPECTED); + goto error; + } + + UPDATE_STATE(s_header_field_start); + break; + } + + case s_header_field_start: 
+ { + if (ch == CR) { + UPDATE_STATE(s_headers_almost_done); + break; + } + + if (ch == LF) { + /* they might be just sending \n instead of \r\n so this would be + * the second \n to denote the end of headers*/ + UPDATE_STATE(s_headers_almost_done); + REEXECUTE(); + } + + c = TOKEN(ch); + + if (UNLIKELY(!c)) { + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); + goto error; + } + + MARK(header_field); + + parser->index = 0; + UPDATE_STATE(s_header_field); + + switch (c) { + case 'c': + parser->header_state = h_C; + break; + + case 'p': + parser->header_state = h_matching_proxy_connection; + break; + + case 't': + parser->header_state = h_matching_transfer_encoding; + break; + + case 'u': + parser->header_state = h_matching_upgrade; + break; + + default: + parser->header_state = h_general; + break; + } + break; + } + + case s_header_field: + { + const char* start = p; + for (; p != data + len; p++) { + ch = *p; + c = TOKEN(ch); + + if (!c) + break; + + switch (parser->header_state) { + case h_general: { + size_t limit = data + len - p; + limit = MIN(limit, HTTP_MAX_HEADER_SIZE); + while (p+1 < data + limit && TOKEN(p[1])) { + p++; + } + break; + } + + case h_C: + parser->index++; + parser->header_state = (c == 'o' ? h_CO : h_general); + break; + + case h_CO: + parser->index++; + parser->header_state = (c == 'n' ? 
h_CON : h_general); + break; + + case h_CON: + parser->index++; + switch (c) { + case 'n': + parser->header_state = h_matching_connection; + break; + case 't': + parser->header_state = h_matching_content_length; + break; + default: + parser->header_state = h_general; + break; + } + break; + + /* connection */ + + case h_matching_connection: + parser->index++; + if (parser->index > sizeof(CONNECTION)-1 + || c != CONNECTION[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CONNECTION)-2) { + parser->header_state = h_connection; + } + break; + + /* proxy-connection */ + + case h_matching_proxy_connection: + parser->index++; + if (parser->index > sizeof(PROXY_CONNECTION)-1 + || c != PROXY_CONNECTION[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(PROXY_CONNECTION)-2) { + parser->header_state = h_connection; + } + break; + + /* content-length */ + + case h_matching_content_length: + parser->index++; + if (parser->index > sizeof(CONTENT_LENGTH)-1 + || c != CONTENT_LENGTH[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CONTENT_LENGTH)-2) { + parser->header_state = h_content_length; + } + break; + + /* transfer-encoding */ + + case h_matching_transfer_encoding: + parser->index++; + if (parser->index > sizeof(TRANSFER_ENCODING)-1 + || c != TRANSFER_ENCODING[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) { + parser->header_state = h_transfer_encoding; + } + break; + + /* upgrade */ + + case h_matching_upgrade: + parser->index++; + if (parser->index > sizeof(UPGRADE)-1 + || c != UPGRADE[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(UPGRADE)-2) { + parser->header_state = h_upgrade; + } + break; + + case h_connection: + case h_content_length: + case h_transfer_encoding: + case h_upgrade: + if (ch != ' ') parser->header_state = h_general; + 
break; + + default: + assert(0 && "Unknown header_state"); + break; + } + } + + if (p == data + len) { + --p; + COUNT_HEADER_SIZE(p - start); + break; + } + + COUNT_HEADER_SIZE(p - start); + + if (ch == ':') { + UPDATE_STATE(s_header_value_discard_ws); + CALLBACK_DATA(header_field); + break; + } + + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); + goto error; + } + + case s_header_value_discard_ws: + if (ch == ' ' || ch == '\t') break; + + if (ch == CR) { + UPDATE_STATE(s_header_value_discard_ws_almost_done); + break; + } + + if (ch == LF) { + UPDATE_STATE(s_header_value_discard_lws); + break; + } + + /* fall through */ + + case s_header_value_start: + { + MARK(header_value); + + UPDATE_STATE(s_header_value); + parser->index = 0; + + c = LOWER(ch); + + switch (parser->header_state) { + case h_upgrade: + parser->flags |= F_UPGRADE; + parser->header_state = h_general; + break; + + case h_transfer_encoding: + /* looking for 'Transfer-Encoding: chunked' */ + if ('c' == c) { + parser->header_state = h_matching_transfer_encoding_chunked; + } else { + parser->header_state = h_general; + } + break; + + case h_content_length: + if (UNLIKELY(!IS_NUM(ch))) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + + if (parser->flags & F_CONTENTLENGTH) { + SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); + goto error; + } + + parser->flags |= F_CONTENTLENGTH; + parser->content_length = ch - '0'; + parser->header_state = h_content_length_num; + break; + + case h_connection: + /* looking for 'Connection: keep-alive' */ + if (c == 'k') { + parser->header_state = h_matching_connection_keep_alive; + /* looking for 'Connection: close' */ + } else if (c == 'c') { + parser->header_state = h_matching_connection_close; + } else if (c == 'u') { + parser->header_state = h_matching_connection_upgrade; + } else { + parser->header_state = h_matching_connection_token; + } + break; + + /* Multi-value `Connection` header */ + case h_matching_connection_token_start: + break; + + default: + 
parser->header_state = h_general; + break; + } + break; + } + + case s_header_value: + { + const char* start = p; + enum header_states h_state = (enum header_states) parser->header_state; + for (; p != data + len; p++) { + ch = *p; + if (ch == CR) { + UPDATE_STATE(s_header_almost_done); + parser->header_state = h_state; + CALLBACK_DATA(header_value); + break; + } + + if (ch == LF) { + UPDATE_STATE(s_header_almost_done); + COUNT_HEADER_SIZE(p - start); + parser->header_state = h_state; + CALLBACK_DATA_NOADVANCE(header_value); + REEXECUTE(); + } + + if (!lenient && !IS_HEADER_CHAR(ch)) { + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); + goto error; + } + + c = LOWER(ch); + + switch (h_state) { + case h_general: + { + const char* p_cr; + const char* p_lf; + size_t limit = data + len - p; + + limit = MIN(limit, HTTP_MAX_HEADER_SIZE); + + p_cr = (const char*) memchr(p, CR, limit); + p_lf = (const char*) memchr(p, LF, limit); + if (p_cr != NULL) { + if (p_lf != NULL && p_cr >= p_lf) + p = p_lf; + else + p = p_cr; + } else if (UNLIKELY(p_lf != NULL)) { + p = p_lf; + } else { + p = data + len; + } + --p; + break; + } + + case h_connection: + case h_transfer_encoding: + assert(0 && "Shouldn't get here."); + break; + + case h_content_length: + if (ch == ' ') break; + h_state = h_content_length_num; + /* fall through */ + + case h_content_length_num: + { + uint64_t t; + + if (ch == ' ') { + h_state = h_content_length_ws; + break; + } + + if (UNLIKELY(!IS_NUM(ch))) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + parser->header_state = h_state; + goto error; + } + + t = parser->content_length; + t *= 10; + t += ch - '0'; + + /* Overflow? Test against a conservative limit for simplicity. 
*/ + if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + parser->header_state = h_state; + goto error; + } + + parser->content_length = t; + break; + } + + case h_content_length_ws: + if (ch == ' ') break; + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + parser->header_state = h_state; + goto error; + + /* Transfer-Encoding: chunked */ + case h_matching_transfer_encoding_chunked: + parser->index++; + if (parser->index > sizeof(CHUNKED)-1 + || c != CHUNKED[parser->index]) { + h_state = h_general; + } else if (parser->index == sizeof(CHUNKED)-2) { + h_state = h_transfer_encoding_chunked; + } + break; + + case h_matching_connection_token_start: + /* looking for 'Connection: keep-alive' */ + if (c == 'k') { + h_state = h_matching_connection_keep_alive; + /* looking for 'Connection: close' */ + } else if (c == 'c') { + h_state = h_matching_connection_close; + } else if (c == 'u') { + h_state = h_matching_connection_upgrade; + } else if (STRICT_TOKEN(c)) { + h_state = h_matching_connection_token; + } else if (c == ' ' || c == '\t') { + /* Skip lws */ + } else { + h_state = h_general; + } + break; + + /* looking for 'Connection: keep-alive' */ + case h_matching_connection_keep_alive: + parser->index++; + if (parser->index > sizeof(KEEP_ALIVE)-1 + || c != KEEP_ALIVE[parser->index]) { + h_state = h_matching_connection_token; + } else if (parser->index == sizeof(KEEP_ALIVE)-2) { + h_state = h_connection_keep_alive; + } + break; + + /* looking for 'Connection: close' */ + case h_matching_connection_close: + parser->index++; + if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) { + h_state = h_matching_connection_token; + } else if (parser->index == sizeof(CLOSE)-2) { + h_state = h_connection_close; + } + break; + + /* looking for 'Connection: upgrade' */ + case h_matching_connection_upgrade: + parser->index++; + if (parser->index > sizeof(UPGRADE) - 1 || + c != UPGRADE[parser->index]) { + h_state = 
h_matching_connection_token; + } else if (parser->index == sizeof(UPGRADE)-2) { + h_state = h_connection_upgrade; + } + break; + + case h_matching_connection_token: + if (ch == ',') { + h_state = h_matching_connection_token_start; + parser->index = 0; + } + break; + + case h_transfer_encoding_chunked: + if (ch != ' ') h_state = h_general; + break; + + case h_connection_keep_alive: + case h_connection_close: + case h_connection_upgrade: + if (ch == ',') { + if (h_state == h_connection_keep_alive) { + parser->flags |= F_CONNECTION_KEEP_ALIVE; + } else if (h_state == h_connection_close) { + parser->flags |= F_CONNECTION_CLOSE; + } else if (h_state == h_connection_upgrade) { + parser->flags |= F_CONNECTION_UPGRADE; + } + h_state = h_matching_connection_token_start; + parser->index = 0; + } else if (ch != ' ') { + h_state = h_matching_connection_token; + } + break; + + default: + UPDATE_STATE(s_header_value); + h_state = h_general; + break; + } + } + parser->header_state = h_state; + + if (p == data + len) + --p; + + COUNT_HEADER_SIZE(p - start); + break; + } + + case s_header_almost_done: + { + if (UNLIKELY(ch != LF)) { + SET_ERRNO(HPE_LF_EXPECTED); + goto error; + } + + UPDATE_STATE(s_header_value_lws); + break; + } + + case s_header_value_lws: + { + if (ch == ' ' || ch == '\t') { + UPDATE_STATE(s_header_value_start); + REEXECUTE(); + } + + /* finished the header */ + switch (parser->header_state) { + case h_connection_keep_alive: + parser->flags |= F_CONNECTION_KEEP_ALIVE; + break; + case h_connection_close: + parser->flags |= F_CONNECTION_CLOSE; + break; + case h_transfer_encoding_chunked: + parser->flags |= F_CHUNKED; + break; + case h_connection_upgrade: + parser->flags |= F_CONNECTION_UPGRADE; + break; + default: + break; + } + + UPDATE_STATE(s_header_field_start); + REEXECUTE(); + } + + case s_header_value_discard_ws_almost_done: + { + STRICT_CHECK(ch != LF); + UPDATE_STATE(s_header_value_discard_lws); + break; + } + + case s_header_value_discard_lws: + { + if 
(ch == ' ' || ch == '\t') { + UPDATE_STATE(s_header_value_discard_ws); + break; + } else { + switch (parser->header_state) { + case h_connection_keep_alive: + parser->flags |= F_CONNECTION_KEEP_ALIVE; + break; + case h_connection_close: + parser->flags |= F_CONNECTION_CLOSE; + break; + case h_connection_upgrade: + parser->flags |= F_CONNECTION_UPGRADE; + break; + case h_transfer_encoding_chunked: + parser->flags |= F_CHUNKED; + break; + default: + break; + } + + /* header value was empty */ + MARK(header_value); + UPDATE_STATE(s_header_field_start); + CALLBACK_DATA_NOADVANCE(header_value); + REEXECUTE(); + } + } + + case s_headers_almost_done: + { + STRICT_CHECK(ch != LF); + + if (parser->flags & F_TRAILING) { + /* End of a chunked request */ + UPDATE_STATE(s_message_done); + CALLBACK_NOTIFY_NOADVANCE(chunk_complete); + REEXECUTE(); + } + + /* Cannot use chunked encoding and a content-length header together + per the HTTP specification. */ + if ((parser->flags & F_CHUNKED) && + (parser->flags & F_CONTENTLENGTH)) { + SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); + goto error; + } + + UPDATE_STATE(s_headers_done); + + /* Set this here so that on_headers_complete() callbacks can see it */ + if ((parser->flags & F_UPGRADE) && + (parser->flags & F_CONNECTION_UPGRADE)) { + /* For responses, "Upgrade: foo" and "Connection: upgrade" are + * mandatory only when it is a 101 Switching Protocols response, + * otherwise it is purely informational, to announce support. + */ + parser->upgrade = + (parser->type == HTTP_REQUEST || parser->status_code == 101); + } else { + parser->upgrade = (parser->method == HTTP_CONNECT); + } + + /* Here we call the headers_complete callback. This is somewhat + * different than other callbacks because if the user returns 1, we + * will interpret that as saying that this message has no body. This + * is needed for the annoying case of recieving a response to a HEAD + * request. 
+ * + * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so + * we have to simulate it by handling a change in errno below. + */ + if (settings->on_headers_complete) { + switch (settings->on_headers_complete(parser)) { + case 0: + break; + + case 2: + parser->upgrade = 1; + + /* fall through */ + case 1: + parser->flags |= F_SKIPBODY; + break; + + default: + SET_ERRNO(HPE_CB_headers_complete); + RETURN(p - data); /* Error */ + } + } + + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { + RETURN(p - data); + } + + REEXECUTE(); + } + + case s_headers_done: + { + int hasBody; + STRICT_CHECK(ch != LF); + + parser->nread = 0; + nread = 0; + + hasBody = parser->flags & F_CHUNKED || + (parser->content_length > 0 && parser->content_length != ULLONG_MAX); + if (parser->upgrade && (parser->method == HTTP_CONNECT || + (parser->flags & F_SKIPBODY) || !hasBody)) { + /* Exit, the rest of the message is in a different protocol. */ + UPDATE_STATE(NEW_MESSAGE()); + CALLBACK_NOTIFY(message_complete); + RETURN((p - data) + 1); + } + + if (parser->flags & F_SKIPBODY) { + UPDATE_STATE(NEW_MESSAGE()); + CALLBACK_NOTIFY(message_complete); + } else if (parser->flags & F_CHUNKED) { + /* chunked encoding - ignore Content-Length header */ + UPDATE_STATE(s_chunk_size_start); + } else { + if (parser->content_length == 0) { + /* Content-Length header given but zero: Content-Length: 0\r\n */ + UPDATE_STATE(NEW_MESSAGE()); + CALLBACK_NOTIFY(message_complete); + } else if (parser->content_length != ULLONG_MAX) { + /* Content-Length header given and non-zero */ + UPDATE_STATE(s_body_identity); + } else { + if (!http_message_needs_eof(parser)) { + /* Assume content-length 0 - read the next */ + UPDATE_STATE(NEW_MESSAGE()); + CALLBACK_NOTIFY(message_complete); + } else { + /* Read body until EOF */ + UPDATE_STATE(s_body_identity_eof); + } + } + } + + break; + } + + case s_body_identity: + { + uint64_t to_read = MIN(parser->content_length, + (uint64_t) ((data + len) - p)); + + 
assert(parser->content_length != 0 + && parser->content_length != ULLONG_MAX); + + /* The difference between advancing content_length and p is because + * the latter will automaticaly advance on the next loop iteration. + * Further, if content_length ends up at 0, we want to see the last + * byte again for our message complete callback. + */ + MARK(body); + parser->content_length -= to_read; + p += to_read - 1; + + if (parser->content_length == 0) { + UPDATE_STATE(s_message_done); + + /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte. + * + * The alternative to doing this is to wait for the next byte to + * trigger the data callback, just as in every other case. The + * problem with this is that this makes it difficult for the test + * harness to distinguish between complete-on-EOF and + * complete-on-length. It's not clear that this distinction is + * important for applications, but let's keep it for now. + */ + CALLBACK_DATA_(body, p - body_mark + 1, p - data); + REEXECUTE(); + } + + break; + } + + /* read until EOF */ + case s_body_identity_eof: + MARK(body); + p = data + len - 1; + + break; + + case s_message_done: + UPDATE_STATE(NEW_MESSAGE()); + CALLBACK_NOTIFY(message_complete); + if (parser->upgrade) { + /* Exit, the rest of the message is in a different protocol. 
*/ + RETURN((p - data) + 1); + } + break; + + case s_chunk_size_start: + { + assert(nread == 1); + assert(parser->flags & F_CHUNKED); + + unhex_val = unhex[(unsigned char)ch]; + if (UNLIKELY(unhex_val == -1)) { + SET_ERRNO(HPE_INVALID_CHUNK_SIZE); + goto error; + } + + parser->content_length = unhex_val; + UPDATE_STATE(s_chunk_size); + break; + } + + case s_chunk_size: + { + uint64_t t; + + assert(parser->flags & F_CHUNKED); + + if (ch == CR) { + UPDATE_STATE(s_chunk_size_almost_done); + break; + } + + unhex_val = unhex[(unsigned char)ch]; + + if (unhex_val == -1) { + if (ch == ';' || ch == ' ') { + UPDATE_STATE(s_chunk_parameters); + break; + } + + SET_ERRNO(HPE_INVALID_CHUNK_SIZE); + goto error; + } + + t = parser->content_length; + t *= 16; + t += unhex_val; + + /* Overflow? Test against a conservative limit for simplicity. */ + if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + + parser->content_length = t; + break; + } + + case s_chunk_parameters: + { + assert(parser->flags & F_CHUNKED); + /* just ignore this shit. TODO check for overflow */ + if (ch == CR) { + UPDATE_STATE(s_chunk_size_almost_done); + break; + } + break; + } + + case s_chunk_size_almost_done: + { + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != LF); + + parser->nread = 0; + nread = 0; + + if (parser->content_length == 0) { + parser->flags |= F_TRAILING; + UPDATE_STATE(s_header_field_start); + } else { + UPDATE_STATE(s_chunk_data); + } + CALLBACK_NOTIFY(chunk_header); + break; + } + + case s_chunk_data: + { + uint64_t to_read = MIN(parser->content_length, + (uint64_t) ((data + len) - p)); + + assert(parser->flags & F_CHUNKED); + assert(parser->content_length != 0 + && parser->content_length != ULLONG_MAX); + + /* See the explanation in s_body_identity for why the content + * length and data pointers are managed this way. 
+ */ + MARK(body); + parser->content_length -= to_read; + p += to_read - 1; + + if (parser->content_length == 0) { + UPDATE_STATE(s_chunk_data_almost_done); + } + + break; + } + + case s_chunk_data_almost_done: + assert(parser->flags & F_CHUNKED); + assert(parser->content_length == 0); + STRICT_CHECK(ch != CR); + UPDATE_STATE(s_chunk_data_done); + CALLBACK_DATA(body); + break; + + case s_chunk_data_done: + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != LF); + parser->nread = 0; + nread = 0; + UPDATE_STATE(s_chunk_size_start); + CALLBACK_NOTIFY(chunk_complete); + break; + + default: + assert(0 && "unhandled state"); + SET_ERRNO(HPE_INVALID_INTERNAL_STATE); + goto error; + } + } + + /* Run callbacks for any marks that we have leftover after we ran out of + * bytes. There should be at most one of these set, so it's OK to invoke + * them in series (unset marks will not result in callbacks). + * + * We use the NOADVANCE() variety of callbacks here because 'p' has already + * overflowed 'data' and this allows us to correct for the off-by-one that + * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p' + * value that's in-bounds). + */ + + assert(((header_field_mark ? 1 : 0) + + (header_value_mark ? 1 : 0) + + (url_mark ? 1 : 0) + + (body_mark ? 1 : 0) + + (status_mark ? 1 : 0)) <= 1); + + CALLBACK_DATA_NOADVANCE(header_field); + CALLBACK_DATA_NOADVANCE(header_value); + CALLBACK_DATA_NOADVANCE(url); + CALLBACK_DATA_NOADVANCE(body); + CALLBACK_DATA_NOADVANCE(status); + + RETURN(len); + +error: + if (HTTP_PARSER_ERRNO(parser) == HPE_OK) { + SET_ERRNO(HPE_UNKNOWN); + } + + RETURN(p - data); +} + + +/* Does the parser need to see an EOF to find the end of the message? */ +int +http_message_needs_eof (const http_parser *parser) +{ + if (parser->type == HTTP_REQUEST) { + return 0; + } + + /* See RFC 2616 section 4.4 */ + if (parser->status_code / 100 == 1 || /* 1xx e.g. 
Continue */ + parser->status_code == 204 || /* No Content */ + parser->status_code == 304 || /* Not Modified */ + parser->flags & F_SKIPBODY) { /* response to a HEAD request */ + return 0; + } + + if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) { + return 0; + } + + return 1; +} + + +int +http_should_keep_alive (const http_parser *parser) +{ + if (parser->http_major > 0 && parser->http_minor > 0) { + /* HTTP/1.1 */ + if (parser->flags & F_CONNECTION_CLOSE) { + return 0; + } + } else { + /* HTTP/1.0 or earlier */ + if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) { + return 0; + } + } + + return !http_message_needs_eof(parser); +} + + +const char * +http_method_str (enum http_method m) +{ + return ELEM_AT(method_strings, m, ""); +} + +const char * +http_status_str (enum http_status s) +{ + switch (s) { +#define XX(num, name, string) case HTTP_STATUS_##name: return #string; + HTTP_STATUS_MAP(XX) +#undef XX + default: return ""; + } +} + +void +http_parser_init (http_parser *parser, enum http_parser_type t) +{ + void *data = parser->data; /* preserve application data */ + memset(parser, 0, sizeof(*parser)); + parser->data = data; + parser->type = t; + parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? 
s_start_res : s_start_req_or_res)); + parser->http_errno = HPE_OK; +} + +void +http_parser_settings_init(http_parser_settings *settings) +{ + memset(settings, 0, sizeof(*settings)); +} + +const char * +http_errno_name(enum http_errno err) { + assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); + return http_strerror_tab[err].name; +} + +const char * +http_errno_description(enum http_errno err) { + assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); + return http_strerror_tab[err].description; +} + +static enum http_host_state +http_parse_host_char(enum http_host_state s, const char ch) { + switch(s) { + case s_http_userinfo: + case s_http_userinfo_start: + if (ch == '@') { + return s_http_host_start; + } + + if (IS_USERINFO_CHAR(ch)) { + return s_http_userinfo; + } + break; + + case s_http_host_start: + if (ch == '[') { + return s_http_host_v6_start; + } + + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + break; + + case s_http_host: + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + /* fall through */ + case s_http_host_v6_end: + if (ch == ':') { + return s_http_host_port_start; + } + + break; + + case s_http_host_v6: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* fall through */ + case s_http_host_v6_start: + if (IS_HEX(ch) || ch == ':' || ch == '.') { + return s_http_host_v6; + } + + if (s == s_http_host_v6 && ch == '%') { + return s_http_host_v6_zone_start; + } + break; + + case s_http_host_v6_zone: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* fall through */ + case s_http_host_v6_zone_start: + /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */ + if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' 
|| ch == '-' || ch == '_' || + ch == '~') { + return s_http_host_v6_zone; + } + break; + + case s_http_host_port: + case s_http_host_port_start: + if (IS_NUM(ch)) { + return s_http_host_port; + } + + break; + + default: + break; + } + return s_http_host_dead; +} + +static int +http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { + enum http_host_state s; + + const char *p; + size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; + + assert(u->field_set & (1 << UF_HOST)); + + u->field_data[UF_HOST].len = 0; + + s = found_at ? s_http_userinfo_start : s_http_host_start; + + for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) { + enum http_host_state new_s = http_parse_host_char(s, *p); + + if (new_s == s_http_host_dead) { + return 1; + } + + switch(new_s) { + case s_http_host: + if (s != s_http_host) { + u->field_data[UF_HOST].off = p - buf; + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6: + if (s != s_http_host_v6) { + u->field_data[UF_HOST].off = p - buf; + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: + u->field_data[UF_HOST].len++; + break; + + case s_http_host_port: + if (s != s_http_host_port) { + u->field_data[UF_PORT].off = p - buf; + u->field_data[UF_PORT].len = 0; + u->field_set |= (1 << UF_PORT); + } + u->field_data[UF_PORT].len++; + break; + + case s_http_userinfo: + if (s != s_http_userinfo) { + u->field_data[UF_USERINFO].off = p - buf ; + u->field_data[UF_USERINFO].len = 0; + u->field_set |= (1 << UF_USERINFO); + } + u->field_data[UF_USERINFO].len++; + break; + + default: + break; + } + s = new_s; + } + + /* Make sure we don't end somewhere unexpected */ + switch (s) { + case s_http_host_start: + case s_http_host_v6_start: + case s_http_host_v6: + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: + case s_http_host_port_start: + case s_http_userinfo: + case s_http_userinfo_start: + return 1; + default: + 
break; + } + + return 0; +} + +void +http_parser_url_init(struct http_parser_url *u) { + memset(u, 0, sizeof(*u)); +} + +int +http_parser_parse_url(const char *buf, size_t buflen, int is_connect, + struct http_parser_url *u) +{ + enum state s; + const char *p; + enum http_parser_url_fields uf, old_uf; + int found_at = 0; + + if (buflen == 0) { + return 1; + } + + u->port = u->field_set = 0; + s = is_connect ? s_req_server_start : s_req_spaces_before_url; + old_uf = UF_MAX; + + for (p = buf; p < buf + buflen; p++) { + s = parse_url_char(s, *p); + + /* Figure out the next field that we're operating on */ + switch (s) { + case s_dead: + return 1; + + /* Skip delimeters */ + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + case s_req_query_string_start: + case s_req_fragment_start: + continue; + + case s_req_schema: + uf = UF_SCHEMA; + break; + + case s_req_server_with_at: + found_at = 1; + + /* fall through */ + case s_req_server: + uf = UF_HOST; + break; + + case s_req_path: + uf = UF_PATH; + break; + + case s_req_query_string: + uf = UF_QUERY; + break; + + case s_req_fragment: + uf = UF_FRAGMENT; + break; + + default: + assert(!"Unexpected state"); + return 1; + } + + /* Nothing's changed; soldier on */ + if (uf == old_uf) { + u->field_data[uf].len++; + continue; + } + + u->field_data[uf].off = p - buf; + u->field_data[uf].len = 1; + + u->field_set |= (1 << uf); + old_uf = uf; + } + + /* host must be present if there is a schema */ + /* parsing http:///toto will fail */ + if ((u->field_set & (1 << UF_SCHEMA)) && + (u->field_set & (1 << UF_HOST)) == 0) { + return 1; + } + + if (u->field_set & (1 << UF_HOST)) { + if (http_parse_host(buf, u, found_at) != 0) { + return 1; + } + } + + /* CONNECT requests can only contain "hostname:port" */ + if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) { + return 1; + } + + if (u->field_set & (1 << UF_PORT)) { + uint16_t off; + uint16_t len; + const char* p; + const char* end; + 
unsigned long v; + + off = u->field_data[UF_PORT].off; + len = u->field_data[UF_PORT].len; + end = buf + off + len; + + /* NOTE: The characters are already validated and are in the [0-9] range */ + assert(off + len <= buflen && "Port number overflow"); + v = 0; + for (p = buf + off; p < end; p++) { + v *= 10; + v += *p - '0'; + + /* Ports have a max value of 2^16 */ + if (v > 0xffff) { + return 1; + } + } + + u->port = (uint16_t) v; + } + + return 0; +} + +void +http_parser_pause(http_parser *parser, int paused) { + /* Users should only be pausing/unpausing a parser that is not in an error + * state. In non-debug builds, there's not much that we can do about this + * other than ignore it. + */ + if (HTTP_PARSER_ERRNO(parser) == HPE_OK || + HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) { + uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */ + SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK); + } else { + assert(0 && "Attempting to pause parser in error state"); + } +} + +int +http_body_is_final(const struct http_parser *parser) { + return parser->state == s_message_done; +} + +unsigned long +http_parser_version(void) { + return HTTP_PARSER_VERSION_MAJOR * 0x10000 | + HTTP_PARSER_VERSION_MINOR * 0x00100 | + HTTP_PARSER_VERSION_PATCH * 0x00001; +} diff --git a/src/net/strategies/http_parser/http_parser.h b/src/net/strategies/http_parser/http_parser.h new file mode 100755 index 00000000..e894d7ce --- /dev/null +++ b/src/net/strategies/http_parser/http_parser.h @@ -0,0 +1,436 @@ +/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef http_parser_h +#define http_parser_h +#ifdef __cplusplus +extern "C" { +#endif + +/* Also update SONAME in the Makefile whenever you change these. */ +#define HTTP_PARSER_VERSION_MAJOR 2 +#define HTTP_PARSER_VERSION_MINOR 8 +#define HTTP_PARSER_VERSION_PATCH 1 + +#include +#if defined(_WIN32) && !defined(__MINGW32__) && \ + (!defined(_MSC_VER) || _MSC_VER<1600) && !defined(__WINE__) +#include +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#else +#include +#endif + +/* Compile with -DHTTP_PARSER_STRICT=0 to make less checks, but run + * faster + */ +#ifndef HTTP_PARSER_STRICT +# define HTTP_PARSER_STRICT 1 +#endif + +/* Maximium header size allowed. 
If the macro is not defined + * before including this header then the default is used. To + * change the maximum header size, define the macro in the build + * environment (e.g. -DHTTP_MAX_HEADER_SIZE=). To remove + * the effective limit on the size of the header, define the macro + * to a very large number (e.g. -DHTTP_MAX_HEADER_SIZE=0x7fffffff) + */ +#ifndef HTTP_MAX_HEADER_SIZE +# define HTTP_MAX_HEADER_SIZE (80*1024) +#endif + +typedef struct http_parser http_parser; +typedef struct http_parser_settings http_parser_settings; + + +/* Callbacks should return non-zero to indicate an error. The parser will + * then halt execution. + * + * The one exception is on_headers_complete. In a HTTP_RESPONSE parser + * returning '1' from on_headers_complete will tell the parser that it + * should not expect a body. This is used when receiving a response to a + * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding: + * chunked' headers that indicate the presence of a body. + * + * Returning `2` from on_headers_complete will tell parser that it should not + * expect neither a body nor any futher responses on this connection. This is + * useful for handling responses to a CONNECT request which may not contain + * `Upgrade` or `Connection: upgrade` headers. + * + * http_data_cb does not return data chunks. It will be called arbitrarily + * many times for each string. E.G. you might get 10 callbacks for "on_url" + * each providing just a few characters more data. 
+ */ +typedef int (*http_data_cb) (http_parser*, const char *at, size_t length); +typedef int (*http_cb) (http_parser*); + + +/* Status Codes */ +#define HTTP_STATUS_MAP(XX) \ + XX(100, CONTINUE, Continue) \ + XX(101, SWITCHING_PROTOCOLS, Switching Protocols) \ + XX(102, PROCESSING, Processing) \ + XX(200, OK, OK) \ + XX(201, CREATED, Created) \ + XX(202, ACCEPTED, Accepted) \ + XX(203, NON_AUTHORITATIVE_INFORMATION, Non-Authoritative Information) \ + XX(204, NO_CONTENT, No Content) \ + XX(205, RESET_CONTENT, Reset Content) \ + XX(206, PARTIAL_CONTENT, Partial Content) \ + XX(207, MULTI_STATUS, Multi-Status) \ + XX(208, ALREADY_REPORTED, Already Reported) \ + XX(226, IM_USED, IM Used) \ + XX(300, MULTIPLE_CHOICES, Multiple Choices) \ + XX(301, MOVED_PERMANENTLY, Moved Permanently) \ + XX(302, FOUND, Found) \ + XX(303, SEE_OTHER, See Other) \ + XX(304, NOT_MODIFIED, Not Modified) \ + XX(305, USE_PROXY, Use Proxy) \ + XX(307, TEMPORARY_REDIRECT, Temporary Redirect) \ + XX(308, PERMANENT_REDIRECT, Permanent Redirect) \ + XX(400, BAD_REQUEST, Bad Request) \ + XX(401, UNAUTHORIZED, Unauthorized) \ + XX(402, PAYMENT_REQUIRED, Payment Required) \ + XX(403, FORBIDDEN, Forbidden) \ + XX(404, NOT_FOUND, Not Found) \ + XX(405, METHOD_NOT_ALLOWED, Method Not Allowed) \ + XX(406, NOT_ACCEPTABLE, Not Acceptable) \ + XX(407, PROXY_AUTHENTICATION_REQUIRED, Proxy Authentication Required) \ + XX(408, REQUEST_TIMEOUT, Request Timeout) \ + XX(409, CONFLICT, Conflict) \ + XX(410, GONE, Gone) \ + XX(411, LENGTH_REQUIRED, Length Required) \ + XX(412, PRECONDITION_FAILED, Precondition Failed) \ + XX(413, PAYLOAD_TOO_LARGE, Payload Too Large) \ + XX(414, URI_TOO_LONG, URI Too Long) \ + XX(415, UNSUPPORTED_MEDIA_TYPE, Unsupported Media Type) \ + XX(416, RANGE_NOT_SATISFIABLE, Range Not Satisfiable) \ + XX(417, EXPECTATION_FAILED, Expectation Failed) \ + XX(421, MISDIRECTED_REQUEST, Misdirected Request) \ + XX(422, UNPROCESSABLE_ENTITY, Unprocessable Entity) \ + XX(423, LOCKED, Locked) \ + 
XX(424, FAILED_DEPENDENCY, Failed Dependency) \ + XX(426, UPGRADE_REQUIRED, Upgrade Required) \ + XX(428, PRECONDITION_REQUIRED, Precondition Required) \ + XX(429, TOO_MANY_REQUESTS, Too Many Requests) \ + XX(431, REQUEST_HEADER_FIELDS_TOO_LARGE, Request Header Fields Too Large) \ + XX(451, UNAVAILABLE_FOR_LEGAL_REASONS, Unavailable For Legal Reasons) \ + XX(500, INTERNAL_SERVER_ERROR, Internal Server Error) \ + XX(501, NOT_IMPLEMENTED, Not Implemented) \ + XX(502, BAD_GATEWAY, Bad Gateway) \ + XX(503, SERVICE_UNAVAILABLE, Service Unavailable) \ + XX(504, GATEWAY_TIMEOUT, Gateway Timeout) \ + XX(505, HTTP_VERSION_NOT_SUPPORTED, HTTP Version Not Supported) \ + XX(506, VARIANT_ALSO_NEGOTIATES, Variant Also Negotiates) \ + XX(507, INSUFFICIENT_STORAGE, Insufficient Storage) \ + XX(508, LOOP_DETECTED, Loop Detected) \ + XX(510, NOT_EXTENDED, Not Extended) \ + XX(511, NETWORK_AUTHENTICATION_REQUIRED, Network Authentication Required) \ + +enum http_status + { +#define XX(num, name, string) HTTP_STATUS_##name = num, + HTTP_STATUS_MAP(XX) +#undef XX + }; + + +/* Request Methods */ +#define HTTP_METHOD_MAP(XX) \ + XX(0, DELETE, DELETE) \ + XX(1, GET, GET) \ + XX(2, HEAD, HEAD) \ + XX(3, POST, POST) \ + XX(4, PUT, PUT) \ + /* pathological */ \ + XX(5, CONNECT, CONNECT) \ + XX(6, OPTIONS, OPTIONS) \ + XX(7, TRACE, TRACE) \ + /* WebDAV */ \ + XX(8, COPY, COPY) \ + XX(9, LOCK, LOCK) \ + XX(10, MKCOL, MKCOL) \ + XX(11, MOVE, MOVE) \ + XX(12, PROPFIND, PROPFIND) \ + XX(13, PROPPATCH, PROPPATCH) \ + XX(14, SEARCH, SEARCH) \ + XX(15, UNLOCK, UNLOCK) \ + XX(16, BIND, BIND) \ + XX(17, REBIND, REBIND) \ + XX(18, UNBIND, UNBIND) \ + XX(19, ACL, ACL) \ + /* subversion */ \ + XX(20, REPORT, REPORT) \ + XX(21, MKACTIVITY, MKACTIVITY) \ + XX(22, CHECKOUT, CHECKOUT) \ + XX(23, MERGE, MERGE) \ + /* upnp */ \ + XX(24, MSEARCH, M-SEARCH) \ + XX(25, NOTIFY, NOTIFY) \ + XX(26, SUBSCRIBE, SUBSCRIBE) \ + XX(27, UNSUBSCRIBE, UNSUBSCRIBE) \ + /* RFC-5789 */ \ + XX(28, PATCH, PATCH) \ + XX(29, PURGE, 
PURGE) \ + /* CalDAV */ \ + XX(30, MKCALENDAR, MKCALENDAR) \ + /* RFC-2068, section 19.6.1.2 */ \ + XX(31, LINK, LINK) \ + XX(32, UNLINK, UNLINK) \ + /* icecast */ \ + XX(33, SOURCE, SOURCE) \ + +enum http_method + { +#define XX(num, name, string) HTTP_##name = num, + HTTP_METHOD_MAP(XX) +#undef XX + }; + + +enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH }; + + +/* Flag values for http_parser.flags field */ +enum flags + { F_CHUNKED = 1 << 0 + , F_CONNECTION_KEEP_ALIVE = 1 << 1 + , F_CONNECTION_CLOSE = 1 << 2 + , F_CONNECTION_UPGRADE = 1 << 3 + , F_TRAILING = 1 << 4 + , F_UPGRADE = 1 << 5 + , F_SKIPBODY = 1 << 6 + , F_CONTENTLENGTH = 1 << 7 + }; + + +/* Map for errno-related constants + * + * The provided argument should be a macro that takes 2 arguments. + */ +#define HTTP_ERRNO_MAP(XX) \ + /* No error */ \ + XX(OK, "success") \ + \ + /* Callback-related errors */ \ + XX(CB_message_begin, "the on_message_begin callback failed") \ + XX(CB_url, "the on_url callback failed") \ + XX(CB_header_field, "the on_header_field callback failed") \ + XX(CB_header_value, "the on_header_value callback failed") \ + XX(CB_headers_complete, "the on_headers_complete callback failed") \ + XX(CB_body, "the on_body callback failed") \ + XX(CB_message_complete, "the on_message_complete callback failed") \ + XX(CB_status, "the on_status callback failed") \ + XX(CB_chunk_header, "the on_chunk_header callback failed") \ + XX(CB_chunk_complete, "the on_chunk_complete callback failed") \ + \ + /* Parsing-related errors */ \ + XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \ + XX(HEADER_OVERFLOW, \ + "too many header bytes seen; overflow detected") \ + XX(CLOSED_CONNECTION, \ + "data received after completed connection: close message") \ + XX(INVALID_VERSION, "invalid HTTP version") \ + XX(INVALID_STATUS, "invalid HTTP status code") \ + XX(INVALID_METHOD, "invalid HTTP method") \ + XX(INVALID_URL, "invalid URL") \ + XX(INVALID_HOST, "invalid host") \ + 
XX(INVALID_PORT, "invalid port") \ + XX(INVALID_PATH, "invalid path") \ + XX(INVALID_QUERY_STRING, "invalid query string") \ + XX(INVALID_FRAGMENT, "invalid fragment") \ + XX(LF_EXPECTED, "LF character expected") \ + XX(INVALID_HEADER_TOKEN, "invalid character in header") \ + XX(INVALID_CONTENT_LENGTH, \ + "invalid character in content-length header") \ + XX(UNEXPECTED_CONTENT_LENGTH, \ + "unexpected content-length header") \ + XX(INVALID_CHUNK_SIZE, \ + "invalid character in chunk size header") \ + XX(INVALID_CONSTANT, "invalid constant string") \ + XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\ + XX(STRICT, "strict mode assertion failed") \ + XX(PAUSED, "parser is paused") \ + XX(UNKNOWN, "an unknown error occurred") + + +/* Define HPE_* values for each errno value above */ +#define HTTP_ERRNO_GEN(n, s) HPE_##n, +enum http_errno { + HTTP_ERRNO_MAP(HTTP_ERRNO_GEN) +}; +#undef HTTP_ERRNO_GEN + + +/* Get an http_errno value from an http_parser */ +#define HTTP_PARSER_ERRNO(p) ((enum http_errno) (p)->http_errno) + + +struct http_parser { + /** PRIVATE **/ + unsigned int type : 2; /* enum http_parser_type */ + unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */ + unsigned int state : 7; /* enum state from http_parser.c */ + unsigned int header_state : 7; /* enum header_state from http_parser.c */ + unsigned int index : 7; /* index into current matcher */ + unsigned int lenient_http_headers : 1; + + uint32_t nread; /* # bytes read in various scenarios */ + uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */ + + /** READ-ONLY **/ + unsigned short http_major; + unsigned short http_minor; + unsigned int status_code : 16; /* responses only */ + unsigned int method : 8; /* requests only */ + unsigned int http_errno : 7; + + /* 1 = Upgrade header was present and the parser has exited because of that. + * 0 = No upgrade header present. 
+ * Should be checked when http_parser_execute() returns in addition to + * error checking. + */ + unsigned int upgrade : 1; + + /** PUBLIC **/ + void *data; /* A pointer to get hook to the "connection" or "socket" object */ +}; + + +struct http_parser_settings { + http_cb on_message_begin; + http_data_cb on_url; + http_data_cb on_status; + http_data_cb on_header_field; + http_data_cb on_header_value; + http_cb on_headers_complete; + http_data_cb on_body; + http_cb on_message_complete; + /* When on_chunk_header is called, the current chunk length is stored + * in parser->content_length. + */ + http_cb on_chunk_header; + http_cb on_chunk_complete; +}; + + +enum http_parser_url_fields + { UF_SCHEMA = 0 + , UF_HOST = 1 + , UF_PORT = 2 + , UF_PATH = 3 + , UF_QUERY = 4 + , UF_FRAGMENT = 5 + , UF_USERINFO = 6 + , UF_MAX = 7 + }; + + +/* Result structure for http_parser_parse_url(). + * + * Callers should index into field_data[] with UF_* values iff field_set + * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and + * because we probably have padding left over), we convert any port to + * a uint16_t. + */ +struct http_parser_url { + uint16_t field_set; /* Bitmask of (1 << UF_*) values */ + uint16_t port; /* Converted UF_PORT string */ + + struct { + uint16_t off; /* Offset into buffer in which field starts */ + uint16_t len; /* Length of run in buffer */ + } field_data[UF_MAX]; +}; + + +/* Returns the library version. Bits 16-23 contain the major version number, + * bits 8-15 the minor version number and bits 0-7 the patch level. 
+ * Usage example: + * + * unsigned long version = http_parser_version(); + * unsigned major = (version >> 16) & 255; + * unsigned minor = (version >> 8) & 255; + * unsigned patch = version & 255; + * printf("http_parser v%u.%u.%u\n", major, minor, patch); + */ +unsigned long http_parser_version(void); + +void http_parser_init(http_parser *parser, enum http_parser_type type); + + +/* Initialize http_parser_settings members to 0 + */ +void http_parser_settings_init(http_parser_settings *settings); + + +/* Executes the parser. Returns number of parsed bytes. Sets + * `parser->http_errno` on error. */ +size_t http_parser_execute(http_parser *parser, + const http_parser_settings *settings, + const char *data, + size_t len); + + +/* If http_should_keep_alive() in the on_headers_complete or + * on_message_complete callback returns 0, then this should be + * the last message on the connection. + * If you are the server, respond with the "Connection: close" header. + * If you are the client, close the connection. + */ +int http_should_keep_alive(const http_parser *parser); + +/* Returns a string version of the HTTP method. */ +const char *http_method_str(enum http_method m); + +/* Returns a string version of the HTTP status code. */ +const char *http_status_str(enum http_status s); + +/* Return a string name of the given error */ +const char *http_errno_name(enum http_errno err); + +/* Return a string description of the given error */ +const char *http_errno_description(enum http_errno err); + +/* Initialize all http_parser_url members to 0 */ +void http_parser_url_init(struct http_parser_url *u); + +/* Parse a URL; return nonzero on failure */ +int http_parser_parse_url(const char *buf, size_t buflen, + int is_connect, + struct http_parser_url *u); + +/* Pause or un-pause the parser; a nonzero value pauses */ +void http_parser_pause(http_parser *parser, int paused); + +/* Checks if this is the final chunk of the body. 
*/ +int http_body_is_final(const http_parser *parser); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/version.h b/src/version.h index 33be0af8..272d8337 100644 --- a/src/version.h +++ b/src/version.h @@ -25,18 +25,18 @@ #ifndef XMRIG_VERSION_H #define XMRIG_VERSION_H -#define APP_ID "xmrig" -#define APP_NAME "XMRig" -#define APP_DESC "XMRig CPU miner" -#define APP_VERSION "2.14.4-dev" -#define APP_DOMAIN "xmrig.com" -#define APP_SITE "www.xmrig.com" -#define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com" -#define APP_KIND "cpu" +#define APP_ID "ninjarig" +#define APP_NAME "NinjaRig" +#define APP_DESC "NinjaRig CPU/GPU miner" +#define APP_VERSION "1.0.0-dev" +//#define APP_DOMAIN "xmrig.com" +//#define APP_SITE "www.xmrig.com" +#define APP_COPYRIGHT "Copyright (C) 2019 Haifa Bogdan Adnan" +#define APP_KIND "cpu/gpu" -#define APP_VER_MAJOR 2 -#define APP_VER_MINOR 14 -#define APP_VER_PATCH 4 +#define APP_VER_MAJOR 1 +#define APP_VER_MINOR 0 +#define APP_VER_PATCH 0 #ifdef _MSC_VER # if (_MSC_VER >= 1920) diff --git a/src/workers/CpuThread.cpp b/src/workers/CpuThread.cpp deleted file mode 100644 index 6548b461..00000000 --- a/src/workers/CpuThread.cpp +++ /dev/null @@ -1,744 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include - - -#include "common/cpu/Cpu.h" -#include "common/log/Log.h" -#include "crypto/Asm.h" -#include "Mem.h" -#include "rapidjson/document.h" -#include "workers/CpuThread.h" - - -#if defined(XMRIG_ARM) -# include "crypto/CryptoNight_arm.h" -#else -# include "crypto/CryptoNight_x86.h" -#endif - - -xmrig::CpuThread::CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiway multiway, int64_t affinity, int priority, bool softAES, bool prefetch, Assembly assembly) : - m_algorithm(algorithm), - m_av(av), - m_assembly(assembly), - m_prefetch(prefetch), - m_softAES(softAES), - m_priority(priority), - m_affinity(affinity), - m_multiway(multiway), - m_index(index) -{ -} - - -#ifndef XMRIG_NO_ASM -template -static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t mask) -{ - const uint8_t* p = reinterpret_cast(src); - - // Workaround for Visual Studio placing trampoline in debug builds. 
-# if defined(_MSC_VER) - if (p[0] == 0xE9) { - p += *(int32_t*)(p + 1) + 5; - } -# endif - - size_t size = 0; - while (*(uint32_t*)(p + size) != 0xDEADC0DE) { - ++size; - } - size += sizeof(uint32_t); - - memcpy((void*) dst, (const void*) src, size); - - uint8_t* patched_data = reinterpret_cast(dst); - for (size_t i = 0; i + sizeof(uint32_t) <= size; ++i) { - switch (*(uint32_t*)(patched_data + i)) { - case xmrig::CRYPTONIGHT_ITER: - *(uint32_t*)(patched_data + i) = iterations; - break; - - case xmrig::CRYPTONIGHT_MASK: - *(uint32_t*)(patched_data + i) = mask; - break; - } - } -} - - -extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx); -extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx); -extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx); -extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx); - - -xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm = nullptr; - -xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr; - -xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ivybridge_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ryzen_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm = nullptr; - -xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ivybridge_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ryzen_asm = nullptr; 
-xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm = nullptr; - - -void xmrig::CpuThread::patchAsmVariants() -{ - const int allocation_size = 65536; - uint8_t *base = static_cast(Mem::allocateExecutableMemory(allocation_size)); - - cn_half_mainloop_ivybridge_asm = reinterpret_cast (base + 0x0000); - cn_half_mainloop_ryzen_asm = reinterpret_cast (base + 0x1000); - cn_half_mainloop_bulldozer_asm = reinterpret_cast (base + 0x2000); - cn_half_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0x3000); - - cn_trtl_mainloop_ivybridge_asm = reinterpret_cast (base + 0x4000); - cn_trtl_mainloop_ryzen_asm = reinterpret_cast (base + 0x5000); - cn_trtl_mainloop_bulldozer_asm = reinterpret_cast (base + 0x6000); - cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0x7000); - - cn_zls_mainloop_ivybridge_asm = reinterpret_cast (base + 0x8000); - cn_zls_mainloop_ryzen_asm = reinterpret_cast (base + 0x9000); - cn_zls_mainloop_bulldozer_asm = reinterpret_cast (base + 0xA000); - cn_zls_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0xB000); - - cn_double_mainloop_ivybridge_asm = reinterpret_cast (base + 0xC000); - cn_double_mainloop_ryzen_asm = reinterpret_cast (base + 0xD000); - cn_double_mainloop_bulldozer_asm = reinterpret_cast (base + 0xE000); - cn_double_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0xF000); - - patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); - 
- patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); - patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); - patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); - patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); - - patchCode(cn_zls_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_ZLS_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_zls_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_ZLS_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_zls_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_ZLS_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_zls_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_ZLS_ITER, xmrig::CRYPTONIGHT_MASK); - - patchCode(cn_double_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_DOUBLE_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_double_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_DOUBLE_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_double_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_DOUBLE_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_double_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_DOUBLE_ITER, xmrig::CRYPTONIGHT_MASK); - - Mem::protectExecutableMemory(base, allocation_size); - Mem::flushInstructionCache(base, allocation_size); -} -#endif - - -bool xmrig::CpuThread::isSoftAES(AlgoVariant av) -{ - return av == AV_SINGLE_SOFT || av == AV_DOUBLE_SOFT || av > AV_PENTA; -} - - -#ifndef XMRIG_NO_ASM -template -static inline void 
add_asm_func(xmrig::CpuThread::cn_hash_fun(&asm_func_map)[xmrig::ALGO_MAX][xmrig::AV_MAX][xmrig::VARIANT_MAX][xmrig::ASM_MAX]) -{ - asm_func_map[algo][xmrig::AV_SINGLE][variant][xmrig::ASM_INTEL] = cryptonight_single_hash_asm; - asm_func_map[algo][xmrig::AV_SINGLE][variant][xmrig::ASM_RYZEN] = cryptonight_single_hash_asm; - asm_func_map[algo][xmrig::AV_SINGLE][variant][xmrig::ASM_BULLDOZER] = cryptonight_single_hash_asm; - - asm_func_map[algo][xmrig::AV_DOUBLE][variant][xmrig::ASM_INTEL] = cryptonight_double_hash_asm; - asm_func_map[algo][xmrig::AV_DOUBLE][variant][xmrig::ASM_RYZEN] = cryptonight_double_hash_asm; - asm_func_map[algo][xmrig::AV_DOUBLE][variant][xmrig::ASM_BULLDOZER] = cryptonight_double_hash_asm; -} -#endif - -xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant av, Variant variant, Assembly assembly) -{ - assert(variant >= VARIANT_0 && variant < VARIANT_MAX); - -# ifndef XMRIG_NO_ASM - if (assembly == ASM_AUTO) { - assembly = Cpu::info()->assembly(); - } - - static cn_hash_fun asm_func_map[ALGO_MAX][AV_MAX][VARIANT_MAX][ASM_MAX] = {}; - static bool asm_func_map_initialized = false; - - if (!asm_func_map_initialized) { - add_asm_func(asm_func_map); - add_asm_func(asm_func_map); - add_asm_func(asm_func_map); - add_asm_func(asm_func_map); - -# ifndef XMRIG_NO_CN_PICO - add_asm_func(asm_func_map); -# endif - - add_asm_func(asm_func_map); - add_asm_func(asm_func_map); - add_asm_func(asm_func_map); - - asm_func_map_initialized = true; - } - - cn_hash_fun fun = asm_func_map[algorithm][av][variant][assembly]; - if (fun) { - return fun; - } -# endif - - constexpr const size_t count = VARIANT_MAX * 10 * ALGO_MAX; - - static const cn_hash_fun func_table[] = { - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - 
cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, 
- cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL - -# ifndef XMRIG_NO_CN_GPU - cryptonight_single_hash_gpu, - nullptr, - cryptonight_single_hash_gpu, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, -# else - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU -# endif - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - -# ifndef XMRIG_NO_AEON - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - 
cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 
VARIANT_DOUBLE -# else - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE -# endif - -# ifndef XMRIG_NO_SUMO - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - 
cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ - nullptr, nullptr, 
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE -# else - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 
nullptr, // VARIANT_DOUBLE -# endif - -# ifndef XMRIG_NO_CN_PICO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF - - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_single_hash, - cryptonight_double_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - cryptonight_triple_hash, - cryptonight_quad_hash, - cryptonight_penta_hash, - - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 
nullptr, nullptr, nullptr, // VARIANT_DOUBLE -# else - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_0 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_1 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TUBE - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_MSR - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XHV - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_XAO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RTO - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_2 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_HALF - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_TRTL - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_GPU - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_WOW - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_4 - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_RWZ - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_ZLS - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // VARIANT_DOUBLE -# endif - }; - - static_assert(count == sizeof(func_table) / sizeof(func_table[0]), "func_table size mismatch"); - - const 
size_t index = VARIANT_MAX * 10 * algorithm + 10 * variant + av - 1; - -# ifndef NDEBUG - cn_hash_fun func = func_table[index]; - - assert(index < sizeof(func_table) / sizeof(func_table[0])); - assert(func != nullptr); - - return func; -# else - return func_table[index]; -# endif -} - - -xmrig::CpuThread *xmrig::CpuThread::createFromAV(size_t index, Algo algorithm, AlgoVariant av, int64_t affinity, int priority, Assembly assembly) -{ - assert(av > AV_AUTO && av < AV_MAX); - - int64_t cpuId = -1L; - - if (affinity != -1L) { - size_t idx = 0; - - for (size_t i = 0; i < 64; i++) { - if (!(affinity & (1ULL << i))) { - continue; - } - - if (idx == index) { - cpuId = i; - break; - } - - idx++; - } - } - - return new CpuThread(index, algorithm, av, multiway(av), cpuId, priority, isSoftAES(av), false, assembly); -} - - -xmrig::CpuThread *xmrig::CpuThread::createFromData(size_t index, Algo algorithm, const CpuThread::Data &data, int priority, bool softAES) -{ - int av = AV_AUTO; - const Multiway multiway = data.multiway; - - if (multiway <= DoubleWay) { - av = softAES ? (multiway + 2) : multiway; - } - else { - av = softAES ? (multiway + 5) : (multiway + 2); - } - - assert(av > AV_AUTO && av < AV_MAX); - - return new CpuThread(index, algorithm, static_cast(av), multiway, data.affinity, priority, softAES, false, data.assembly); -} - - -xmrig::CpuThread::Data xmrig::CpuThread::parse(const rapidjson::Value &object) -{ - Data data; - - const auto &multiway = object["low_power_mode"]; - if (multiway.IsBool()) { - data.multiway = multiway.IsTrue() ? 
DoubleWay : SingleWay; - data.valid = true; - } - else if (multiway.IsUint()) { - data.setMultiway(multiway.GetInt()); - } - - if (!data.valid) { - return data; - } - - const auto &affinity = object["affine_to_cpu"]; - if (affinity.IsUint64()) { - data.affinity = affinity.GetInt64(); - } - -# ifndef XMRIG_NO_ASM - data.assembly = Asm::parse(object["asm"]); -# endif - - return data; -} - - -xmrig::IThread::Multiway xmrig::CpuThread::multiway(AlgoVariant av) -{ - switch (av) { - case AV_SINGLE: - case AV_SINGLE_SOFT: - return SingleWay; - - case AV_DOUBLE_SOFT: - case AV_DOUBLE: - return DoubleWay; - - case AV_TRIPLE_SOFT: - case AV_TRIPLE: - return TripleWay; - - case AV_QUAD_SOFT: - case AV_QUAD: - return QuadWay; - - case AV_PENTA_SOFT: - case AV_PENTA: - return PentaWay; - - default: - break; - } - - return SingleWay; -} - - -#ifdef APP_DEBUG -void xmrig::CpuThread::print() const -{ - LOG_DEBUG(GREEN_BOLD("CPU thread: ") " index " WHITE_BOLD("%zu") ", multiway " WHITE_BOLD("%d") ", av " WHITE_BOLD("%d") ",", - index(), static_cast(multiway()), static_cast(m_av)); - -# ifndef XMRIG_NO_ASM - LOG_DEBUG(" assembly: %s, affine_to_cpu: %" PRId64, Asm::toString(m_assembly), affinity()); -# else - LOG_DEBUG(" affine_to_cpu: %" PRId64, affinity()); -# endif -} -#endif - - -#ifndef XMRIG_NO_API -rapidjson::Value xmrig::CpuThread::toAPI(rapidjson::Document &doc) const -{ - using namespace rapidjson; - - Value obj(kObjectType); - auto &allocator = doc.GetAllocator(); - - obj.AddMember("type", "cpu", allocator); - obj.AddMember("av", m_av, allocator); - obj.AddMember("low_power_mode", multiway(), allocator); - obj.AddMember("affine_to_cpu", affinity(), allocator); - obj.AddMember("priority", priority(), allocator); - obj.AddMember("soft_aes", isSoftAES(), allocator); - - return obj; -} -#endif - - -rapidjson::Value xmrig::CpuThread::toConfig(rapidjson::Document &doc) const -{ - using namespace rapidjson; - - Value obj(kObjectType); - auto &allocator = doc.GetAllocator(); - - 
obj.AddMember("low_power_mode", multiway(), allocator); - obj.AddMember("affine_to_cpu", affinity() == -1L ? Value(kFalseType) : Value(affinity()), allocator); - -# ifndef XMRIG_NO_ASM - obj.AddMember("asm", Asm::toJSON(m_assembly), allocator); -# endif - - return obj; -} diff --git a/src/workers/CpuThread.h b/src/workers/CpuThread.h deleted file mode 100644 index 05d4a066..00000000 --- a/src/workers/CpuThread.h +++ /dev/null @@ -1,115 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef XMRIG_CPUTHREAD_H -#define XMRIG_CPUTHREAD_H - - -#include "common/xmrig.h" -#include "interfaces/IThread.h" - - -struct cryptonight_ctx; - - -namespace xmrig { - - -class CpuThread : public IThread -{ -public: - struct Data - { - inline Data() : assembly(ASM_AUTO), valid(false), affinity(-1L), multiway(SingleWay) {} - - inline void setMultiway(int value) - { - if (value >= SingleWay && value <= PentaWay) { - multiway = static_cast(value); - valid = true; - } - } - - Assembly assembly; - bool valid; - int64_t affinity; - Multiway multiway; - }; - - - CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiway multiway, int64_t affinity, int priority, bool softAES, bool prefetch, Assembly assembly); - - typedef void (*cn_hash_fun)(const uint8_t *input, size_t size, uint8_t *output, cryptonight_ctx **ctx, uint64_t height); - typedef void (*cn_mainloop_fun)(cryptonight_ctx **ctx); - -# ifndef XMRIG_NO_ASM - static void patchAsmVariants(); -# endif - - static bool isSoftAES(AlgoVariant av); - static cn_hash_fun fn(Algo algorithm, AlgoVariant av, Variant variant, Assembly assembly); - static CpuThread *createFromAV(size_t index, Algo algorithm, AlgoVariant av, int64_t affinity, int priority, Assembly assembly); - static CpuThread *createFromData(size_t index, Algo algorithm, const CpuThread::Data &data, int priority, bool softAES); - static Data parse(const rapidjson::Value &object); - static Multiway multiway(AlgoVariant av); - - inline bool isPrefetch() const { return m_prefetch; } - inline bool isSoftAES() const { return m_softAES; } - inline cn_hash_fun fn(Variant variant) const { return fn(m_algorithm, m_av, variant, m_assembly); } - - inline Algo algorithm() const override { return m_algorithm; } - inline int priority() const override { return m_priority; } - inline int64_t affinity() const override { return m_affinity; } - inline Multiway multiway() const override { return m_multiway; } - inline size_t index() const override { return m_index; 
} - inline Type type() const override { return CPU; } - -protected: -# ifdef APP_DEBUG - void print() const override; -# endif - -# ifndef XMRIG_NO_API - rapidjson::Value toAPI(rapidjson::Document &doc) const override; -# endif - - rapidjson::Value toConfig(rapidjson::Document &doc) const override; - -private: - const Algo m_algorithm; - const AlgoVariant m_av; - const Assembly m_assembly; - const bool m_prefetch; - const bool m_softAES; - const int m_priority; - const int64_t m_affinity; - const Multiway m_multiway; - const size_t m_index; -}; - - -} /* namespace xmrig */ - - -#endif /* XMRIG_CPUTHREAD_H */ diff --git a/src/workers/Handle.cpp b/src/workers/Handle.cpp index d42ea368..cacaf636 100644 --- a/src/workers/Handle.cpp +++ b/src/workers/Handle.cpp @@ -22,25 +22,65 @@ */ +#include #include "workers/Handle.h" -Handle::Handle(xmrig::IThread *config, uint32_t offset, size_t totalWays) : - m_worker(nullptr), - m_totalWays(totalWays), - m_offset(offset), - m_config(config) +Handle::Handle(xmrig::Config *config, xmrig::HasherConfig *hasherConfig, uint32_t offset) : + m_offset(offset), + m_config(config), + m_hasherConfig(hasherConfig), + m_hasher(nullptr) { -} + std::vector hashers = Hasher::getHashers(); + for(Hasher *hasher : hashers) { + if(hasherConfig->type() == hasher->subType()) { + if(hasher->initialize(hasherConfig->algorithm(), hasherConfig->variant()) && + hasher->configure(*hasherConfig) && + hasher->deviceCount() > 0) + m_hasher = hasher; + std::string hasherInfo = hasher->info(); + + if(config->isColors()) { + std::string redDisabled = RED_BOLD("DISABLED"); + std::string greenEnabled = GREEN_BOLD("ENABLED"); + + size_t startPos = hasherInfo.find("DISABLED"); + while (startPos != string::npos) { + hasherInfo.replace(startPos, 8, redDisabled); + startPos = hasherInfo.find("DISABLED", startPos + redDisabled.size()); + } + + startPos = hasherInfo.find("ENABLED"); + while (startPos != string::npos) { + hasherInfo.replace(startPos, 7, greenEnabled); + 
startPos = hasherInfo.find("ENABLED", startPos + greenEnabled.size()); + } + + Log::i()->text(GREEN_BOLD(" * Initializing %s hasher:") "\n%s", hasher->subType().c_str(), hasherInfo.c_str()); + } + else { + Log::i()->text(" * Initializing %s hasher:\n%s", hasher->subType().c_str(), hasherInfo.c_str()); + } + } + } +} void Handle::join() { - uv_thread_join(&m_thread); + for(uv_thread_t thread : m_threads) + uv_thread_join(&thread); } void Handle::start(void (*callback) (void *)) { - uv_thread_create(&m_thread, callback, this); + assert(m_hasher != nullptr); + for(int i=0; i < m_hasher->computingThreads(); i++) { + uv_thread_t thread; + HandleArg *arg = new HandleArg { this, i }; + uv_thread_create(&thread, callback, arg); + m_threads.push_back(thread); + } } diff --git a/src/workers/Handle.h b/src/workers/Handle.h index 4bb899f9..50c7a2b4 100644 --- a/src/workers/Handle.h +++ b/src/workers/Handle.h @@ -27,35 +27,48 @@ #include #include +#include #include +#include +#include "core/HasherConfig.h" -#include "interfaces/IThread.h" - +#include "crypto/argon2_hasher/common/common.h" +#include "crypto/argon2_hasher/hash/Hasher.h" class IWorker; - class Handle { public: - Handle(xmrig::IThread *config, uint32_t offset, size_t totalWays); + Handle(xmrig::Config *config, xmrig::HasherConfig *hasherConfig, uint32_t offset); + + struct HandleArg { + Handle *handle; + int workerId; + }; + void join(); void start(void (*callback) (void *)); - inline IWorker *worker() const { return m_worker; } - inline size_t threadId() const { return m_config->index(); } - inline size_t totalWays() const { return m_totalWays; } + inline std::vector &workers() { return m_workers; } + inline size_t hasherId() const { return m_hasherConfig->index(); } + inline size_t parallelism(int workerIdx) const { return m_hasher != nullptr ? m_hasher->parallelism(workerIdx) : 0; } + inline size_t computingThreads() const { return m_hasher != nullptr ? 
m_hasher->computingThreads() : 0; } inline uint32_t offset() const { return m_offset; } - inline void setWorker(IWorker *worker) { assert(worker != nullptr); m_worker = worker; } - inline xmrig::IThread *config() const { return m_config; } + inline void addWorker(IWorker *worker) { assert(worker != nullptr); m_workers.push_back(worker); } + inline xmrig::HasherConfig *config() const { return m_hasherConfig; } + inline Hasher *hasher() const { return m_hasher; } private: - IWorker *m_worker; - size_t m_totalWays; + std::vector m_threads; + std::vector m_workers; + + Hasher *m_hasher; uint32_t m_offset; - uv_thread_t m_thread; - xmrig::IThread *m_config; + + xmrig::HasherConfig *m_hasherConfig; + xmrig::Config *m_config; }; diff --git a/src/workers/Hashrate.cpp b/src/workers/Hashrate.cpp index 2a750318..dcb4982e 100644 --- a/src/workers/Hashrate.cpp +++ b/src/workers/Hashrate.cpp @@ -33,11 +33,12 @@ #include "core/Config.h" #include "core/Controller.h" #include "workers/Hashrate.h" +#include "workers/Handle.h" inline static const char *format(double h, char *buf, size_t size) { - if (isnormal(h)) { + if (std::isnormal(h)) { snprintf(buf, size, "%03.1f", h); return buf; } @@ -46,19 +47,26 @@ inline static const char *format(double h, char *buf, size_t size) } -Hashrate::Hashrate(size_t threads, xmrig::Controller *controller) : +Hashrate::Hashrate(const std::vector &hashers, xmrig::Controller *controller) : m_highest(0.0), - m_threads(threads), m_controller(controller) { - m_counts = new uint64_t*[threads]; - m_timestamps = new uint64_t*[threads]; - m_top = new uint32_t[threads]; + m_hashers = hashers.size(); + m_workers = new size_t[m_hashers]; + m_counts = new uint64_t**[m_hashers]; + m_timestamps = new uint64_t**[m_hashers]; + m_top = new uint32_t*[m_hashers]; - for (size_t i = 0; i < threads; i++) { - m_counts[i] = new uint64_t[kBucketSize](); - m_timestamps[i] = new uint64_t[kBucketSize](); - m_top[i] = 0; + for (size_t i = 0; i < hashers.size(); i++) { + 
m_workers[i] = hashers[i]->hasher()->deviceCount(); + m_counts[i] = new uint64_t*[m_workers[i]]; + m_timestamps[i] = new uint64_t*[m_workers[i]]; + m_top[i] = new uint32_t[m_workers[i]]; + for (size_t j = 0; j < m_workers[i]; j++) { + m_counts[i][j] = new uint64_t[kBucketSize](); + m_timestamps[i][j] = new uint64_t[kBucketSize](); + m_top[i][j] = 0; + } } const int printTime = controller->config()->printTime(); @@ -77,10 +85,12 @@ double Hashrate::calc(size_t ms) const double result = 0.0; double data; - for (size_t i = 0; i < m_threads; ++i) { - data = calc(i, ms); - if (isnormal(data)) { - result += data; + for (size_t i = 0; i < m_hashers; ++i) { + for(size_t j = 0; j < m_workers[i]; j++) { + data = calc(i, j, ms); + if (std::isnormal(data)) { + result += data; + } } } @@ -88,10 +98,12 @@ double Hashrate::calc(size_t ms) const } -double Hashrate::calc(size_t threadId, size_t ms) const +double Hashrate::calc(size_t hasherId, size_t workerId, size_t ms) const { - assert(threadId < m_threads); - if (threadId >= m_threads) { + assert(hasherId < m_hashers); + assert(workerId < m_workers[hasherId]); + + if (hasherId >= m_hashers || workerId >= m_workers[hasherId]) { return nan(""); } @@ -105,24 +117,24 @@ double Hashrate::calc(size_t threadId, size_t ms) const bool haveFullSet = false; for (size_t i = 1; i < kBucketSize; i++) { - const size_t idx = (m_top[threadId] - i) & kBucketMask; + const size_t idx = (m_top[hasherId][workerId] - i) & kBucketMask; - if (m_timestamps[threadId][idx] == 0) { + if (m_timestamps[hasherId][workerId][idx] == 0) { break; } if (lastestStamp == 0) { - lastestStamp = m_timestamps[threadId][idx]; - lastestHashCnt = m_counts[threadId][idx]; + lastestStamp = m_timestamps[hasherId][workerId][idx]; + lastestHashCnt = m_counts[hasherId][workerId][idx]; } - if (now - m_timestamps[threadId][idx] > ms) { + if (now - m_timestamps[hasherId][workerId][idx] > ms) { haveFullSet = true; break; } - earliestStamp = m_timestamps[threadId][idx]; - 
earliestHashCount = m_counts[threadId][idx]; + earliestStamp = m_timestamps[hasherId][workerId][idx]; + earliestHashCount = m_counts[hasherId][workerId][idx]; } if (!haveFullSet || earliestStamp == 0 || lastestStamp == 0) { @@ -142,13 +154,13 @@ double Hashrate::calc(size_t threadId, size_t ms) const } -void Hashrate::add(size_t threadId, uint64_t count, uint64_t timestamp) +void Hashrate::add(size_t hasherId, size_t workerId, uint64_t count, uint64_t timestamp) { - const size_t top = m_top[threadId]; - m_counts[threadId][top] = count; - m_timestamps[threadId][top] = timestamp; + const size_t top = m_top[hasherId][workerId]; + m_counts[hasherId][workerId][top] = count; + m_timestamps[hasherId][workerId][top] = timestamp; - m_top[threadId] = (top + 1) & kBucketMask; + m_top[hasherId][workerId] = (top + 1) & kBucketMask; } @@ -178,7 +190,7 @@ void Hashrate::stop() void Hashrate::updateHighest() { double highest = calc(ShortInterval); - if (isnormal(highest) && highest > m_highest) { + if (std::isnormal(highest) && highest > m_highest) { m_highest = highest; } } diff --git a/src/workers/Hashrate.h b/src/workers/Hashrate.h index e766f117..a1f8733f 100644 --- a/src/workers/Hashrate.h +++ b/src/workers/Hashrate.h @@ -32,7 +32,7 @@ namespace xmrig { class Controller; } - +class Handle; class Hashrate { @@ -43,16 +43,15 @@ public: LargeInterval = 900000 }; - Hashrate(size_t threads, xmrig::Controller *controller); + Hashrate(const std::vector &workers, xmrig::Controller *controller); double calc(size_t ms) const; - double calc(size_t threadId, size_t ms) const; - void add(size_t threadId, uint64_t count, uint64_t timestamp); + double calc(size_t hasherId, size_t workerId, size_t ms) const; + void add(size_t hasherId, size_t workerId, uint64_t count, uint64_t timestamp); void print() const; void stop(); void updateHighest(); inline double highest() const { return m_highest; } - inline size_t threads() const { return m_threads; } static const char *format(double h, char 
*buf, size_t size); @@ -63,10 +62,11 @@ private: constexpr static size_t kBucketMask = kBucketSize - 1; double m_highest; - size_t m_threads; - uint32_t* m_top; - uint64_t** m_counts; - uint64_t** m_timestamps; + size_t m_hashers; + size_t* m_workers; + uint32_t** m_top; + uint64_t*** m_counts; + uint64_t*** m_timestamps; uv_timer_t m_timer; xmrig::Controller *m_controller; }; diff --git a/src/workers/MultiWorker.cpp b/src/workers/MultiWorker.cpp deleted file mode 100644 index 02eec378..00000000 --- a/src/workers/MultiWorker.cpp +++ /dev/null @@ -1,273 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - - -#include - - -#include "crypto/CryptoNight_test.h" -#include "common/log/Log.h" -#include "workers/CpuThread.h" -#include "workers/MultiWorker.h" -#include "workers/Workers.h" - - -template -MultiWorker::MultiWorker(Handle *handle) - : Worker(handle) -{ - m_memory = Mem::create(m_ctx, m_thread->algorithm(), N); -} - - -template -MultiWorker::~MultiWorker() -{ - Mem::release(m_ctx, N, m_memory); -} - - -template -bool MultiWorker::selfTest() -{ - using namespace xmrig; - - if (m_thread->algorithm() == CRYPTONIGHT) { - const bool rc = verify(VARIANT_0, test_output_v0) && - verify(VARIANT_1, test_output_v1) && - verify(VARIANT_2, test_output_v2) && - verify(VARIANT_XTL, test_output_xtl) && - verify(VARIANT_MSR, test_output_msr) && - verify(VARIANT_XAO, test_output_xao) && - verify(VARIANT_RTO, test_output_rto) && - verify(VARIANT_HALF, test_output_half) && - verify2(VARIANT_WOW, test_output_wow) && - verify2(VARIANT_4, test_output_r) && - verify(VARIANT_RWZ, test_output_rwz) && - verify(VARIANT_ZLS, test_output_zls) && - verify(VARIANT_DOUBLE, test_output_double); - -# ifndef XMRIG_NO_CN_GPU - if (!rc || N > 1) { - return rc; - } - - return verify(VARIANT_GPU, test_output_gpu); -# else - return rc; -# endif - } - -# ifndef XMRIG_NO_AEON - if (m_thread->algorithm() == CRYPTONIGHT_LITE) { - return verify(VARIANT_0, test_output_v0_lite) && - verify(VARIANT_1, test_output_v1_lite); - } -# endif - -# ifndef XMRIG_NO_SUMO - if (m_thread->algorithm() == CRYPTONIGHT_HEAVY) { - return verify(VARIANT_0, test_output_v0_heavy) && - verify(VARIANT_XHV, test_output_xhv_heavy) && - verify(VARIANT_TUBE, test_output_tube_heavy); - } -# endif - -# ifndef XMRIG_NO_CN_PICO - if (m_thread->algorithm() == CRYPTONIGHT_PICO) { - return verify(VARIANT_TRTL, test_output_pico_trtl); - } -# endif - - return false; -} - - -template -void MultiWorker::start() -{ - while (Workers::sequence() > 0) { - if (Workers::isPaused()) { - do { - 
std::this_thread::sleep_for(std::chrono::milliseconds(200)); - } - while (Workers::isPaused()); - - if (Workers::sequence() == 0) { - break; - } - - consumeJob(); - } - - while (!Workers::isOutdated(m_sequence)) { - if ((m_count & 0x7) == 0) { - storeStats(); - } - - m_thread->fn(m_state.job.algorithm().variant())(m_state.blob, m_state.job.size(), m_hash, m_ctx, m_state.job.height()); - - for (size_t i = 0; i < N; ++i) { - if (*reinterpret_cast(m_hash + (i * 32) + 24) < m_state.job.target()) { - Workers::submit(xmrig::JobResult(m_state.job.poolId(), m_state.job.id(), m_state.job.clientId(), *nonce(i), m_hash + (i * 32), m_state.job.diff(), m_state.job.algorithm())); - } - - *nonce(i) += 1; - } - - m_count += N; - - std::this_thread::yield(); - } - - consumeJob(); - } -} - - -template -bool MultiWorker::resume(const xmrig::Job &job) -{ - if (m_state.job.poolId() == -1 && job.poolId() >= 0 && job.id() == m_pausedState.job.id()) { - m_state = m_pausedState; - return true; - } - - return false; -} - - -template -bool MultiWorker::verify(xmrig::Variant variant, const uint8_t *referenceValue) -{ - - xmrig::CpuThread::cn_hash_fun func = m_thread->fn(variant); - if (!func) { - return false; - } - - func(test_input, 76, m_hash, m_ctx, 0); - return memcmp(m_hash, referenceValue, sizeof m_hash) == 0; -} - - -template -bool MultiWorker::verify2(xmrig::Variant variant, const uint8_t *referenceValue) -{ - xmrig::CpuThread::cn_hash_fun func = m_thread->fn(variant); - if (!func) { - return false; - } - - for (size_t i = 0; i < (sizeof(cn_r_test_input) / sizeof(cn_r_test_input[0])); ++i) { - const size_t size = cn_r_test_input[i].size; - for (size_t k = 0; k < N; ++k) { - memcpy(m_state.blob + (k * size), cn_r_test_input[i].data, size); - } - - func(m_state.blob, size, m_hash, m_ctx, cn_r_test_input[i].height); - - for (size_t k = 0; k < N; ++k) { - if (memcmp(m_hash + k * 32, referenceValue + i * 32, sizeof m_hash / N) != 0) { - return false; - } - } - } - - return true; -} - - 
-template<> -bool MultiWorker<1>::verify2(xmrig::Variant variant, const uint8_t *referenceValue) -{ - xmrig::CpuThread::cn_hash_fun func = m_thread->fn(variant); - if (!func) { - return false; - } - - for (size_t i = 0; i < (sizeof(cn_r_test_input) / sizeof(cn_r_test_input[0])); ++i) { - func(cn_r_test_input[i].data, cn_r_test_input[i].size, m_hash, m_ctx, cn_r_test_input[i].height); - - if (memcmp(m_hash, referenceValue + i * 32, sizeof m_hash) != 0) { - return false; - } - } - - return true; -} - - -template -void MultiWorker::consumeJob() -{ - xmrig::Job job = Workers::job(); - m_sequence = Workers::sequence(); - if (m_state.job == job) { - return; - } - - save(job); - - if (resume(job)) { - return; - } - - m_state.job = job; - - const size_t size = m_state.job.size(); - memcpy(m_state.blob, m_state.job.blob(), m_state.job.size()); - - if (N > 1) { - for (size_t i = 1; i < N; ++i) { - memcpy(m_state.blob + (i * size), m_state.blob, size); - } - } - - for (size_t i = 0; i < N; ++i) { - if (m_state.job.isNicehash()) { - *nonce(i) = (*nonce(i) & 0xff000000U) + (0xffffffU / m_totalWays * (m_offset + i)); - } - else { - *nonce(i) = 0xffffffffU / m_totalWays * (m_offset + i); - } - } -} - - -template -void MultiWorker::save(const xmrig::Job &job) -{ - if (job.poolId() == -1 && m_state.job.poolId() >= 0) { - m_pausedState = m_state; - } -} - - -template class MultiWorker<1>; -template class MultiWorker<2>; -template class MultiWorker<3>; -template class MultiWorker<4>; -template class MultiWorker<5>; diff --git a/src/workers/MultiWorker.h b/src/workers/MultiWorker.h deleted file mode 100644 index b7e4c8ca..00000000 --- a/src/workers/MultiWorker.h +++ /dev/null @@ -1,76 +0,0 @@ -/* XMRig - * Copyright 2010 Jeff Garzik - * Copyright 2012-2014 pooler - * Copyright 2014 Lucas Jones - * Copyright 2014-2016 Wolf9466 - * Copyright 2016 Jay D Dee - * Copyright 2017-2018 XMR-Stak , - * Copyright 2018 Lee Clagett - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef XMRIG_MULTIWORKER_H -#define XMRIG_MULTIWORKER_H - - -#include "common/net/Job.h" -#include "Mem.h" -#include "net/JobResult.h" -#include "workers/Worker.h" - - -class Handle; - - -template -class MultiWorker : public Worker -{ -public: - MultiWorker(Handle *handle); - ~MultiWorker(); - -protected: - bool selfTest() override; - void start() override; - -private: - bool resume(const xmrig::Job &job); - bool verify(xmrig::Variant variant, const uint8_t *referenceValue); - bool verify2(xmrig::Variant variant, const uint8_t *referenceValue); - void consumeJob(); - void save(const xmrig::Job &job); - - inline uint32_t *nonce(size_t index) - { - return reinterpret_cast(m_state.blob + (index * m_state.job.size()) + 39); - } - - struct State - { - alignas(16) uint8_t blob[xmrig::Job::kMaxBlobSize * N]; - xmrig::Job job; - }; - - - cryptonight_ctx *m_ctx[N]; - State m_pausedState; - State m_state; - uint8_t m_hash[N * 32]; -}; - - -#endif /* XMRIG_MULTIWORKER_H */ diff --git a/src/workers/Worker.cpp b/src/workers/Worker.cpp index c569908c..40cb338d 100644 --- a/src/workers/Worker.cpp +++ b/src/workers/Worker.cpp @@ -26,29 +26,25 @@ #include "common/cpu/Cpu.h" #include "common/Platform.h" -#include "workers/CpuThread.h" +#include "core/HasherConfig.h" #include "workers/Handle.h" #include "workers/Worker.h" +#include "workers/Workers.h" 
-Worker::Worker(Handle *handle) : - m_id(handle->threadId()), - m_totalWays(handle->totalWays()), - m_offset(handle->offset()), - m_hashCount(0), - m_timestamp(0), - m_count(0), - m_sequence(0), - m_thread(static_cast(handle->config())) +Worker::Worker(Handle *handle, int workerIdx) : + m_id(workerIdx), + m_hashCount(0), + m_timestamp(0), + m_count(0), + m_sequence(0), + m_config(static_cast(handle->config())), + m_hasher(handle->hasher()) { - if (xmrig::Cpu::info()->threads() > 1 && m_thread->affinity() != -1L) { - Platform::setThreadAffinity(m_thread->affinity()); - } - - Platform::setThreadPriority(m_thread->priority()); + m_offset = handle->offset() + m_id; + m_hash = new uint8_t[m_hasher->parallelism(m_id) * 36]; } - void Worker::storeStats() { using namespace std::chrono; @@ -57,3 +53,103 @@ void Worker::storeStats() m_hashCount.store(m_count, std::memory_order_relaxed); m_timestamp.store(timestamp, std::memory_order_relaxed); } + +bool Worker::selfTest() +{ + return true; +} + +void Worker::start() { + if(m_hasher->type() == "CPU" && m_hasher->subType() == "CPU") { + if (xmrig::Cpu::info()->threads() > 1 && m_config->getCPUAffinity(m_id) != -1L) { + Platform::setThreadAffinity(m_config->getCPUAffinity(m_id)); + } + } + + Platform::setThreadPriority(m_config->priority()); + int parallelism = m_hasher->parallelism(m_id); + + while (Workers::sequence() > 0) { + if (Workers::isPaused()) { + do { + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + } + while (Workers::isPaused()); + + if (Workers::sequence() == 0) { + break; + } + + consumeJob(); + } + + while (!Workers::isOutdated(m_sequence)) { + int hashCount = m_hasher->compute(m_id, m_state.blob, m_state.job.size(), m_hash); + + if(hashCount == parallelism) { + + for (size_t i = 0; i < parallelism; ++i) { + if (*reinterpret_cast(m_hash + (i * 36) + 24) < m_state.job.target()) { + Workers::submit(xmrig::JobResult(m_state.job.poolId(), m_state.job.id(), m_state.job.clientId(), + 
*reinterpret_cast(m_hash + (i * 36) + 32), m_hash + (i * 36), m_state.job.diff(), + m_state.job.algorithm())); + } + } + + m_count += parallelism; + } + + storeStats(); + + std::this_thread::yield(); + } + + consumeJob(); + } +} + +bool Worker::consumeJob() { + xmrig::Job job = Workers::job(); + m_sequence = Workers::sequence(); + if (m_state.job == job) { + return false; + } + + save(job); + + if (resume(job)) { + return false; + } + + m_state.job = job; + + const size_t size = m_state.job.size(); + memcpy(m_state.blob, m_state.job.blob(), size); + + uint32_t *nonce = reinterpret_cast(m_state.blob + 39); + if (m_state.job.isNicehash()) { + *nonce = (*nonce & 0xff000000U) + (0xffffffU / Workers::totalThreads() * m_offset); + } + else { + *nonce = 0xffffffffU / Workers::totalThreads() * m_offset; + } + + return true; +} + +bool Worker::resume(const xmrig::Job &job) +{ + if (m_state.job.poolId() == -1 && job.poolId() >= 0 && job.id() == m_pausedState.job.id()) { + m_state = m_pausedState; + return true; + } + + return false; +} + +void Worker::save(const xmrig::Job &job) +{ + if (job.poolId() == -1 && m_state.job.poolId() >= 0) { + m_pausedState = m_state; + } +} diff --git a/src/workers/Worker.h b/src/workers/Worker.h index 73e25033..c34029af 100644 --- a/src/workers/Worker.h +++ b/src/workers/Worker.h @@ -30,39 +30,54 @@ #include "interfaces/IWorker.h" -#include "Mem.h" - +#include "common/net/Job.h" +#include "net/JobResult.h" class Handle; namespace xmrig { - class CpuThread; + class HasherConfig; } class Worker : public IWorker { public: - Worker(Handle *handle); + Worker(Handle *handle, int workerIdx); - inline const MemInfo &memory() const { return m_memory; } inline size_t id() const override { return m_id; } inline uint64_t hashCount() const override { return m_hashCount.load(std::memory_order_relaxed); } inline uint64_t timestamp() const override { return m_timestamp.load(std::memory_order_relaxed); } + inline size_t parallelism() const override { return 
m_hasher->parallelism(m_id); } -protected: + bool selfTest() override; + void start() override; + +private: void storeStats(); + bool consumeJob(); + + bool resume(const xmrig::Job &job); + void save(const xmrig::Job &job); + + struct State + { + alignas(16) uint8_t blob[xmrig::Job::kMaxBlobSize]; + xmrig::Job job; + }; const size_t m_id; - const size_t m_totalWays; - const uint32_t m_offset; - MemInfo m_memory; + uint32_t m_offset; std::atomic m_hashCount; std::atomic m_timestamp; + Hasher *m_hasher; uint64_t m_count; uint64_t m_sequence; - xmrig::CpuThread *m_thread; + xmrig::HasherConfig *m_config; + State m_pausedState; + State m_state; + uint8_t *m_hash; }; diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index f718a52c..77382c9d 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -28,18 +28,17 @@ #include "api/Api.h" +#include "api/ApiRouter.h" #include "common/log/Log.h" #include "core/Config.h" #include "core/Controller.h" -#include "crypto/CryptoNight_constants.h" +#include "crypto/Argon2_constants.h" #include "interfaces/IJobResultListener.h" -#include "interfaces/IThread.h" -#include "Mem.h" #include "rapidjson/document.h" #include "workers/Handle.h" #include "workers/Hashrate.h" -#include "workers/MultiWorker.h" #include "workers/Workers.h" +#include "workers/Worker.h" bool Workers::m_active = false; @@ -58,6 +57,7 @@ uv_mutex_t Workers::m_mutex; uv_rwlock_t Workers::m_rwlock; uv_timer_t Workers::m_timer; xmrig::Controller *Workers::m_controller = nullptr; +std::atomic Workers::m_totalThreads; xmrig::Job Workers::job() @@ -70,26 +70,6 @@ xmrig::Job Workers::job() } -size_t Workers::hugePages() -{ - uv_mutex_lock(&m_mutex); - const size_t hugePages = m_status.hugePages; - uv_mutex_unlock(&m_mutex); - - return hugePages; -} - - -size_t Workers::threads() -{ - uv_mutex_lock(&m_mutex); - const size_t threads = m_status.threads; - uv_mutex_unlock(&m_mutex); - - return threads; -} - - void Workers::printHashrate(bool detail) { 
assert(m_controller != nullptr); @@ -103,19 +83,23 @@ void Workers::printHashrate(bool detail) char num2[8] = { 0 }; char num3[8] = { 0 }; - Log::i()->text("%s| THREAD | AFFINITY | 10s H/s | 60s H/s | 15m H/s |", isColors ? "\x1B[1;37m" : ""); + Log::i()->text("%s| TYPE | ID | 10s H/s | 60s H/s | 15m H/s |", isColors ? "\x1B[1;37m" : ""); size_t i = 0; - for (const xmrig::IThread *thread : m_controller->config()->threads()) { - Log::i()->text("| %6zu | %8" PRId64 " | %7s | %7s | %7s |", - thread->index(), - thread->affinity(), - Hashrate::format(m_hashrate->calc(thread->index(), Hashrate::ShortInterval), num1, sizeof num1), - Hashrate::format(m_hashrate->calc(thread->index(), Hashrate::MediumInterval), num2, sizeof num2), - Hashrate::format(m_hashrate->calc(thread->index(), Hashrate::LargeInterval), num3, sizeof num3) - ); - - i++; + for (const Handle *worker : m_workers) { + for(int i = 0; i < worker->hasher()->deviceCount(); i++) { + Log::i()->text("| %7s | %s%-2d | %7s | %7s | %7s |", + worker->hasher()->subType().c_str(), + worker->hasher()->subType(true).c_str(), + i, + Hashrate::format(m_hashrate->calc(worker->hasherId(), i, Hashrate::ShortInterval), num1, + sizeof num1), + Hashrate::format(m_hashrate->calc(worker->hasherId(), i, Hashrate::MediumInterval), num2, + sizeof num2), + Hashrate::format(m_hashrate->calc(worker->hasherId(), i, Hashrate::LargeInterval), num3, + sizeof num3) + ); + } } } @@ -159,38 +143,22 @@ void Workers::setJob(const xmrig::Job &job, bool donate) } -void Workers::start(xmrig::Controller *controller) +bool Workers::start(xmrig::Controller *controller) { -# ifdef APP_DEBUG - LOG_NOTICE("THREADS ------------------------------------------------------------------"); - for (const xmrig::IThread *thread : controller->config()->threads()) { - thread->print(); - } - LOG_NOTICE("--------------------------------------------------------------------------"); -# endif - -# ifndef XMRIG_NO_ASM - xmrig::CpuThread::patchAsmVariants(); -# endif - 
m_controller = controller; - const std::vector &threads = controller->config()->threads(); + const std::vector &hashers = controller->config()->hasherConfigs(); m_status.algo = controller->config()->algorithm().algo(); + m_status.variant = controller->config()->algorithm().variant(); m_status.colors = controller->config()->isColors(); - m_status.threads = threads.size(); - - for (const xmrig::IThread *thread : threads) { - m_status.ways += thread->multiway(); - } - - m_hashrate = new Hashrate(threads.size(), controller); + m_status.hashers = hashers.size(); uv_mutex_init(&m_mutex); uv_rwlock_init(&m_rwlock); m_sequence = 1; m_paused = 1; + m_totalThreads = 0; uv_async_init(uv_default_loop(), &m_async, Workers::onResult); uv_timer_init(uv_default_loop(), &m_timer); @@ -198,15 +166,29 @@ void Workers::start(xmrig::Controller *controller) uint32_t offset = 0; - for (xmrig::IThread *thread : threads) { - Handle *handle = new Handle(thread, offset, m_status.ways); - offset += thread->multiway(); + for (xmrig::HasherConfig *hasherConfig : hashers) { + Handle *handle = new Handle(controller->config(), hasherConfig, offset); + if(handle->hasher() != nullptr) { + offset += handle->computingThreads(); + m_totalThreads += handle->computingThreads(); - m_workers.push_back(handle); - handle->start(Workers::onReady); + m_workers.push_back(handle); + handle->start(Workers::onReady); + } } - controller->save(); + if(m_workers.size() > 0) { + Log::i()->text(m_status.colors ? 
GREEN_BOLD(" * Hashers initialization complete * ") : " * Hashers initialization complete * "); + + m_hashrate = new Hashrate(m_workers, controller); + + controller->save(); + } + else { + return false; + } + + return true; } @@ -236,60 +218,49 @@ void Workers::submit(const xmrig::JobResult &result) #ifndef XMRIG_NO_API -void Workers::threadsSummary(rapidjson::Document &doc) +void Workers::hashersSummary(rapidjson::Document &doc) { - uv_mutex_lock(&m_mutex); - const uint64_t pages[2] = { m_status.hugePages, m_status.pages }; - const uint64_t memory = m_status.ways * xmrig::cn_select_memory(m_status.algo); - uv_mutex_unlock(&m_mutex); - auto &allocator = doc.GetAllocator(); - rapidjson::Value hugepages(rapidjson::kArrayType); - hugepages.PushBack(pages[0], allocator); - hugepages.PushBack(pages[1], allocator); + rapidjson::Value hashers(rapidjson::kArrayType); - doc.AddMember("hugepages", hugepages, allocator); - doc.AddMember("memory", memory, allocator); + for(int i = 0; i < m_workers.size(); i++) { + Handle *worker = m_workers[i]; + for(int j=0; j < worker->hasher()->deviceCount(); j++) { + rapidjson::Value hasherDoc(rapidjson::kObjectType); + + xmrig::String type = worker->hasher()->type().data(); + xmrig::String id = (worker->hasher()->subType(true) + to_string(j)).data(); + + hasherDoc.AddMember("type", type.toJSON(doc), allocator); + hasherDoc.AddMember("id", id.toJSON(doc), allocator); + + rapidjson::Value hashrateEntry(rapidjson::kArrayType); + hashrateEntry.PushBack(ApiRouter::normalize(m_hashrate->calc(i, j, Hashrate::ShortInterval)), allocator); + hashrateEntry.PushBack(ApiRouter::normalize(m_hashrate->calc(i, j, Hashrate::MediumInterval)), allocator); + hashrateEntry.PushBack(ApiRouter::normalize(m_hashrate->calc(i, j, Hashrate::LargeInterval)), allocator); + + hasherDoc.AddMember("hashrate", hashrateEntry, allocator); + + hashers.PushBack(hasherDoc, allocator); + } + } + + doc.AddMember("hashers", hashers, allocator); } #endif void 
Workers::onReady(void *arg) { - auto handle = static_cast(arg); + auto handleArg = static_cast(arg); - IWorker *worker = nullptr; + IWorker *worker = new Worker(handleArg->handle, handleArg->workerId); - switch (handle->config()->multiway()) { - case 1: - worker = new MultiWorker<1>(handle); - break; - - case 2: - worker = new MultiWorker<2>(handle); - break; - - case 3: - worker = new MultiWorker<3>(handle); - break; - - case 4: - worker = new MultiWorker<4>(handle); - break; - - case 5: - worker = new MultiWorker<5>(handle); - break; - - default: - break; - } - - handle->setWorker(worker); + handleArg->handle->addWorker(worker); if (!worker->selfTest()) { - LOG_ERR("thread %zu error: \"hash self-test failed\".", handle->worker()->id()); + LOG_ERR("hasher %zu error: \"hash self-test failed\".", worker->id()); return; } @@ -319,12 +290,28 @@ void Workers::onResult(uv_async_t *handle) void Workers::onTick(uv_timer_t *handle) { - for (Handle *handle : m_workers) { - if (!handle->worker()) { - return; - } + for (int h =0; h < m_workers.size(); h++) { + Handle *handle = m_workers[h]; - m_hashrate->add(handle->threadId(), handle->worker()->hashCount(), handle->worker()->timestamp()); + std::vector internalWorkers = handle->workers(); + if (internalWorkers.size() == 0) + return; + + int deviceCount = handle->hasher()->deviceCount(); + int computingThreads = internalWorkers.size(); + int multiplier = computingThreads / deviceCount; + + for(int i = 0; i < deviceCount; i++) { + uint64_t hashCount = 0; + uint64_t timeStamp = 0; + + for(int j = 0; j < multiplier; j++) { + hashCount += internalWorkers[i * multiplier + j]->hashCount(); + timeStamp = max(timeStamp, internalWorkers[i * multiplier + j]->timestamp()); + } + + m_hashrate->add(h, i, hashCount, timeStamp); + } } if ((m_ticks++ & 0xF) == 0) { @@ -339,23 +326,19 @@ void Workers::start(IWorker *worker) uv_mutex_lock(&m_mutex); m_status.started++; - m_status.pages += w->memory().pages; - m_status.hugePages += 
w->memory().hugePages; - if (m_status.started == m_status.threads) { - const double percent = (double) m_status.hugePages / m_status.pages * 100.0; - const size_t memory = m_status.ways * xmrig::cn_select_memory(m_status.algo) / 1024; - - if (m_status.colors) { - LOG_INFO(GREEN_BOLD("READY (CPU)") " threads " CYAN_BOLD("%zu(%zu)") " huge pages %s%zu/%zu %1.0f%%\x1B[0m memory " CYAN_BOLD("%zu KB") "", - m_status.threads, m_status.ways, + if (m_status.started == m_status.hashers) { +/// TODO better status description +/* if (m_status.colors) { + LOG_INFO(GREEN_BOLD("READY (CPU)") " threads " CYAN_BOLD("%zu") " huge pages %s%zu/%zu %1.0f%%\x1B[0m memory " CYAN_BOLD("%.2f KB") "", + m_status.hashers, (m_status.hugePages == m_status.pages ? "\x1B[1;32m" : (m_status.hugePages == 0 ? "\x1B[1;31m" : "\x1B[1;33m")), m_status.hugePages, m_status.pages, percent, memory); } else { - LOG_INFO("READY (CPU) threads %zu(%zu) huge pages %zu/%zu %1.0f%% memory %zu KB", - m_status.threads, m_status.ways, m_status.hugePages, m_status.pages, percent, memory); - } + LOG_INFO("READY (CPU) threads %zu huge pages %zu/%zu %1.0f%% memory %zu KB", + m_status.hashers, m_status.hugePages, m_status.pages, percent, memory); + } */ } uv_mutex_unlock(&m_mutex); diff --git a/src/workers/Workers.h b/src/workers/Workers.h index a9b8e695..8c42c8b3 100644 --- a/src/workers/Workers.h +++ b/src/workers/Workers.h @@ -51,12 +51,10 @@ class Workers { public: static xmrig::Job job(); - static size_t hugePages(); - static size_t threads(); static void printHashrate(bool detail); static void setEnabled(bool enabled); static void setJob(const xmrig::Job &job, bool donate); - static void start(xmrig::Controller *controller); + static bool start(xmrig::Controller *controller); static void stop(); static void submit(const xmrig::JobResult &result); @@ -67,9 +65,11 @@ public: static inline uint64_t sequence() { return m_sequence.load(std::memory_order_relaxed); } static inline void pause() { m_active = false; 
m_paused = 1; m_sequence++; } static inline void setListener(xmrig::IJobResultListener *listener) { m_listener = listener; } + static inline int totalThreads() { return m_totalThreads.load(std::memory_order_relaxed); } + static inline std::vector workers() { return m_workers; } # ifndef XMRIG_NO_API - static void threadsSummary(rapidjson::Document &doc); + static void hashersSummary(rapidjson::Document &doc); # endif private: @@ -82,22 +82,17 @@ private: { public: inline LaunchStatus() : - colors(true), - hugePages(0), - pages(0), - started(0), - threads(0), - ways(0), - algo(xmrig::CRYPTONIGHT) + colors(true), + started(0), + hashers(0), + algo(xmrig::ARGON2) {} bool colors; - size_t hugePages; - size_t pages; size_t started; - size_t threads; - size_t ways; + size_t hashers; xmrig::Algo algo; + xmrig::Variant variant; }; static bool m_active; @@ -110,6 +105,7 @@ private: static std::atomic m_sequence; static std::list m_queue; static std::vector m_workers; + static std::atomic m_totalThreads; static uint64_t m_ticks; static uv_async_t m_async; static uv_mutex_t m_mutex;