Integration of cnv2 and asm optimizations for cnv1&cnv2 thx to @SChernykh (#185)
This commit is contained in:
parent
fe5f6f0673
commit
89e210ddd7
33 changed files with 4957 additions and 862 deletions
|
@ -10,6 +10,7 @@ option(WITH_HTTPD "HTTP REST API" OFF)
|
||||||
option(WITH_CC_CLIENT "CC Client" ON)
|
option(WITH_CC_CLIENT "CC Client" ON)
|
||||||
option(WITH_CC_SERVER "CC Server" ON)
|
option(WITH_CC_SERVER "CC Server" ON)
|
||||||
option(WITH_TLS "TLS support" ON)
|
option(WITH_TLS "TLS support" ON)
|
||||||
|
option(WITH_ASM "ASM optimizations" ON)
|
||||||
option(BUILD_STATIC "Build static binary" OFF)
|
option(BUILD_STATIC "Build static binary" OFF)
|
||||||
set(Boost_USE_STATIC_RUNTIME ON)
|
set(Boost_USE_STATIC_RUNTIME ON)
|
||||||
set(Boost_USE_STATIC_LIBS ON)
|
set(Boost_USE_STATIC_LIBS ON)
|
||||||
|
@ -128,7 +129,7 @@ find_package(UV REQUIRED)
|
||||||
|
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
add_definitions(-DBOOST_ALL_NO_LIB)
|
add_definitions(-DBOOST_ALL_NO_LIB)
|
||||||
endif()
|
endif(WIN32)
|
||||||
|
|
||||||
find_package(Boost 1.63.0 COMPONENTS system REQUIRED)
|
find_package(Boost 1.63.0 COMPONENTS system REQUIRED)
|
||||||
|
|
||||||
|
@ -144,10 +145,10 @@ if (WITH_TLS)
|
||||||
set(SOURCES_SSL_TLS src/net/BoostTlsConnection.cpp)
|
set(SOURCES_SSL_TLS src/net/BoostTlsConnection.cpp)
|
||||||
else()
|
else()
|
||||||
message(FATAL_ERROR "OpenSSL NOT found: use `-DWITH_TLS=OFF` to build without TLS support")
|
message(FATAL_ERROR "OpenSSL NOT found: use `-DWITH_TLS=OFF` to build without TLS support")
|
||||||
endif()
|
endif(OPENSSL_FOUND)
|
||||||
else()
|
else()
|
||||||
add_definitions(/DXMRIG_NO_TLS)
|
add_definitions(/DXMRIG_NO_TLS)
|
||||||
endif()
|
endif(WITH_TLS)
|
||||||
|
|
||||||
if (WITH_LIBCPUID)
|
if (WITH_LIBCPUID)
|
||||||
add_subdirectory(src/3rdparty/libcpuid)
|
add_subdirectory(src/3rdparty/libcpuid)
|
||||||
|
@ -162,8 +163,8 @@ else()
|
||||||
set(SOURCES_CPUID src/Cpu_arm.cpp)
|
set(SOURCES_CPUID src/Cpu_arm.cpp)
|
||||||
else()
|
else()
|
||||||
set(SOURCES_CPUID src/Cpu_stub.cpp)
|
set(SOURCES_CPUID src/Cpu_stub.cpp)
|
||||||
endif()
|
endif(XMRIG_ARM)
|
||||||
endif()
|
endif(WITH_LIBCPUID)
|
||||||
|
|
||||||
CHECK_INCLUDE_FILE (syslog.h HAVE_SYSLOG_H)
|
CHECK_INCLUDE_FILE (syslog.h HAVE_SYSLOG_H)
|
||||||
if (HAVE_SYSLOG_H)
|
if (HAVE_SYSLOG_H)
|
||||||
|
@ -179,11 +180,11 @@ if (WITH_HTTPD)
|
||||||
set(HTTPD_SOURCES src/api/Httpd.h src/api/Httpd.cpp)
|
set(HTTPD_SOURCES src/api/Httpd.h src/api/Httpd.cpp)
|
||||||
else()
|
else()
|
||||||
message(FATAL_ERROR "microhttpd NOT found: use `-DWITH_HTTPD=OFF` to build without http deamon support")
|
message(FATAL_ERROR "microhttpd NOT found: use `-DWITH_HTTPD=OFF` to build without http deamon support")
|
||||||
endif()
|
endif(MHD_FOUND)
|
||||||
else()
|
else()
|
||||||
add_definitions(/DXMRIG_NO_HTTPD)
|
add_definitions(/DXMRIG_NO_HTTPD)
|
||||||
add_definitions(/DXMRIG_NO_API)
|
add_definitions(/DXMRIG_NO_API)
|
||||||
endif()
|
endif(WITH_HTTPD)
|
||||||
|
|
||||||
if (WITH_CC_SERVER)
|
if (WITH_CC_SERVER)
|
||||||
find_package(MHD)
|
find_package(MHD)
|
||||||
|
@ -192,7 +193,7 @@ if (WITH_CC_SERVER)
|
||||||
include_directories(${MHD_INCLUDE_DIRS})
|
include_directories(${MHD_INCLUDE_DIRS})
|
||||||
else()
|
else()
|
||||||
message(FATAL_ERROR "microhttpd NOT found: use `-DWITH_CC_SERVER=OFF` to build without CC Server support")
|
message(FATAL_ERROR "microhttpd NOT found: use `-DWITH_CC_SERVER=OFF` to build without CC Server support")
|
||||||
endif()
|
endif(MHD_FOUND)
|
||||||
|
|
||||||
set(SOURCES_CC_SERVER
|
set(SOURCES_CC_SERVER
|
||||||
src/cc/CCServer.cpp
|
src/cc/CCServer.cpp
|
||||||
|
@ -201,12 +202,12 @@ if (WITH_CC_SERVER)
|
||||||
src/cc/Httpd.cpp
|
src/cc/Httpd.cpp
|
||||||
src/cc/XMRigCC.cpp
|
src/cc/XMRigCC.cpp
|
||||||
)
|
)
|
||||||
endif()
|
endif(WITH_CC_SERVER)
|
||||||
|
|
||||||
if (WITH_CC_CLIENT)
|
if (WITH_CC_CLIENT)
|
||||||
set(SOURCES_CC_CLIENT
|
set(SOURCES_CC_CLIENT
|
||||||
src/cc/CCClient.cpp)
|
src/cc/CCClient.cpp)
|
||||||
endif()
|
endif(WITH_CC_CLIENT)
|
||||||
|
|
||||||
if (WITH_CC_SERVER OR WITH_CC_CLIENT)
|
if (WITH_CC_SERVER OR WITH_CC_CLIENT)
|
||||||
set(SOURCES_CC_COMMON
|
set(SOURCES_CC_COMMON
|
||||||
|
@ -215,11 +216,34 @@ if (WITH_CC_SERVER OR WITH_CC_CLIENT)
|
||||||
src/cc/GPUInfo.cpp)
|
src/cc/GPUInfo.cpp)
|
||||||
else()
|
else()
|
||||||
add_definitions(/DXMRIG_NO_CC)
|
add_definitions(/DXMRIG_NO_CC)
|
||||||
|
endif(WITH_CC_SERVER OR WITH_CC_CLIENT)
|
||||||
|
|
||||||
|
# Optional hand-written assembly implementation.
# The xmrig_asm static library is only created for 64-bit, non-ARM builds
# with WITH_ASM enabled; every other configuration defines XMRIG_NO_ASM.
if (NOT WITH_ASM OR XMRIG_ARM OR NOT (CMAKE_SIZEOF_VOID_P EQUAL 8))
    add_definitions(/DXMRIG_NO_ASM)
else()
    if (CMAKE_C_COMPILER_ID MATCHES MSVC)
        # MSVC toolchain: the .asm source is assembled through ASM_MASM.
        set(XMRIG_ASM_FILE "src/crypto/asm/cn_main_loop.asm")
        enable_language(ASM_MASM)
        # NOTE(review): this sets a source property literally named ASM_MASM
        # with no value; `PROPERTY LANGUAGE ASM_MASM` may have been intended - confirm.
        set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY ASM_MASM)
    else()
        enable_language(ASM)

        # GCC on Windows uses its own variant of the .S source.
        if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
            set(XMRIG_ASM_FILE "src/crypto/asm/cn_main_loop_win_gcc.S")
        else()
            set(XMRIG_ASM_FILE "src/crypto/asm/cn_main_loop.S")
        endif()

        # NOTE(review): same pattern as above - a valueless property named C;
        # `PROPERTY LANGUAGE C` may have been intended - confirm.
        set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY C)
    endif()

    add_library(xmrig_asm STATIC ${XMRIG_ASM_FILE})
    set_target_properties(xmrig_asm PROPERTIES LINKER_LANGUAGE C)
endif()
|
||||||
|
|
||||||
if (BUILD_STATIC)
|
if (BUILD_STATIC)
|
||||||
set(CMAKE_EXE_LINKER_FLAGS " -static")
|
set(CMAKE_EXE_LINKER_FLAGS " -static")
|
||||||
endif()
|
endif(BUILD_STATIC)
|
||||||
|
|
||||||
include_directories(src)
|
include_directories(src)
|
||||||
include_directories(src/3rdparty)
|
include_directories(src/3rdparty)
|
||||||
|
@ -253,6 +277,10 @@ if (WITH_TLS)
|
||||||
target_link_libraries(xmrigMiner xmrig_tls ${OPENSSL_LIBRARIES} ${EXTRA_LIBS})
|
target_link_libraries(xmrigMiner xmrig_tls ${OPENSSL_LIBRARIES} ${EXTRA_LIBS})
|
||||||
endif(WITH_TLS)
|
endif(WITH_TLS)
|
||||||
|
|
||||||
|
# Link the assembly helper library only when the target actually exists.
# xmrig_asm is created solely for 64-bit non-ARM builds, so testing WITH_ASM
# alone would hand a non-existent target to the linker as a plain library
# name on ARM or 32-bit configurations.
# The keyword-less signature is kept to match the other
# target_link_libraries(xmrigMiner ...) calls; CMake forbids mixing the
# plain and keyword signatures on the same target.
if (TARGET xmrig_asm)
    target_link_libraries(xmrigMiner xmrig_asm)
endif()
|
||||||
|
|
||||||
add_executable(xmrigDaemon src/cc/XMRigd.cpp res/app.rc)
|
add_executable(xmrigDaemon src/cc/XMRigd.cpp res/app.rc)
|
||||||
set_target_properties(xmrigDaemon PROPERTIES OUTPUT_NAME ${DAEMON_EXECUTABLE_NAME})
|
set_target_properties(xmrigDaemon PROPERTIES OUTPUT_NAME ${DAEMON_EXECUTABLE_NAME})
|
||||||
|
|
||||||
|
@ -269,6 +297,6 @@ if (WITH_CC_SERVER AND MHD_FOUND)
|
||||||
|
|
||||||
set_target_properties(xmrig_common_cc PROPERTIES COMPILE_FLAGS "-DXMRIG_CC_SERVER ${SHARED_FLAGS}")
|
set_target_properties(xmrig_common_cc PROPERTIES COMPILE_FLAGS "-DXMRIG_CC_SERVER ${SHARED_FLAGS}")
|
||||||
set_target_properties(xmrigCCServer PROPERTIES COMPILE_FLAGS "-DXMRIG_CC_SERVER ${SHARED_FLAGS}")
|
set_target_properties(xmrigCCServer PROPERTIES COMPILE_FLAGS "-DXMRIG_CC_SERVER ${SHARED_FLAGS}")
|
||||||
endif()
|
endif(WITH_CC_SERVER AND MHD_FOUND)
|
||||||
|
|
||||||
add_subdirectory(test EXCLUDE_FROM_ALL)
|
add_subdirectory(test EXCLUDE_FROM_ALL)
|
||||||
|
|
|
@ -16,7 +16,7 @@ clone_folder: c:\xmrigCC
|
||||||
|
|
||||||
install:
|
install:
|
||||||
- mkdir c:\xmrigCC-deps
|
- mkdir c:\xmrigCC-deps
|
||||||
- curl -sL https://github.com/Bendr0id/xmrigCC-deps/releases/download/v2/xmrigCC-deps.zip -o xmrigCC-deps.zip
|
- curl -sL https://github.com/Bendr0id/xmrigCC-deps/releases/download/v3/xmrigCC-deps.zip -o xmrigCC-deps.zip
|
||||||
- 7z x xmrigCC-deps.zip -o"c:\xmrigCC-deps" -y > nul
|
- 7z x xmrigCC-deps.zip -o"c:\xmrigCC-deps" -y > nul
|
||||||
|
|
||||||
build_script:
|
build_script:
|
||||||
|
|
89
src/AsmOptimization.h
Normal file
89
src/AsmOptimization.h
Normal file
|
@ -0,0 +1,89 @@
|
||||||
|
/* XMRigCC
|
||||||
|
* Copyright 2018- BenDr0id <ben@graef.in>
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __ASM_OPTIMIZATION_H__
|
||||||
|
#define __ASM_OPTIMIZATION_H__
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
/**
 * Assembly optimization flavour that can be requested or auto-detected.
 *
 * The enumerator order is relied upon by code that casts a table index to
 * this enum (Options::parseAsmOptimization), so keep both in sync.
 */
enum AsmOptimization
{
    ASM_AUTODETECT,
    ASM_INTEL,
    ASM_RYZEN,
    ASM_NONE
};

/**
 * Returns the textual name of an optimization.
 *
 * @param asmOptimization value to describe
 * @return "INTEL", "RYZEN" or "OFF"; ASM_AUTODETECT and any unknown value
 *         yield "-1", which is the integer form accepted by
 *         parseAsmOptimization(int) for auto detection
 */
inline std::string getAsmOptimizationName(AsmOptimization asmOptimization)
{
    switch (asmOptimization)
    {
        case ASM_INTEL:
            return "INTEL";
        case ASM_RYZEN:
            return "RYZEN";
        case ASM_NONE:
            return "OFF";
        case ASM_AUTODETECT:
        default:
            return "-1";
    }
}

/**
 * Converts the numeric representation into an AsmOptimization.
 *
 * @param optimization -1 = auto detect, 0 = none, 1 = intel, 2 = ryzen
 * @return the matching value; any other number falls back to ASM_AUTODETECT
 */
inline AsmOptimization parseAsmOptimization(int optimization)
{
    switch (optimization) {
        case 0:
            return AsmOptimization::ASM_NONE;
        case 1:
            return AsmOptimization::ASM_INTEL;
        case 2:
            return AsmOptimization::ASM_RYZEN;
        case -1:
        default:
            return AsmOptimization::ASM_AUTODETECT;
    }
}

/**
 * Converts the textual representation into an AsmOptimization.
 *
 * Matching is case-insensitive (ASCII) so that the upper-case names produced
 * by getAsmOptimizationName() ("INTEL", "RYZEN", "OFF") parse back to the
 * value they were generated from instead of silently becoming auto detect.
 *
 * @param optimization "0"/"none"/"off", "1"/"intel" or "2"/"ryzen"
 * @return the matching value; anything else yields ASM_AUTODETECT
 */
inline AsmOptimization parseAsmOptimization(const std::string optimization)
{
    // ASCII-only lower-casing; avoids locale-dependent behaviour and needs no
    // header beyond the <algorithm> include this file already has.
    std::string normalized(optimization);
    std::transform(normalized.begin(), normalized.end(), normalized.begin(),
                   [](char c) { return (c >= 'A' && c <= 'Z') ? static_cast<char>(c - 'A' + 'a') : c; });

    AsmOptimization asmOptimization = AsmOptimization::ASM_AUTODETECT;

    if (normalized == "0" || normalized == "none" || normalized == "off") {
        asmOptimization = AsmOptimization::ASM_NONE;
    } else if (normalized == "1" || normalized == "intel") {
        asmOptimization = AsmOptimization::ASM_INTEL;
    } else if (normalized == "2" || normalized == "ryzen") {
        asmOptimization = AsmOptimization::ASM_RYZEN;
    }

    return asmOptimization;
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* __ASM_OPTIMIZATION_H__ */
|
|
@ -48,6 +48,7 @@ CpuImpl::CpuImpl()
|
||||||
, m_sockets(1)
|
, m_sockets(1)
|
||||||
, m_totalCores(0)
|
, m_totalCores(0)
|
||||||
, m_totalThreads(0)
|
, m_totalThreads(0)
|
||||||
|
, m_asmOptimization(AsmOptimization::ASM_NONE)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -86,7 +87,7 @@ void CpuImpl::optimizeParameters(size_t& threadsCount, size_t& hashFactor,
|
||||||
if (threadsCount > maximumReasonableThreadCount) {
|
if (threadsCount > maximumReasonableThreadCount) {
|
||||||
threadsCount = maximumReasonableThreadCount;
|
threadsCount = maximumReasonableThreadCount;
|
||||||
}
|
}
|
||||||
if (hashFactor > maximumReasonableFactor / threadsCount) {
|
if (threadsCount > 0 && hashFactor > maximumReasonableFactor / threadsCount) {
|
||||||
hashFactor = std::min(maximumReasonableFactor / threadsCount, maximumReasonableHashFactor);
|
hashFactor = std::min(maximumReasonableFactor / threadsCount, maximumReasonableHashFactor);
|
||||||
hashFactor = std::max(hashFactor, static_cast<size_t>(1));
|
hashFactor = std::max(hashFactor, static_cast<size_t>(1));
|
||||||
}
|
}
|
||||||
|
@ -106,6 +107,7 @@ void CpuImpl::optimizeParameters(size_t& threadsCount, size_t& hashFactor,
|
||||||
}
|
}
|
||||||
threadsCount = std::max(threadsCount, static_cast<size_t>(1));
|
threadsCount = std::max(threadsCount, static_cast<size_t>(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hashFactor == 0) {
|
if (hashFactor == 0) {
|
||||||
hashFactor = std::min(maximumReasonableHashFactor, maximumReasonableFactor / threadsCount);
|
hashFactor = std::min(maximumReasonableHashFactor, maximumReasonableFactor / threadsCount);
|
||||||
hashFactor = std::max(hashFactor, static_cast<size_t>(1));
|
hashFactor = std::max(hashFactor, static_cast<size_t>(1));
|
||||||
|
@ -215,3 +217,8 @@ int Cpu::getAssignedCpuId(size_t threadId, int64_t affinityMask)
|
||||||
|
|
||||||
return cpuId;
|
return cpuId;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Static facade: forwards to the CpuImpl instance and returns the
// assembly optimization it reports.
AsmOptimization Cpu::asmOptimization()
{
    auto &&impl = CpuImpl::instance();
    return impl.asmOptimization();
}
|
||||||
|
|
|
@ -54,6 +54,7 @@ public:
|
||||||
static size_t threads();
|
static size_t threads();
|
||||||
static size_t availableCache();
|
static size_t availableCache();
|
||||||
static int getAssignedCpuId(size_t threadId, int64_t affinityMask);
|
static int getAssignedCpuId(size_t threadId, int64_t affinityMask);
|
||||||
|
static AsmOptimization asmOptimization();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -51,6 +51,7 @@ public:
|
||||||
size_t sockets() { return m_sockets; }
|
size_t sockets() { return m_sockets; }
|
||||||
size_t threads() { return m_totalThreads; }
|
size_t threads() { return m_totalThreads; }
|
||||||
size_t availableCache();
|
size_t availableCache();
|
||||||
|
AsmOptimization asmOptimization() { return m_asmOptimization; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void initCommon();
|
void initCommon();
|
||||||
|
@ -63,6 +64,7 @@ private:
|
||||||
size_t m_sockets;
|
size_t m_sockets;
|
||||||
size_t m_totalCores;
|
size_t m_totalCores;
|
||||||
size_t m_totalThreads;
|
size_t m_totalThreads;
|
||||||
|
AsmOptimization m_asmOptimization;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* __CPU_IMPL_H__ */
|
#endif /* __CPU_IMPL_H__ */
|
||||||
|
|
|
@ -80,4 +80,15 @@ void CpuImpl::initCommon()
|
||||||
if (data.flags[CPU_FEATURE_BMI2]) {
|
if (data.flags[CPU_FEATURE_BMI2]) {
|
||||||
m_flags |= Cpu::BMI2;
|
m_flags |= Cpu::BMI2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ifndef XMRIG_NO_ASM
|
||||||
|
if (data.vendor == VENDOR_AMD && data.ext_family >= 0x17) {
|
||||||
|
m_asmOptimization = AsmOptimization::ASM_RYZEN;
|
||||||
|
} else if (data.vendor == VENDOR_INTEL &&
|
||||||
|
((data.ext_family >= 0x06 && data.ext_model > 0x2) ||
|
||||||
|
(data.ext_family >= 0x06 && data.ext_model == 0x2 && data.model >= 0xA))) {
|
||||||
|
m_asmOptimization = AsmOptimization::ASM_INTEL;
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -73,8 +73,9 @@ Options:\n"
|
||||||
-k, --keepalive send keepalived for prevent timeout (need pool support)\n\
|
-k, --keepalive send keepalived for prevent timeout (need pool support)\n\
|
||||||
-r, --retries=N number of times to retry before switch to backup server (default: 5)\n\
|
-r, --retries=N number of times to retry before switch to backup server (default: 5)\n\
|
||||||
-R, --retry-pause=N time to pause between retries (default: 5)\n\
|
-R, --retry-pause=N time to pause between retries (default: 5)\n\
|
||||||
--pow-variant=V specificy the PoW variat to use: -> auto (default), 0 (v0), 1 (v1, aka monerov7, aeonv7), ipbc (tube), alloy, xtl (including autodetect for v5)\n\
|
--pow-variant=V specificy the PoW variat to use: -> auto (default), 0 (v0), 1 (v1, aka cnv7), 2(v2, aka cnv8), ipbc (tube), alloy, xtl (including autodetect for v5)\n\
|
||||||
for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations\n\
|
for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations\n\
|
||||||
|
--asm-optimization=V specificy the ASM optimization to use: -> 'auto' (default), 'intel', 'ryzen', 'none' \n\
|
||||||
--multihash-factor=N number of hash blocks to process at a time (don't set or 0 enables automatic selection of optimal number of hash blocks)\n\
|
--multihash-factor=N number of hash blocks to process at a time (don't set or 0 enables automatic selection of optimal number of hash blocks)\n\
|
||||||
--multihash-thread-mask=MASK limits multihash to given threads (mask), (default: all threads)\n\
|
--multihash-thread-mask=MASK limits multihash to given threads (mask), (default: all threads)\n\
|
||||||
--cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1\n\
|
--cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1\n\
|
||||||
|
@ -166,7 +167,7 @@ static struct option const options[] = {
|
||||||
{ "userpass", 1, nullptr, 'O' },
|
{ "userpass", 1, nullptr, 'O' },
|
||||||
{ "version", 0, nullptr, 'V' },
|
{ "version", 0, nullptr, 'V' },
|
||||||
{ "use-tls", 0, nullptr, 1015 },
|
{ "use-tls", 0, nullptr, 1015 },
|
||||||
{ "force-pow-version",1, nullptr, 1016 },
|
{ "multihash-thread-mask", 1, nullptr, 4013 },
|
||||||
{ "pow-variant" ,1, nullptr, 1017 },
|
{ "pow-variant" ,1, nullptr, 1017 },
|
||||||
{ "api-port", 1, nullptr, 4000 },
|
{ "api-port", 1, nullptr, 4000 },
|
||||||
{ "api-access-token", 1, nullptr, 4001 },
|
{ "api-access-token", 1, nullptr, 4001 },
|
||||||
|
@ -189,6 +190,7 @@ static struct option const options[] = {
|
||||||
{ "daemonized", 0, nullptr, 4011 },
|
{ "daemonized", 0, nullptr, 4011 },
|
||||||
{ "doublehash-thread-mask", 1, nullptr, 4013 },
|
{ "doublehash-thread-mask", 1, nullptr, 4013 },
|
||||||
{ "multihash-thread-mask", 1, nullptr, 4013 },
|
{ "multihash-thread-mask", 1, nullptr, 4013 },
|
||||||
|
{ "asm-optimization", 1, nullptr, 4020 },
|
||||||
{ nullptr, 0, nullptr, 0 }
|
{ nullptr, 0, nullptr, 0 }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -217,6 +219,7 @@ static struct option const config_options[] = {
|
||||||
{ "pow-variant", 1, nullptr, 1017 },
|
{ "pow-variant", 1, nullptr, 1017 },
|
||||||
{ "doublehash-thread-mask", 1, nullptr, 4013 },
|
{ "doublehash-thread-mask", 1, nullptr, 4013 },
|
||||||
{ "multihash-thread-mask", 1, nullptr, 4013 },
|
{ "multihash-thread-mask", 1, nullptr, 4013 },
|
||||||
|
{ "asm-optimization", 1, nullptr, 4020 },
|
||||||
{ nullptr, 0, nullptr, 0 }
|
{ nullptr, 0, nullptr, 0 }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -282,6 +285,7 @@ constexpr static const char *pow_variant_names[] = {
|
||||||
"auto",
|
"auto",
|
||||||
"0",
|
"0",
|
||||||
"1",
|
"1",
|
||||||
|
"2",
|
||||||
"tube",
|
"tube",
|
||||||
"alloy",
|
"alloy",
|
||||||
"xtl",
|
"xtl",
|
||||||
|
@ -290,6 +294,13 @@ constexpr static const char *pow_variant_names[] = {
|
||||||
"rto"
|
"rto"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Accepted values for --asm-optimization. The position of each name is cast
// directly to AsmOptimization by Options::parseAsmOptimization, so the order
// must match the enum: ASM_AUTODETECT, ASM_INTEL, ASM_RYZEN, ASM_NONE.
static constexpr const char *asm_optimization_names[] = { "auto", "intel", "ryzen", "none" };
|
||||||
|
|
||||||
Options *Options::parse(int argc, char **argv)
|
Options *Options::parse(int argc, char **argv)
|
||||||
{
|
{
|
||||||
auto options = new Options(argc, argv);
|
auto options = new Options(argc, argv);
|
||||||
|
@ -342,6 +353,7 @@ Options::Options(int argc, char **argv) :
|
||||||
m_algoVariant(AV0_AUTO),
|
m_algoVariant(AV0_AUTO),
|
||||||
m_aesni(AESNI_AUTO),
|
m_aesni(AESNI_AUTO),
|
||||||
m_powVariant(POW_AUTODETECT),
|
m_powVariant(POW_AUTODETECT),
|
||||||
|
m_asmOptimization(ASM_AUTODETECT),
|
||||||
m_hashFactor(0),
|
m_hashFactor(0),
|
||||||
m_apiPort(0),
|
m_apiPort(0),
|
||||||
m_donateLevel(kDonateLevel),
|
m_donateLevel(kDonateLevel),
|
||||||
|
@ -400,6 +412,10 @@ Options::Options(int argc, char **argv) :
|
||||||
|
|
||||||
optimizeAlgorithmConfiguration();
|
optimizeAlgorithmConfiguration();
|
||||||
|
|
||||||
|
if (m_asmOptimization == AsmOptimization::ASM_AUTODETECT) {
|
||||||
|
m_asmOptimization = Cpu::asmOptimization();
|
||||||
|
}
|
||||||
|
|
||||||
for (Url *url : m_pools) {
|
for (Url *url : m_pools) {
|
||||||
url->applyExceptions();
|
url->applyExceptions();
|
||||||
}
|
}
|
||||||
|
@ -588,6 +604,9 @@ bool Options::parseArg(int key, const char *arg)
|
||||||
case 4019: /* --cc-upload-config-on-startup */
|
case 4019: /* --cc-upload-config-on-startup */
|
||||||
return parseBoolean(key, true);
|
return parseBoolean(key, true);
|
||||||
|
|
||||||
|
case 4020: /* --asm-optimization */
|
||||||
|
return parseAsmOptimization(arg);
|
||||||
|
|
||||||
case 't': /* --threads */
|
case 't': /* --threads */
|
||||||
if (strncmp(arg, "all", 3) == 0) {
|
if (strncmp(arg, "all", 3) == 0) {
|
||||||
m_threads = Cpu::threads();
|
m_threads = Cpu::threads();
|
||||||
|
@ -1015,11 +1034,16 @@ bool Options::parsePowVariant(const char *powVariant)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "monerov7") || !strcmp(powVariant, "aeonv7") || !strcmp(powVariant, "v7"))) {
|
if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "cnv1") || !strcmp(powVariant, "monerov7") || !strcmp(powVariant, "aeonv7") || !strcmp(powVariant, "v7"))) {
|
||||||
m_powVariant = POW_V1;
|
m_powVariant = POW_V1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "cnv2") || !strcmp(powVariant, "monerov8") || !strcmp(powVariant, "aeonv8") || !strcmp(powVariant, "v8"))) {
|
||||||
|
m_powVariant = POW_V2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (i == ARRAY_SIZE(pow_variant_names) - 1 && !strcmp(powVariant, "stellite")) {
|
if (i == ARRAY_SIZE(pow_variant_names) - 1 && !strcmp(powVariant, "stellite")) {
|
||||||
m_powVariant = POW_XTL;
|
m_powVariant = POW_XTL;
|
||||||
break;
|
break;
|
||||||
|
@ -1049,6 +1073,25 @@ bool Options::parsePowVariant(const char *powVariant)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool Options::parseAsmOptimization(const char *asmOptimization)
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < ARRAY_SIZE(pow_variant_names); i++) {
|
||||||
|
if (pow_variant_names[i] && !strcmp(asmOptimization, asm_optimization_names[i])) {
|
||||||
|
m_asmOptimization = static_cast<AsmOptimization>(i);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i == ARRAY_SIZE(asm_optimization_names) - 1) {
|
||||||
|
showUsage(1);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void Options::optimizeAlgorithmConfiguration()
|
void Options::optimizeAlgorithmConfiguration()
|
||||||
{
|
{
|
||||||
// backwards compatibility for configs still setting algo variant (av)
|
// backwards compatibility for configs still setting algo variant (av)
|
||||||
|
@ -1123,5 +1166,3 @@ bool Options::parseCCUrl(const char* url)
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -34,6 +34,7 @@
|
||||||
|
|
||||||
#include "rapidjson/fwd.h"
|
#include "rapidjson/fwd.h"
|
||||||
#include "PowVariant.h"
|
#include "PowVariant.h"
|
||||||
|
#include "AsmOptimization.h"
|
||||||
|
|
||||||
class Url;
|
class Url;
|
||||||
struct option;
|
struct option;
|
||||||
|
@ -91,6 +92,7 @@ public:
|
||||||
inline const std::vector<Url*> &pools() const { return m_pools; }
|
inline const std::vector<Url*> &pools() const { return m_pools; }
|
||||||
inline Algo algo() const { return m_algo; }
|
inline Algo algo() const { return m_algo; }
|
||||||
inline PowVariant powVariant() const { return m_powVariant; }
|
inline PowVariant powVariant() const { return m_powVariant; }
|
||||||
|
inline AsmOptimization asmOptimization() const { return m_asmOptimization; }
|
||||||
inline bool aesni() const { return m_aesni == AESNI_ON; }
|
inline bool aesni() const { return m_aesni == AESNI_ON; }
|
||||||
inline size_t hashFactor() const { return m_hashFactor; }
|
inline size_t hashFactor() const { return m_hashFactor; }
|
||||||
inline int apiPort() const { return m_apiPort; }
|
inline int apiPort() const { return m_apiPort; }
|
||||||
|
@ -136,6 +138,7 @@ private:
|
||||||
|
|
||||||
bool setAlgo(const char *algo);
|
bool setAlgo(const char *algo);
|
||||||
bool parsePowVariant(const char *powVariant);
|
bool parsePowVariant(const char *powVariant);
|
||||||
|
bool parseAsmOptimization(const char *arg);
|
||||||
|
|
||||||
void optimizeAlgorithmConfiguration();
|
void optimizeAlgorithmConfiguration();
|
||||||
|
|
||||||
|
@ -167,6 +170,7 @@ private:
|
||||||
AlgoVariant m_algoVariant;
|
AlgoVariant m_algoVariant;
|
||||||
AesNi m_aesni;
|
AesNi m_aesni;
|
||||||
PowVariant m_powVariant;
|
PowVariant m_powVariant;
|
||||||
|
AsmOptimization m_asmOptimization;
|
||||||
size_t m_hashFactor;
|
size_t m_hashFactor;
|
||||||
int m_apiPort;
|
int m_apiPort;
|
||||||
int m_donateLevel;
|
int m_donateLevel;
|
||||||
|
|
|
@ -27,6 +27,7 @@ enum PowVariant
|
||||||
POW_AUTODETECT,
|
POW_AUTODETECT,
|
||||||
POW_V0,
|
POW_V0,
|
||||||
POW_V1,
|
POW_V1,
|
||||||
|
POW_V2,
|
||||||
POW_TUBE,
|
POW_TUBE,
|
||||||
POW_ALLOY,
|
POW_ALLOY,
|
||||||
POW_XTL,
|
POW_XTL,
|
||||||
|
@ -44,6 +45,8 @@ inline std::string getPowVariantName(PowVariant powVariant)
|
||||||
return "0";
|
return "0";
|
||||||
case POW_V1:
|
case POW_V1:
|
||||||
return "1";
|
return "1";
|
||||||
|
case POW_V2:
|
||||||
|
return "2";
|
||||||
case POW_TUBE:
|
case POW_TUBE:
|
||||||
return "tube";
|
return "tube";
|
||||||
case POW_ALLOY:
|
case POW_ALLOY:
|
||||||
|
@ -88,6 +91,9 @@ inline PowVariant parseVariant(int variant)
|
||||||
case 1:
|
case 1:
|
||||||
powVariant = PowVariant::POW_V1;
|
powVariant = PowVariant::POW_V1;
|
||||||
break;
|
break;
|
||||||
|
case 2:
|
||||||
|
powVariant = PowVariant::POW_V2;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -104,6 +110,8 @@ inline PowVariant parseVariant(const std::string variant)
|
||||||
powVariant = PowVariant::POW_V0;
|
powVariant = PowVariant::POW_V0;
|
||||||
} else if (variant == "1") {
|
} else if (variant == "1") {
|
||||||
powVariant = PowVariant::POW_V1;
|
powVariant = PowVariant::POW_V1;
|
||||||
|
} else if (variant == "2") {
|
||||||
|
powVariant = PowVariant::POW_V2;
|
||||||
} else if (variant == "ipbc" || variant == "tube" || variant == "bittube") {
|
} else if (variant == "ipbc" || variant == "tube" || variant == "bittube") {
|
||||||
powVariant = PowVariant::POW_TUBE;
|
powVariant = PowVariant::POW_TUBE;
|
||||||
} else if (variant == "xao" || variant == "alloy") {
|
} else if (variant == "xao" || variant == "alloy") {
|
||||||
|
|
|
@ -59,17 +59,21 @@ static void print_versions()
|
||||||
static void print_cpu()
|
static void print_cpu()
|
||||||
{
|
{
|
||||||
if (Options::i()->colors()) {
|
if (Options::i()->colors()) {
|
||||||
Log::i()->text("\x1B[01;32m * \x1B[01;37mCPU: %s (%d) %sx64 %sAES-NI",
|
Log::i()->text("\x1B[01;32m * \x1B[01;37mCPU: %s (%d) %sx64 %sAES-NI %sASM-%s",
|
||||||
Cpu::brand(),
|
Cpu::brand(),
|
||||||
Cpu::sockets(),
|
Cpu::sockets(),
|
||||||
Cpu::isX64() ? "\x1B[01;32m" : "\x1B[01;31m-",
|
Cpu::isX64() ? "\x1B[01;32m" : "\x1B[01;31m-",
|
||||||
Cpu::hasAES() ? "\x1B[01;32m" : "\x1B[01;31m-");
|
Cpu::hasAES() ? "\x1B[01;32m" : "\x1B[01;31m-",
|
||||||
|
Options::i()->asmOptimization() != AsmOptimization::ASM_NONE ? "\x1B[01;32m" : "\x1B[01;31m",
|
||||||
|
getAsmOptimizationName(Options::i()->asmOptimization()).c_str());
|
||||||
# ifndef XMRIG_NO_LIBCPUID
|
# ifndef XMRIG_NO_LIBCPUID
|
||||||
Log::i()->text("\x1B[01;32m * \x1B[01;37mCPU L2/L3: %.1f MB/%.1f MB", Cpu::l2() / 1024.0, Cpu::l3() / 1024.0);
|
Log::i()->text("\x1B[01;32m * \x1B[01;37mCPU L2/L3: %.1f MB/%.1f MB", Cpu::l2() / 1024.0, Cpu::l3() / 1024.0);
|
||||||
# endif
|
# endif
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
Log::i()->text(" * CPU: %s (%d) %sx64 %sAES-NI", Cpu::brand(), Cpu::sockets(), Cpu::isX64() ? "" : "-", Cpu::hasAES() ? "" : "-");
|
Log::i()->text(" * CPU: %s (%d) %sx64 %sAES-NI ASM-%s",
|
||||||
|
Cpu::brand(), Cpu::sockets(), Cpu::isX64() ? "" : "-", Cpu::hasAES() ? "" : "-",
|
||||||
|
getAsmOptimizationName(Options::i()->asmOptimization()).c_str());
|
||||||
# ifndef XMRIG_NO_LIBCPUID
|
# ifndef XMRIG_NO_LIBCPUID
|
||||||
Log::i()->text(" * CPU L2/L3: %.1f MB/%.1f MB", Cpu::l2() / 1024.0, Cpu::l3() / 1024.0);
|
Log::i()->text(" * CPU L2/L3: %.1f MB/%.1f MB", Cpu::l2() / 1024.0, Cpu::l3() / 1024.0);
|
||||||
# endif
|
# endif
|
||||||
|
|
|
@ -4,8 +4,9 @@
|
||||||
"threads": 0, // number of miner threads (not set or 0 enables automatic selection of optimal thread count)
|
"threads": 0, // number of miner threads (not set or 0 enables automatic selection of optimal thread count)
|
||||||
"multihash-factor": 0, // number of hash blocks to process at a time (not set or 0 enables automatic selection of optimal number of hash blocks)
|
"multihash-factor": 0, // number of hash blocks to process at a time (not set or 0 enables automatic selection of optimal number of hash blocks)
|
||||||
"multihash-thread-mask" : null, // for multihash-factors>0 only, limits multihash to given threads (mask), mask "0x3" means run multihash on thread 0 and 1 only (default: all threads)
|
"multihash-thread-mask" : null, // for multihash-factors>0 only, limits multihash to given threads (mask), mask "0x3" means run multihash on thread 0 and 1 only (default: all threads)
|
||||||
"pow-variant" : "auto", // specificy the PoW variat to use: -> auto (default), 0 (v0), 1 (v1, aka monerov7, aeonv7), tube (ipbc), alloy, xtl (including autodetect for v5), msr, xhv, rto
|
"pow-variant" : "auto", // specificy the PoW variat to use: -> auto (default), 0 (v0), 1 (v1, aka monerov7, aeonv7), 2 (v2, aka monerov8), tube (ipbc), alloy, xtl (including autodetect for v5), msr, xhv, rto
|
||||||
// for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations
|
// for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations
|
||||||
|
"asm-optimization" : "auto", // specificy the ASM optimization to use: -> auto (default), intel, ryzen, none
|
||||||
"background": false, // true to run the miner in the background (Windows only, for *nix plase use screen/tmux or systemd service instead)
|
"background": false, // true to run the miner in the background (Windows only, for *nix plase use screen/tmux or systemd service instead)
|
||||||
"colors": true, // false to disable colored output
|
"colors": true, // false to disable colored output
|
||||||
"cpu-affinity": null, // set process affinity to CPU core(s), mask "0x3" for cores 0 and 1
|
"cpu-affinity": null, // set process affinity to CPU core(s), mask "0x3" for cores 0 and 1
|
||||||
|
|
|
@ -34,10 +34,28 @@
|
||||||
#include "crypto/CryptoNight_test.h"
|
#include "crypto/CryptoNight_test.h"
|
||||||
|
|
||||||
template <size_t NUM_HASH_BLOCKS>
|
template <size_t NUM_HASH_BLOCKS>
|
||||||
static void cryptonight_aesni(PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
static void cryptonight_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||||
# if !defined(XMRIG_ARMv7)
|
# if !defined(XMRIG_ARMv7)
|
||||||
if (powVersion == PowVariant::POW_V1) {
|
if (powVersion == PowVariant::POW_V1) {
|
||||||
|
#if defined(XMRIG_ARM)
|
||||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||||
|
#else
|
||||||
|
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||||
|
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||||
|
} else {
|
||||||
|
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
} else if (powVersion == PowVariant::POW_V2) {
|
||||||
|
#if defined(XMRIG_ARM)
|
||||||
|
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||||
|
#else
|
||||||
|
if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) || (asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1)) {
|
||||||
|
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
|
||||||
|
} else {
|
||||||
|
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
} else if (powVersion == PowVariant::POW_ALLOY) {
|
} else if (powVersion == PowVariant::POW_ALLOY) {
|
||||||
CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||||
} else if (powVersion == PowVariant::POW_XTL) {
|
} else if (powVersion == PowVariant::POW_XTL) {
|
||||||
|
@ -53,9 +71,27 @@ static void cryptonight_aesni(PowVariant powVersion, const uint8_t* input, size_
|
||||||
}
|
}
|
||||||
|
|
||||||
template <size_t NUM_HASH_BLOCKS>
|
template <size_t NUM_HASH_BLOCKS>
|
||||||
static void cryptonight_softaes(PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
static void cryptonight_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||||
if (powVersion == PowVariant::POW_V1) {
|
if (powVersion == PowVariant::POW_V1) {
|
||||||
|
#if defined(XMRIG_ARM)
|
||||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||||
|
#else
|
||||||
|
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||||
|
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||||
|
} else {
|
||||||
|
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
} else if (powVersion == PowVariant::POW_V2) {
|
||||||
|
#if defined(XMRIG_ARM)
|
||||||
|
// Soft-AES dispatcher: the fifth template argument must be `true` here, as in every other instantiation in cryptonight_softaes (it was `false`, apparently copied from cryptonight_aesni).
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||||
|
#else
|
||||||
|
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||||
|
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
|
||||||
|
} else {
|
||||||
|
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
} else if (powVersion == PowVariant::POW_ALLOY) {
|
} else if (powVersion == PowVariant::POW_ALLOY) {
|
||||||
CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||||
} else if (powVersion == PowVariant::POW_XTL) {
|
} else if (powVersion == PowVariant::POW_XTL) {
|
||||||
|
@ -70,7 +106,7 @@ static void cryptonight_softaes(PowVariant powVersion, const uint8_t* input, siz
|
||||||
}
|
}
|
||||||
|
|
||||||
template <size_t NUM_HASH_BLOCKS>
|
template <size_t NUM_HASH_BLOCKS>
|
||||||
static void cryptonight_lite_aesni(PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
static void cryptonight_lite_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||||
# if !defined(XMRIG_ARMv7)
|
# if !defined(XMRIG_ARMv7)
|
||||||
if (powVersion == PowVariant::POW_V1) {
|
if (powVersion == PowVariant::POW_V1) {
|
||||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||||
|
@ -83,7 +119,7 @@ static void cryptonight_lite_aesni(PowVariant powVersion, const uint8_t* input,
|
||||||
}
|
}
|
||||||
|
|
||||||
template <size_t NUM_HASH_BLOCKS>
|
template <size_t NUM_HASH_BLOCKS>
|
||||||
static void cryptonight_lite_softaes(PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
static void cryptonight_lite_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||||
if (powVersion == PowVariant::POW_V1) {
|
if (powVersion == PowVariant::POW_V1) {
|
||||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||||
} else if (powVersion == PowVariant::POW_TUBE) {
|
} else if (powVersion == PowVariant::POW_TUBE) {
|
||||||
|
@ -94,7 +130,7 @@ static void cryptonight_lite_softaes(PowVariant powVersion, const uint8_t* input
|
||||||
}
|
}
|
||||||
|
|
||||||
template <size_t NUM_HASH_BLOCKS>
|
template <size_t NUM_HASH_BLOCKS>
|
||||||
static void cryptonight_heavy_aesni(PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
static void cryptonight_heavy_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||||
# if !defined(XMRIG_ARMv7)
|
# if !defined(XMRIG_ARMv7)
|
||||||
if (powVersion == PowVariant::POW_XHV) {
|
if (powVersion == PowVariant::POW_XHV) {
|
||||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad);
|
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad);
|
||||||
|
@ -109,7 +145,7 @@ static void cryptonight_heavy_aesni(PowVariant powVersion, const uint8_t* input,
|
||||||
}
|
}
|
||||||
|
|
||||||
template <size_t NUM_HASH_BLOCKS>
|
template <size_t NUM_HASH_BLOCKS>
|
||||||
static void cryptonight_heavy_softaes(PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
static void cryptonight_heavy_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||||
if (powVersion == PowVariant::POW_XHV) {
|
if (powVersion == PowVariant::POW_XHV) {
|
||||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad);
|
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad);
|
||||||
}
|
}
|
||||||
|
@ -121,7 +157,7 @@ static void cryptonight_heavy_softaes(PowVariant powVersion, const uint8_t* inpu
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void (*cryptonight_hash_ctx[MAX_NUM_HASH_BLOCKS])(PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad);
|
void (*cryptonight_hash_ctx[MAX_NUM_HASH_BLOCKS])(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad);
|
||||||
|
|
||||||
template <size_t HASH_FACTOR>
|
template <size_t HASH_FACTOR>
|
||||||
void setCryptoNightHashMethods(Options::Algo algo, bool aesni)
|
void setCryptoNightHashMethods(Options::Algo algo, bool aesni)
|
||||||
|
@ -163,13 +199,19 @@ void setCryptoNightHashMethods<0>(Options::Algo algo, bool aesni)
|
||||||
|
|
||||||
bool CryptoNight::init(int algo, bool aesni)
|
bool CryptoNight::init(int algo, bool aesni)
|
||||||
{
|
{
|
||||||
|
for (int i = 0; i < 256; ++i)
|
||||||
|
{
|
||||||
|
const uint64_t index = (((i >> 3) & 6) | (i & 1)) << 1;
|
||||||
|
variant1_table[i] = i ^ ((0x75310 >> index) & 0x30);
|
||||||
|
}
|
||||||
|
|
||||||
setCryptoNightHashMethods<MAX_NUM_HASH_BLOCKS>(static_cast<Options::Algo>(algo), aesni);
|
setCryptoNightHashMethods<MAX_NUM_HASH_BLOCKS>(static_cast<Options::Algo>(algo), aesni);
|
||||||
return selfTest(algo);
|
return selfTest(algo);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CryptoNight::hash(size_t factor, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad)
|
void CryptoNight::hash(size_t factor, AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad)
|
||||||
{
|
{
|
||||||
cryptonight_hash_ctx[factor-1](powVersion, input, size, output, scratchPad);
|
cryptonight_hash_ctx[factor-1](asmOptimization, powVersion, input, size, output, scratchPad);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CryptoNight::selfTest(int algo)
|
bool CryptoNight::selfTest(int algo)
|
||||||
|
@ -206,203 +248,231 @@ bool CryptoNight::selfTest(int algo)
|
||||||
bool resultLite = true;
|
bool resultLite = true;
|
||||||
bool resultHeavy = true;
|
bool resultHeavy = true;
|
||||||
|
|
||||||
|
AsmOptimization asmOptimization = Options::i()->asmOptimization();
|
||||||
|
|
||||||
if (algo == Options::ALGO_CRYPTONIGHT_HEAVY) {
|
if (algo == Options::ALGO_CRYPTONIGHT_HEAVY) {
|
||||||
// cn-heavy
|
// cn-heavy
|
||||||
|
|
||||||
cryptonight_hash_ctx[0](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy, 32) == 0;
|
resultHeavy = resultHeavy && memcmp(output, test_output_heavy, 32) == 0;
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 1
|
#if MAX_NUM_HASH_BLOCKS > 1
|
||||||
cryptonight_hash_ctx[1](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy, 64) == 0;
|
resultHeavy = resultHeavy && memcmp(output, test_output_heavy, 64) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 2
|
#if MAX_NUM_HASH_BLOCKS > 2
|
||||||
cryptonight_hash_ctx[2](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy, 96) == 0;
|
resultHeavy = resultHeavy && memcmp(output, test_output_heavy, 96) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// cn-heavy haven
|
// cn-heavy haven
|
||||||
|
|
||||||
cryptonight_hash_ctx[0](PowVariant::POW_XHV, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
|
||||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 32) == 0;
|
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 32) == 0;
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 1
|
#if MAX_NUM_HASH_BLOCKS > 1
|
||||||
cryptonight_hash_ctx[1](PowVariant::POW_XHV, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
|
||||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 64) == 0;
|
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 64) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 2
|
#if MAX_NUM_HASH_BLOCKS > 2
|
||||||
cryptonight_hash_ctx[2](PowVariant::POW_XHV, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
|
||||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 96) == 0;
|
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 96) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// cn-heavy bittube
|
// cn-heavy bittube
|
||||||
|
|
||||||
cryptonight_hash_ctx[0](PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_tube, 32) == 0;
|
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_tube, 32) == 0;
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 1
|
#if MAX_NUM_HASH_BLOCKS > 1
|
||||||
cryptonight_hash_ctx[1](PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_tube, 64) == 0;
|
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_tube, 64) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 2
|
#if MAX_NUM_HASH_BLOCKS > 2
|
||||||
cryptonight_hash_ctx[2](PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_tube, 96) == 0;
|
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_tube, 96) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
} else if (algo == Options::ALGO_CRYPTONIGHT_LITE) {
|
} else if (algo == Options::ALGO_CRYPTONIGHT_LITE) {
|
||||||
// cn-lite v0
|
// cn-lite v0
|
||||||
|
|
||||||
cryptonight_hash_ctx[0](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||||
resultLite = resultLite && memcmp(output, test_output_v0_lite, 32) == 0;
|
resultLite = resultLite && memcmp(output, test_output_v0_lite, 32) == 0;
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 1
|
#if MAX_NUM_HASH_BLOCKS > 1
|
||||||
cryptonight_hash_ctx[1](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||||
resultLite = resultLite && memcmp(output, test_output_v0_lite, 64) == 0;
|
resultLite = resultLite && memcmp(output, test_output_v0_lite, 64) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 2
|
#if MAX_NUM_HASH_BLOCKS > 2
|
||||||
cryptonight_hash_ctx[2](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||||
resultLite = resultLite && memcmp(output, test_output_v0_lite, 96) == 0;
|
resultLite = resultLite && memcmp(output, test_output_v0_lite, 96) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 3
|
#if MAX_NUM_HASH_BLOCKS > 3
|
||||||
cryptonight_hash_ctx[3](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||||
resultLite = resultLite && memcmp(output, test_output_v0_lite, 128) == 0;
|
resultLite = resultLite && memcmp(output, test_output_v0_lite, 128) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 4
|
#if MAX_NUM_HASH_BLOCKS > 4
|
||||||
cryptonight_hash_ctx[4](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||||
resultLite = resultLite && memcmp(output, test_output_v0_lite, 160) == 0;
|
resultLite = resultLite && memcmp(output, test_output_v0_lite, 160) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// cn-lite v7 tests
|
// cn-lite v7 tests
|
||||||
|
|
||||||
cryptonight_hash_ctx[0](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||||
resultLite = resultLite && memcmp(output, test_output_v1_lite, 32) == 0;
|
resultLite = resultLite && memcmp(output, test_output_v1_lite, 32) == 0;
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 1
|
#if MAX_NUM_HASH_BLOCKS > 1
|
||||||
cryptonight_hash_ctx[1](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||||
resultLite = resultLite && memcmp(output, test_output_v1_lite, 64) == 0;
|
resultLite = resultLite && memcmp(output, test_output_v1_lite, 64) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 2
|
#if MAX_NUM_HASH_BLOCKS > 2
|
||||||
cryptonight_hash_ctx[2](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||||
resultLite = resultLite && memcmp(output, test_output_v1_lite, 96) == 0;
|
resultLite = resultLite && memcmp(output, test_output_v1_lite, 96) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 3
|
#if MAX_NUM_HASH_BLOCKS > 3
|
||||||
cryptonight_hash_ctx[3](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||||
resultLite = resultLite && memcmp(output, test_output_v1_lite, 128) == 0;
|
resultLite = resultLite && memcmp(output, test_output_v1_lite, 128) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 4
|
#if MAX_NUM_HASH_BLOCKS > 4
|
||||||
cryptonight_hash_ctx[4](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||||
resultLite = resultLite && memcmp(output, test_output_v1_lite, 160) == 0;
|
resultLite = resultLite && memcmp(output, test_output_v1_lite, 160) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
// cn-lite ipbc tests
|
// cn-lite ipbc tests
|
||||||
|
|
||||||
cryptonight_hash_ctx[0](PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||||
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 32) == 0;
|
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 32) == 0;
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 1
|
#if MAX_NUM_HASH_BLOCKS > 1
|
||||||
cryptonight_hash_ctx[1](PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||||
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 64) == 0;
|
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 64) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 2
|
#if MAX_NUM_HASH_BLOCKS > 2
|
||||||
cryptonight_hash_ctx[2](PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||||
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 96) == 0;
|
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 96) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 3
|
#if MAX_NUM_HASH_BLOCKS > 3
|
||||||
cryptonight_hash_ctx[3](PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||||
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 128) == 0;
|
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 128) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 4
|
#if MAX_NUM_HASH_BLOCKS > 4
|
||||||
cryptonight_hash_ctx[4](PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||||
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 160) == 0;
|
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 160) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// cn v0
|
// cn v0 aka original
|
||||||
|
|
||||||
cryptonight_hash_ctx[0](PowVariant::POW_V0,test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V0,test_input, 76, output, scratchPads);
|
||||||
result = result && memcmp(output, test_output_v0, 32) == 0;
|
result = result && memcmp(output, test_output_v0, 32) == 0;
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 1
|
#if MAX_NUM_HASH_BLOCKS > 1
|
||||||
cryptonight_hash_ctx[1](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||||
result = result && memcmp(output, test_output_v0, 64) == 0;
|
result = result && memcmp(output, test_output_v0, 64) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 2
|
#if MAX_NUM_HASH_BLOCKS > 2
|
||||||
cryptonight_hash_ctx[2](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||||
result = result && memcmp(output, test_output_v0, 96) == 0;
|
result = result && memcmp(output, test_output_v0, 96) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 3
|
#if MAX_NUM_HASH_BLOCKS > 3
|
||||||
cryptonight_hash_ctx[3](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||||
result = result && memcmp(output, test_output_v0, 128) == 0;
|
result = result && memcmp(output, test_output_v0, 128) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 4
|
#if MAX_NUM_HASH_BLOCKS > 4
|
||||||
cryptonight_hash_ctx[4](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||||
result = result && memcmp(output, test_output_v0, 160) == 0;
|
result = result && memcmp(output, test_output_v0, 160) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// cn v7
|
// cn v7 aka cnv1
|
||||||
|
|
||||||
cryptonight_hash_ctx[0](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||||
result = result && memcmp(output, test_output_v1, 32) == 0;
|
result = result && memcmp(output, test_output_v1, 32) == 0;
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 1
|
#if MAX_NUM_HASH_BLOCKS > 1
|
||||||
cryptonight_hash_ctx[1](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||||
result = result && memcmp(output, test_output_v1, 64) == 0;
|
result = result && memcmp(output, test_output_v1, 64) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 2
|
#if MAX_NUM_HASH_BLOCKS > 2
|
||||||
cryptonight_hash_ctx[2](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||||
result = result && memcmp(output, test_output_v1, 96) == 0;
|
result = result && memcmp(output, test_output_v1, 96) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 3
|
#if MAX_NUM_HASH_BLOCKS > 3
|
||||||
cryptonight_hash_ctx[3](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||||
result = result && memcmp(output, test_output_v1, 128) == 0;
|
result = result && memcmp(output, test_output_v1, 128) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 4
|
#if MAX_NUM_HASH_BLOCKS > 4
|
||||||
cryptonight_hash_ctx[4](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||||
result = result && memcmp(output, test_output_v1, 160) == 0;
|
result = result && memcmp(output, test_output_v1, 160) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// cn xtl
|
// cn v7 + xtl
|
||||||
|
|
||||||
cryptonight_hash_ctx[0](PowVariant::POW_XTL,test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_XTL,test_input, 76, output, scratchPads);
|
||||||
result = result && memcmp(output, test_output_xtl, 32) == 0;
|
result = result && memcmp(output, test_output_xtl, 32) == 0;
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 1
|
#if MAX_NUM_HASH_BLOCKS > 1
|
||||||
cryptonight_hash_ctx[1](PowVariant::POW_XTL, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_XTL, test_input, 76, output, scratchPads);
|
||||||
result = result && memcmp(output, test_output_xtl, 64) == 0;
|
result = result && memcmp(output, test_output_xtl, 64) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 2
|
#if MAX_NUM_HASH_BLOCKS > 2
|
||||||
cryptonight_hash_ctx[2](PowVariant::POW_XTL, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_XTL, test_input, 76, output, scratchPads);
|
||||||
result = result && memcmp(output, test_output_xtl, 96) == 0;
|
result = result && memcmp(output, test_output_xtl, 96) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 3
|
#if MAX_NUM_HASH_BLOCKS > 3
|
||||||
cryptonight_hash_ctx[3](PowVariant::POW_XTL, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_XTL, test_input, 76, output, scratchPads);
|
||||||
result = result && memcmp(output, test_output_xtl, 128) == 0;
|
result = result && memcmp(output, test_output_xtl, 128) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MAX_NUM_HASH_BLOCKS > 4
|
#if MAX_NUM_HASH_BLOCKS > 4
|
||||||
cryptonight_hash_ctx[4](PowVariant::POW_XTL, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_XTL, test_input, 76, output, scratchPads);
|
||||||
result = result && memcmp(output, test_output_xtl, 160) == 0;
|
result = result && memcmp(output, test_output_xtl, 160) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// cn v8 aka cnv2
|
||||||
|
|
||||||
|
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||||
|
result = result && memcmp(output, test_output_v2, 32) == 0;
|
||||||
|
|
||||||
|
#if MAX_NUM_HASH_BLOCKS > 1
|
||||||
|
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||||
|
result = result && memcmp(output, test_output_v2, 64) == 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if MAX_NUM_HASH_BLOCKS > 2
|
||||||
|
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||||
|
result = result && memcmp(output, test_output_v2, 96) == 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if MAX_NUM_HASH_BLOCKS > 3
|
||||||
|
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||||
|
result = result && memcmp(output, test_output_v2, 128) == 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if MAX_NUM_HASH_BLOCKS > 4
|
||||||
|
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||||
|
result = result && memcmp(output, test_output_v2, 160) == 0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < MAX_NUM_HASH_BLOCKS; ++i) {
|
for (size_t i = 0; i < MAX_NUM_HASH_BLOCKS; ++i) {
|
||||||
|
|
|
@ -25,9 +25,10 @@
|
||||||
#define __CRYPTONIGHT_H__
|
#define __CRYPTONIGHT_H__
|
||||||
|
|
||||||
|
|
||||||
#include <cstddef>
|
#include <stddef.h>
|
||||||
#include <cstdint>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "AsmOptimization.h"
|
||||||
#include "Options.h"
|
#include "Options.h"
|
||||||
|
|
||||||
#define MEMORY 2097152 /* 2 MiB */
|
#define MEMORY 2097152 /* 2 MiB */
|
||||||
|
@ -38,10 +39,17 @@
|
||||||
#define POW_XLT_V4_INDEX_SHIFT 4
|
#define POW_XLT_V4_INDEX_SHIFT 4
|
||||||
|
|
||||||
struct ScratchPad {
|
struct ScratchPad {
|
||||||
alignas(16) uint8_t state[208]; // 208 instead of 200 to maintain aligned to 16 byte boundaries
|
alignas(16) uint8_t state[224]; // 224 instead of 200 to maintain aligned to 16 byte boundaries
|
||||||
alignas(16) uint8_t* memory;
|
alignas(16) uint8_t* memory;
|
||||||
|
|
||||||
|
// Additional stuff for asm impl
|
||||||
|
uint8_t ctx_info[24];
|
||||||
|
const void* input;
|
||||||
|
uint8_t* variant1_table;
|
||||||
|
const uint32_t* t_fn;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
alignas(64) static uint8_t variant1_table[256];
|
||||||
|
|
||||||
class Job;
|
class Job;
|
||||||
class JobResult;
|
class JobResult;
|
||||||
|
@ -50,8 +58,9 @@ class CryptoNight
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
static bool init(int algo, bool aesni);
|
static bool init(int algo, bool aesni);
|
||||||
|
static void hash(size_t factor, AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPads);
|
||||||
|
|
||||||
static void hash(size_t factor, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPads);
|
public:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static bool selfTest(int algo);
|
static bool selfTest(int algo);
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -84,6 +84,20 @@ const static uint8_t test_output_v1[160] = {
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// CN V8
|
||||||
|
const static uint8_t test_output_v2[160] = {
|
||||||
|
0x97, 0x37, 0x82, 0x82, 0xcf, 0x10, 0xe7, 0xad, 0x03, 0x3f, 0x7b, 0x80, 0x74, 0xc4, 0x0e, 0x14,
|
||||||
|
0xd0, 0x6e, 0x7f, 0x60, 0x9d, 0xdd, 0xda, 0x78, 0x76, 0x80, 0xb5, 0x8c, 0x05, 0xf4, 0x3d, 0x21,
|
||||||
|
0x87, 0x1f, 0xcd, 0x68, 0x23, 0xf6, 0xa8, 0x79, 0xbb, 0x3f, 0x33, 0x95, 0x1c, 0x8e, 0x8e, 0x89,
|
||||||
|
0x1d, 0x40, 0x43, 0x88, 0x0b, 0x02, 0xdf, 0xa1, 0xbb, 0x3b, 0xe4, 0x98, 0xb5, 0x0e, 0x75, 0x78,
|
||||||
|
0xe6, 0x0d, 0x24, 0x0f, 0x65, 0x85, 0x60, 0x3a, 0x4a, 0xe5, 0x5f, 0x54, 0x9b, 0xc8, 0x79, 0x93,
|
||||||
|
0xeb, 0x3d, 0x98, 0x2c, 0xfe, 0x9b, 0xfb, 0x15, 0xb6, 0x88, 0x21, 0x94, 0xb0, 0x05, 0x86, 0x5c,
|
||||||
|
0x59, 0x8b, 0x93, 0x7a, 0xda, 0xd2, 0xa2, 0x14, 0xed, 0xb7, 0xc4, 0x5d, 0xa1, 0xef, 0x26, 0xf3,
|
||||||
|
0xc7, 0x73, 0x29, 0x4d, 0xf1, 0xc8, 0x2c, 0xe0, 0xd0, 0xe9, 0xed, 0x0c, 0x70, 0x75, 0x05, 0x3e,
|
||||||
|
0x5b, 0xf6, 0xa0, 0x6e, 0xea, 0xde, 0x87, 0x0b, 0x06, 0x29, 0x03, 0xbf, 0xb4, 0x85, 0x9d, 0x04,
|
||||||
|
0x75, 0x1a, 0xcd, 0x1e, 0xd6, 0xaa, 0x1b, 0x05, 0x24, 0x6a, 0x2c, 0x80, 0x69, 0x68, 0xdc, 0x97
|
||||||
|
};
|
||||||
|
|
||||||
// CN XTL
|
// CN XTL
|
||||||
const static uint8_t test_output_xtl[160] = {
|
const static uint8_t test_output_xtl[160] = {
|
||||||
0x8F, 0xE5, 0xF0, 0x5F, 0x02, 0x2A, 0x61, 0x7D, 0xE5, 0x3F, 0x79, 0x36, 0x4B, 0x25, 0xCB, 0xC3,
|
0x8F, 0xE5, 0xF0, 0x5F, 0x02, 0x2A, 0x61, 0x7D, 0xE5, 0x3F, 0x79, 0x36, 0x4B, 0x25, 0xCB, 0xC3,
|
||||||
|
|
File diff suppressed because it is too large
Load diff
88
src/crypto/asm/cn_main_loop.S
Normal file
88
src/crypto/asm/cn_main_loop.S
Normal file
|
@ -0,0 +1,88 @@
|
||||||
|
#define ALIGN .align
|
||||||
|
.intel_syntax noprefix
|
||||||
|
#ifdef __APPLE__
|
||||||
|
# define FN_PREFIX(fn) _ ## fn
|
||||||
|
.text
|
||||||
|
#else
|
||||||
|
# define FN_PREFIX(fn) fn
|
||||||
|
.section .text
|
||||||
|
#endif
|
||||||
|
.global FN_PREFIX(cnv1_mainloop_sandybridge_asm)
|
||||||
|
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
|
||||||
|
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
||||||
|
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
||||||
|
|
||||||
|
.global FN_PREFIX(cnv1_mainloop_soft_aes_sandybridge_asm)
|
||||||
|
.global FN_PREFIX(cnv2_mainloop_soft_aes_sandybridge_asm)
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
FN_PREFIX(cnv1_mainloop_sandybridge_asm):
|
||||||
|
sub rsp, 48
|
||||||
|
mov rcx, rdi
|
||||||
|
#include "cnv1_mainloop_sandybridge.inc"
|
||||||
|
add rsp, 48
|
||||||
|
ret 0
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
|
||||||
|
sub rsp, 48
|
||||||
|
mov rcx, rdi
|
||||||
|
#include "cnv2_main_loop_ivybridge.inc"
|
||||||
|
add rsp, 48
|
||||||
|
ret 0
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
FN_PREFIX(cnv2_mainloop_ryzen_asm):
|
||||||
|
sub rsp, 48
|
||||||
|
mov rcx, rdi
|
||||||
|
#include "cnv2_main_loop_ryzen.inc"
|
||||||
|
add rsp, 48
|
||||||
|
ret 0
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
|
||||||
|
sub rsp, 48
|
||||||
|
mov rcx, rdi
|
||||||
|
mov rdx, rsi
|
||||||
|
#include "cnv2_double_main_loop_sandybridge.inc"
|
||||||
|
add rsp, 48
|
||||||
|
ret 0
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
FN_PREFIX(cnv1_mainloop_soft_aes_sandybridge_asm):
|
||||||
|
sub rsp, 48
|
||||||
|
mov rcx, rdi
|
||||||
|
#include "cnv1_mainloop_soft_aes_sandybridge.inc"
|
||||||
|
add rsp, 48
|
||||||
|
ret 0
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
FN_PREFIX(cnv2_mainloop_soft_aes_sandybridge_asm):
|
||||||
|
sub rsp, 48
|
||||||
|
mov rcx, rdi
|
||||||
|
#include "cnv2_mainloop_soft_aes_sandybridge.inc"
|
||||||
|
add rsp, 48
|
||||||
|
ret 0
|
71
src/crypto/asm/cn_main_loop.asm
Normal file
71
src/crypto/asm/cn_main_loop.asm
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
_TEXT_CN_MAINLOOP SEGMENT PAGE READ EXECUTE
|
||||||
|
PUBLIC cnv1_mainloop_sandybridge_asm
|
||||||
|
PUBLIC cnv2_mainloop_ivybridge_asm
|
||||||
|
PUBLIC cnv2_mainloop_ryzen_asm
|
||||||
|
PUBLIC cnv2_double_mainloop_sandybridge_asm
|
||||||
|
|
||||||
|
PUBLIC cnv1_mainloop_soft_aes_sandybridge_asm
|
||||||
|
PUBLIC cnv2_mainloop_soft_aes_sandybridge_asm
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
cnv1_mainloop_sandybridge_asm PROC
|
||||||
|
INCLUDE cnv1_mainloop_sandybridge.inc
|
||||||
|
ret 0
|
||||||
|
cnv1_mainloop_sandybridge_asm ENDP
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
cnv2_mainloop_ivybridge_asm PROC
|
||||||
|
INCLUDE cnv2_main_loop_ivybridge.inc
|
||||||
|
ret 0
|
||||||
|
cnv2_mainloop_ivybridge_asm ENDP
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
cnv2_mainloop_ryzen_asm PROC
|
||||||
|
INCLUDE cnv2_main_loop_ryzen.inc
|
||||||
|
ret 0
|
||||||
|
cnv2_mainloop_ryzen_asm ENDP
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
cnv2_double_mainloop_sandybridge_asm PROC
|
||||||
|
INCLUDE cnv2_double_main_loop_sandybridge.inc
|
||||||
|
ret 0
|
||||||
|
cnv2_double_mainloop_sandybridge_asm ENDP
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
cnv1_mainloop_soft_aes_sandybridge_asm PROC
|
||||||
|
INCLUDE cnv1_mainloop_soft_aes_sandybridge.inc
|
||||||
|
ret 0
|
||||||
|
cnv1_mainloop_soft_aes_sandybridge_asm ENDP
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
cnv2_mainloop_soft_aes_sandybridge_asm PROC
|
||||||
|
INCLUDE cnv2_mainloop_soft_aes_sandybridge.inc
|
||||||
|
ret 0
|
||||||
|
cnv2_mainloop_soft_aes_sandybridge_asm ENDP
|
||||||
|
|
||||||
|
_TEXT_CN_MAINLOOP ENDS
|
||||||
|
END
|
42
src/crypto/asm/cn_main_loop_win_gcc.S
Normal file
42
src/crypto/asm/cn_main_loop_win_gcc.S
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
#define ALIGN .align
|
||||||
|
.intel_syntax noprefix
|
||||||
|
# define FN_PREFIX(fn) fn
|
||||||
|
.section .text
|
||||||
|
|
||||||
|
.global FN_PREFIX(cnv1_mainloop_sandybridge_asm)
|
||||||
|
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
|
||||||
|
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
||||||
|
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
||||||
|
|
||||||
|
.global FN_PREFIX(cnv1_mainloop_soft_aes_sandybridge_asm)
|
||||||
|
.global FN_PREFIX(cnv2_mainloop_soft_aes_sandybridge_asm)
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
FN_PREFIX(cnv1_mainloop_sandybridge_asm):
|
||||||
|
#include "cnv1_mainloop_sandybridge.inc"
|
||||||
|
ret 0
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
|
||||||
|
#include "cnv2_main_loop_ivybridge.inc"
|
||||||
|
ret 0
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
FN_PREFIX(cnv2_mainloop_ryzen_asm):
|
||||||
|
#include "cnv2_main_loop_ryzen.inc"
|
||||||
|
ret 0
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
|
||||||
|
#include "cnv2_double_main_loop_sandybridge.inc"
|
||||||
|
ret 0
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
FN_PREFIX(cnv1_mainloop_soft_aes_sandybridge_asm):
|
||||||
|
#include "cnv1_mainloop_soft_aes_sandybridge.inc"
|
||||||
|
ret 0
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
FN_PREFIX(cnv2_mainloop_soft_aes_sandybridge_asm):
|
||||||
|
#include "cnv2_mainloop_soft_aes_sandybridge.inc"
|
||||||
|
ret 0
|
74
src/crypto/asm/cnv1_mainloop_sandybridge.inc
Normal file
74
src/crypto/asm/cnv1_mainloop_sandybridge.inc
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
mov QWORD PTR [rsp+8], rbx
|
||||||
|
mov QWORD PTR [rsp+16], rbp
|
||||||
|
mov QWORD PTR [rsp+24], rsi
|
||||||
|
mov QWORD PTR [rsp+32], rdi
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
mov ebp, 524288
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
movq xmm3, rax
|
||||||
|
mov rax, QWORD PTR [rcx+256]
|
||||||
|
mov rdi, QWORD PTR [rcx+40]
|
||||||
|
movq xmm0, rdx
|
||||||
|
xor rdi, QWORD PTR [rcx+8]
|
||||||
|
mov rdx, r8
|
||||||
|
mov r15, QWORD PTR [rcx+264]
|
||||||
|
and edx, 2097136
|
||||||
|
mov r14, QWORD PTR [rax+35]
|
||||||
|
xor r14, QWORD PTR [rcx+192]
|
||||||
|
mov rsi, QWORD PTR [rcx+224]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
movdqu xmm2, XMMWORD PTR [rdx+rsi]
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
cnv1_mainloop_sandybridge:
|
||||||
|
movq xmm0, rdi
|
||||||
|
movq xmm1, r8
|
||||||
|
punpcklqdq xmm1, xmm0
|
||||||
|
aesenc xmm2, xmm1
|
||||||
|
movq r10, xmm2
|
||||||
|
mov r9d, r10d
|
||||||
|
and r9d, 2097136
|
||||||
|
add r9, rsi
|
||||||
|
movdqa xmm0, xmm2
|
||||||
|
pxor xmm0, xmm3
|
||||||
|
movdqa xmm3, xmm2
|
||||||
|
movdqu XMMWORD PTR [rdx+rsi], xmm0
|
||||||
|
psrldq xmm0, 11
|
||||||
|
movq rax, xmm0
|
||||||
|
movzx eax, al
|
||||||
|
movzx eax, BYTE PTR [rax+r15]
|
||||||
|
mov BYTE PTR [rsi+rdx+11], al
|
||||||
|
mov rbx, QWORD PTR [r9]
|
||||||
|
mov r11, QWORD PTR [r9+8]
|
||||||
|
mov rax, rbx
|
||||||
|
mul r10
|
||||||
|
add r8, rdx
|
||||||
|
mov QWORD PTR [r9], r8
|
||||||
|
add rdi, rax
|
||||||
|
mov rax, r14
|
||||||
|
xor rax, rdi
|
||||||
|
mov QWORD PTR [r9+8], rax
|
||||||
|
xor r8, rbx
|
||||||
|
mov rdx, r8
|
||||||
|
and edx, 2097136
|
||||||
|
movdqu xmm2, XMMWORD PTR [rdx+rsi]
|
||||||
|
xor rdi, r11
|
||||||
|
dec ebp
|
||||||
|
jne cnv1_mainloop_sandybridge
|
||||||
|
|
||||||
|
mov rbx, QWORD PTR [rsp+24]
|
||||||
|
mov rbp, QWORD PTR [rsp+32]
|
||||||
|
mov rsi, QWORD PTR [rsp+40]
|
||||||
|
mov rdi, QWORD PTR [rsp+48]
|
||||||
|
pop r15
|
||||||
|
pop r14
|
166
src/crypto/asm/cnv1_mainloop_soft_aes_sandybridge.inc
Normal file
166
src/crypto/asm/cnv1_mainloop_soft_aes_sandybridge.inc
Normal file
|
@ -0,0 +1,166 @@
|
||||||
|
push rbx
|
||||||
|
push rbp
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 72
|
||||||
|
|
||||||
|
movaps XMMWORD PTR [rsp], xmm6
|
||||||
|
movaps XMMWORD PTR [rsp+16], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm8
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm9
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
movq xmm4, rax
|
||||||
|
mov rax, QWORD PTR [rcx+256]
|
||||||
|
mov r13, QWORD PTR [rcx+40]
|
||||||
|
movq xmm0, rdx
|
||||||
|
xor r13, QWORD PTR [rcx+8]
|
||||||
|
mov rdx, r8
|
||||||
|
mov rdi, QWORD PTR [rcx+224]
|
||||||
|
and edx, 2097136
|
||||||
|
mov rax, QWORD PTR [rax+35]
|
||||||
|
xor rax, QWORD PTR [rcx+192]
|
||||||
|
movq xmm5, rax
|
||||||
|
movq xmm8, rdi
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
mov QWORD PTR [rsp+64], rdx
|
||||||
|
|
||||||
|
movq xmm6, rcx
|
||||||
|
mov rax, QWORD PTR [rcx+264]
|
||||||
|
movq xmm7, rax
|
||||||
|
|
||||||
|
mov eax, 524288
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
cnv1_mainloop_soft_aes_sandybridge:
|
||||||
|
movq xmm9, rax
|
||||||
|
mov r12, QWORD PTR [rcx+272]
|
||||||
|
mov esi, DWORD PTR [rdx+rdi]
|
||||||
|
mov r10d, DWORD PTR [rdx+rdi+4]
|
||||||
|
mov ebp, DWORD PTR [rdx+rdi+12]
|
||||||
|
mov r14d, DWORD PTR [rdx+rdi+8]
|
||||||
|
mov rdx, QWORD PTR [rsp+64]
|
||||||
|
movzx ecx, sil
|
||||||
|
shr esi, 8
|
||||||
|
mov r15d, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
mov edi, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, r14b
|
||||||
|
shr r14d, 8
|
||||||
|
mov ebx, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, bpl
|
||||||
|
shr ebp, 8
|
||||||
|
mov r9d, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
movzx ecx, r14b
|
||||||
|
shr r14d, 8
|
||||||
|
mov eax, r14d
|
||||||
|
shr eax, 8
|
||||||
|
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
add eax, 256
|
||||||
|
movzx ecx, bpl
|
||||||
|
shr ebp, 8
|
||||||
|
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
movzx ecx, sil
|
||||||
|
shr esi, 8
|
||||||
|
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
add r12, 2048
|
||||||
|
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
add r10d, 256
|
||||||
|
mov r11d, DWORD PTR [r12+rax*4]
|
||||||
|
xor r11d, DWORD PTR [r12+rcx*4]
|
||||||
|
xor r11d, r9d
|
||||||
|
movzx ecx, sil
|
||||||
|
mov r10d, DWORD PTR [r12+r10*4]
|
||||||
|
shr esi, 8
|
||||||
|
add esi, 256
|
||||||
|
xor r10d, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, bpl
|
||||||
|
xor r10d, ebx
|
||||||
|
shr ebp, 8
|
||||||
|
add ebp, 256
|
||||||
|
movd xmm1, r11d
|
||||||
|
mov r9d, DWORD PTR [r12+rcx*4]
|
||||||
|
xor r9d, DWORD PTR [r12+rsi*4]
|
||||||
|
mov eax, DWORD PTR [r12+rbp*4]
|
||||||
|
xor r9d, edi
|
||||||
|
movq rdi, xmm8
|
||||||
|
movzx ecx, r14b
|
||||||
|
movd xmm0, r10d
|
||||||
|
movd xmm2, r9d
|
||||||
|
punpckldq xmm2, xmm1
|
||||||
|
movq xmm1, r8
|
||||||
|
xor eax, DWORD PTR [r12+rcx*4]
|
||||||
|
xor eax, r15d
|
||||||
|
movd xmm3, eax
|
||||||
|
movq rax, xmm7
|
||||||
|
punpckldq xmm3, xmm0
|
||||||
|
movq xmm0, r13
|
||||||
|
punpcklqdq xmm1, xmm0
|
||||||
|
punpckldq xmm3, xmm2
|
||||||
|
pxor xmm3, xmm1
|
||||||
|
movq r9, xmm3
|
||||||
|
mov r10d, r9d
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa xmm0, xmm3
|
||||||
|
pxor xmm0, xmm4
|
||||||
|
movdqu XMMWORD PTR [rdx+rdi], xmm0
|
||||||
|
psrldq xmm0, 11
|
||||||
|
movq rcx, xmm0
|
||||||
|
movzx ecx, cl
|
||||||
|
mov cl, BYTE PTR [rcx+rax]
|
||||||
|
mov BYTE PTR [rdi+rdx+11], cl
|
||||||
|
mov rbx, QWORD PTR [r10+rdi]
|
||||||
|
mov rcx, r9
|
||||||
|
lea r9, QWORD PTR [r10+rdi]
|
||||||
|
mov r11, QWORD PTR [r9+8]
|
||||||
|
mov rax, rbx
|
||||||
|
movdqa xmm4, xmm3
|
||||||
|
mul rcx
|
||||||
|
movq rcx, xmm6
|
||||||
|
add r8, rdx
|
||||||
|
add r13, rax
|
||||||
|
movq rax, xmm5
|
||||||
|
xor rax, r13
|
||||||
|
mov QWORD PTR [r9], r8
|
||||||
|
xor r8, rbx
|
||||||
|
mov QWORD PTR [r9+8], rax
|
||||||
|
movq rax, xmm9
|
||||||
|
mov rdx, r8
|
||||||
|
xor r13, r11
|
||||||
|
and edx, 2097136
|
||||||
|
mov QWORD PTR [rsp+64], rdx
|
||||||
|
sub eax, 1
|
||||||
|
jne cnv1_mainloop_soft_aes_sandybridge
|
||||||
|
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+16]
|
||||||
|
movaps xmm8, XMMWORD PTR [rsp+32]
|
||||||
|
movaps xmm9, XMMWORD PTR [rsp+48]
|
||||||
|
|
||||||
|
add rsp, 72
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
pop rbp
|
||||||
|
pop rbx
|
414
src/crypto/asm/cnv2_double_main_loop_sandybridge.inc
Normal file
414
src/crypto/asm/cnv2_double_main_loop_sandybridge.inc
Normal file
|
@ -0,0 +1,414 @@
|
||||||
|
mov rax, rsp
|
||||||
|
push rbx
|
||||||
|
push rbp
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 184
|
||||||
|
|
||||||
|
stmxcsr DWORD PTR [rsp+272]
|
||||||
|
mov DWORD PTR [rsp+276], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+276]
|
||||||
|
|
||||||
|
mov r13, QWORD PTR [rcx+224]
|
||||||
|
mov r9, rdx
|
||||||
|
mov r10, QWORD PTR [rcx+32]
|
||||||
|
mov r8, rcx
|
||||||
|
xor r10, QWORD PTR [rcx]
|
||||||
|
mov r14d, 524288
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
mov rsi, QWORD PTR [rdx+224]
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
mov rdi, QWORD PTR [r9+32]
|
||||||
|
xor rdi, QWORD PTR [r9]
|
||||||
|
mov rbp, QWORD PTR [r9+40]
|
||||||
|
xor rbp, QWORD PTR [r9+8]
|
||||||
|
movq xmm0, rdx
|
||||||
|
movaps XMMWORD PTR [rax-88], xmm6
|
||||||
|
movaps XMMWORD PTR [rax-104], xmm7
|
||||||
|
movaps XMMWORD PTR [rax-120], xmm8
|
||||||
|
movaps XMMWORD PTR [rsp+112], xmm9
|
||||||
|
movaps XMMWORD PTR [rsp+96], xmm10
|
||||||
|
movaps XMMWORD PTR [rsp+80], xmm11
|
||||||
|
movaps XMMWORD PTR [rsp+64], xmm12
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm13
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm14
|
||||||
|
movaps XMMWORD PTR [rsp+16], xmm15
|
||||||
|
mov rdx, r10
|
||||||
|
movq xmm4, QWORD PTR [r8+96]
|
||||||
|
and edx, 2097136
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
xorps xmm13, xmm13
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r8+72]
|
||||||
|
movq xmm5, QWORD PTR [r8+104]
|
||||||
|
movq xmm7, rax
|
||||||
|
|
||||||
|
mov eax, 1
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm14, rax
|
||||||
|
punpcklqdq xmm14, xmm14
|
||||||
|
|
||||||
|
mov eax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm12, rax
|
||||||
|
punpcklqdq xmm12, xmm12
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [r8+80]
|
||||||
|
xor rax, QWORD PTR [r8+64]
|
||||||
|
punpcklqdq xmm7, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
mov rcx, QWORD PTR [r9+56]
|
||||||
|
xor rcx, QWORD PTR [r9+24]
|
||||||
|
movq xmm3, rax
|
||||||
|
mov rax, QWORD PTR [r9+48]
|
||||||
|
xor rax, QWORD PTR [r9+16]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
mov QWORD PTR [rsp], r13
|
||||||
|
mov rcx, QWORD PTR [r9+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
movq xmm6, rax
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
punpcklqdq xmm6, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
mov QWORD PTR [rsp+256], r10
|
||||||
|
mov rcx, rdi
|
||||||
|
mov QWORD PTR [rsp+264], r11
|
||||||
|
movq xmm8, rax
|
||||||
|
and ecx, 2097136
|
||||||
|
punpcklqdq xmm8, xmm0
|
||||||
|
movq xmm0, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
movq xmm0, QWORD PTR [r9+104]
|
||||||
|
lea r8, QWORD PTR [rcx+rsi]
|
||||||
|
movdqu xmm11, XMMWORD PTR [r8]
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
lea r9, QWORD PTR [rdx+r13]
|
||||||
|
movdqu xmm15, XMMWORD PTR [r9]
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
main_loop_double_sandybridge:
|
||||||
|
movdqu xmm9, xmm15
|
||||||
|
mov eax, edx
|
||||||
|
mov ebx, edx
|
||||||
|
xor eax, 16
|
||||||
|
xor ebx, 32
|
||||||
|
xor edx, 48
|
||||||
|
|
||||||
|
movq xmm0, r11
|
||||||
|
movq xmm2, r10
|
||||||
|
punpcklqdq xmm2, xmm0
|
||||||
|
aesenc xmm9, xmm2
|
||||||
|
|
||||||
|
movdqu xmm0, XMMWORD PTR [rax+r13]
|
||||||
|
movdqu xmm1, XMMWORD PTR [rbx+r13]
|
||||||
|
paddq xmm0, xmm7
|
||||||
|
paddq xmm1, xmm2
|
||||||
|
movdqu XMMWORD PTR [rbx+r13], xmm0
|
||||||
|
movdqu xmm0, XMMWORD PTR [rdx+r13]
|
||||||
|
movdqu XMMWORD PTR [rdx+r13], xmm1
|
||||||
|
paddq xmm0, xmm3
|
||||||
|
movdqu XMMWORD PTR [rax+r13], xmm0
|
||||||
|
|
||||||
|
movq r11, xmm9
|
||||||
|
mov edx, r11d
|
||||||
|
and edx, 2097136
|
||||||
|
movdqa xmm0, xmm9
|
||||||
|
pxor xmm0, xmm7
|
||||||
|
movdqu XMMWORD PTR [r9], xmm0
|
||||||
|
|
||||||
|
lea rbx, QWORD PTR [rdx+r13]
|
||||||
|
mov r10, QWORD PTR [rdx+r13]
|
||||||
|
|
||||||
|
movdqu xmm10, xmm11
|
||||||
|
movq xmm0, rbp
|
||||||
|
movq xmm11, rdi
|
||||||
|
punpcklqdq xmm11, xmm0
|
||||||
|
aesenc xmm10, xmm11
|
||||||
|
|
||||||
|
mov eax, ecx
|
||||||
|
mov r12d, ecx
|
||||||
|
xor eax, 16
|
||||||
|
xor r12d, 32
|
||||||
|
xor ecx, 48
|
||||||
|
|
||||||
|
movdqu xmm0, XMMWORD PTR [rax+rsi]
|
||||||
|
paddq xmm0, xmm6
|
||||||
|
movdqu xmm1, XMMWORD PTR [r12+rsi]
|
||||||
|
movdqu XMMWORD PTR [r12+rsi], xmm0
|
||||||
|
paddq xmm1, xmm11
|
||||||
|
movdqu xmm0, XMMWORD PTR [rcx+rsi]
|
||||||
|
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||||
|
paddq xmm0, xmm8
|
||||||
|
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||||
|
|
||||||
|
movq rcx, xmm10
|
||||||
|
and ecx, 2097136
|
||||||
|
|
||||||
|
movdqa xmm0, xmm10
|
||||||
|
pxor xmm0, xmm6
|
||||||
|
movdqu XMMWORD PTR [r8], xmm0
|
||||||
|
mov r12, QWORD PTR [rcx+rsi]
|
||||||
|
|
||||||
|
mov r9, QWORD PTR [rbx+8]
|
||||||
|
|
||||||
|
xor edx, 16
|
||||||
|
mov r8d, edx
|
||||||
|
mov r15d, edx
|
||||||
|
|
||||||
|
movq rdx, xmm5
|
||||||
|
shl rdx, 32
|
||||||
|
movq rax, xmm4
|
||||||
|
xor rdx, rax
|
||||||
|
xor r10, rdx
|
||||||
|
mov rax, r10
|
||||||
|
mul r11
|
||||||
|
mov r11d, r8d
|
||||||
|
xor r11d, 48
|
||||||
|
movq xmm0, rdx
|
||||||
|
xor rdx, [r11+r13]
|
||||||
|
movq xmm1, rax
|
||||||
|
xor rax, [r11+r13+8]
|
||||||
|
punpcklqdq xmm0, xmm1
|
||||||
|
|
||||||
|
pxor xmm0, XMMWORD PTR [r8+r13]
|
||||||
|
xor r8d, 32
|
||||||
|
movdqu xmm1, XMMWORD PTR [r11+r13]
|
||||||
|
paddq xmm0, xmm7
|
||||||
|
paddq xmm1, xmm2
|
||||||
|
movdqu XMMWORD PTR [r11+r13], xmm0
|
||||||
|
movdqu xmm0, XMMWORD PTR [r8+r13]
|
||||||
|
movdqu XMMWORD PTR [r8+r13], xmm1
|
||||||
|
paddq xmm0, xmm3
|
||||||
|
movdqu XMMWORD PTR [r15+r13], xmm0
|
||||||
|
|
||||||
|
mov r11, QWORD PTR [rsp+256]
|
||||||
|
add r11, rdx
|
||||||
|
mov rdx, QWORD PTR [rsp+264]
|
||||||
|
add rdx, rax
|
||||||
|
mov QWORD PTR [rbx], r11
|
||||||
|
xor r11, r10
|
||||||
|
mov QWORD PTR [rbx+8], rdx
|
||||||
|
xor rdx, r9
|
||||||
|
mov QWORD PTR [rsp+256], r11
|
||||||
|
and r11d, 2097136
|
||||||
|
mov QWORD PTR [rsp+264], rdx
|
||||||
|
mov QWORD PTR [rsp+8], r11
|
||||||
|
lea r15, QWORD PTR [r11+r13]
|
||||||
|
movdqu xmm15, XMMWORD PTR [r11+r13]
|
||||||
|
lea r13, QWORD PTR [rsi+rcx]
|
||||||
|
movdqa xmm0, xmm5
|
||||||
|
psrldq xmm0, 8
|
||||||
|
movaps xmm2, xmm13
|
||||||
|
movq r10, xmm0
|
||||||
|
psllq xmm5, 1
|
||||||
|
shl r10, 32
|
||||||
|
movdqa xmm0, xmm9
|
||||||
|
psrldq xmm0, 8
|
||||||
|
movdqa xmm1, xmm10
|
||||||
|
movq r11, xmm0
|
||||||
|
psrldq xmm1, 8
|
||||||
|
movq r8, xmm1
|
||||||
|
psrldq xmm4, 8
|
||||||
|
movaps xmm0, xmm13
|
||||||
|
movq rax, xmm4
|
||||||
|
xor r10, rax
|
||||||
|
movaps xmm1, xmm13
|
||||||
|
xor r10, r12
|
||||||
|
lea rax, QWORD PTR [r11+1]
|
||||||
|
shr rax, 1
|
||||||
|
movdqa xmm3, xmm9
|
||||||
|
punpcklqdq xmm3, xmm10
|
||||||
|
paddq xmm5, xmm3
|
||||||
|
movq rdx, xmm5
|
||||||
|
psrldq xmm5, 8
|
||||||
|
cvtsi2sd xmm2, rax
|
||||||
|
or edx, -2147483647
|
||||||
|
lea rax, QWORD PTR [r8+1]
|
||||||
|
shr rax, 1
|
||||||
|
movq r9, xmm5
|
||||||
|
cvtsi2sd xmm0, rax
|
||||||
|
or r9d, -2147483647
|
||||||
|
cvtsi2sd xmm1, rdx
|
||||||
|
unpcklpd xmm2, xmm0
|
||||||
|
movaps xmm0, xmm13
|
||||||
|
cvtsi2sd xmm0, r9
|
||||||
|
unpcklpd xmm1, xmm0
|
||||||
|
divpd xmm2, xmm1
|
||||||
|
paddq xmm2, xmm14
|
||||||
|
cvttsd2si rax, xmm2
|
||||||
|
psrldq xmm2, 8
|
||||||
|
mov rbx, rax
|
||||||
|
imul rax, rdx
|
||||||
|
sub r11, rax
|
||||||
|
js div_fix_1_sandybridge
|
||||||
|
div_fix_1_ret_sandybridge:
|
||||||
|
|
||||||
|
cvttsd2si rdx, xmm2
|
||||||
|
mov rax, rdx
|
||||||
|
imul rax, r9
|
||||||
|
movd xmm2, r11d
|
||||||
|
movd xmm4, ebx
|
||||||
|
sub r8, rax
|
||||||
|
js div_fix_2_sandybridge
|
||||||
|
div_fix_2_ret_sandybridge:
|
||||||
|
|
||||||
|
movd xmm1, r8d
|
||||||
|
movd xmm0, edx
|
||||||
|
punpckldq xmm2, xmm1
|
||||||
|
punpckldq xmm4, xmm0
|
||||||
|
punpckldq xmm4, xmm2
|
||||||
|
paddq xmm3, xmm4
|
||||||
|
movdqa xmm0, xmm3
|
||||||
|
psrlq xmm0, 12
|
||||||
|
paddq xmm0, xmm12
|
||||||
|
sqrtpd xmm1, xmm0
|
||||||
|
movq r9, xmm1
|
||||||
|
movdqa xmm5, xmm1
|
||||||
|
psrlq xmm5, 19
|
||||||
|
test r9, 524287
|
||||||
|
je sqrt_fix_1_sandybridge
|
||||||
|
sqrt_fix_1_ret_sandybridge:
|
||||||
|
|
||||||
|
movq r9, xmm10
|
||||||
|
psrldq xmm1, 8
|
||||||
|
movq r8, xmm1
|
||||||
|
test r8, 524287
|
||||||
|
je sqrt_fix_2_sandybridge
|
||||||
|
sqrt_fix_2_ret_sandybridge:
|
||||||
|
|
||||||
|
mov r12d, ecx
|
||||||
|
mov r8d, ecx
|
||||||
|
xor r12d, 16
|
||||||
|
xor r8d, 32
|
||||||
|
xor ecx, 48
|
||||||
|
mov rax, r10
|
||||||
|
mul r9
|
||||||
|
movq xmm0, rax
|
||||||
|
movq xmm3, rdx
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
|
||||||
|
movdqu xmm0, XMMWORD PTR [r12+rsi]
|
||||||
|
pxor xmm0, xmm3
|
||||||
|
movdqu xmm1, XMMWORD PTR [r8+rsi]
|
||||||
|
xor rdx, [r8+rsi]
|
||||||
|
xor rax, [r8+rsi+8]
|
||||||
|
movdqu xmm3, XMMWORD PTR [rcx+rsi]
|
||||||
|
paddq xmm0, xmm6
|
||||||
|
paddq xmm1, xmm11
|
||||||
|
paddq xmm3, xmm8
|
||||||
|
movdqu XMMWORD PTR [r8+rsi], xmm0
|
||||||
|
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||||
|
movdqu XMMWORD PTR [r12+rsi], xmm3
|
||||||
|
|
||||||
|
add rdi, rdx
|
||||||
|
mov QWORD PTR [r13], rdi
|
||||||
|
xor rdi, r10
|
||||||
|
mov ecx, edi
|
||||||
|
and ecx, 2097136
|
||||||
|
lea r8, QWORD PTR [rcx+rsi]
|
||||||
|
|
||||||
|
mov rdx, QWORD PTR [r13+8]
|
||||||
|
add rbp, rax
|
||||||
|
mov QWORD PTR [r13+8], rbp
|
||||||
|
movdqu xmm11, XMMWORD PTR [rcx+rsi]
|
||||||
|
xor rbp, rdx
|
||||||
|
mov r13, QWORD PTR [rsp]
|
||||||
|
movdqa xmm3, xmm7
|
||||||
|
mov rdx, QWORD PTR [rsp+8]
|
||||||
|
movdqa xmm8, xmm6
|
||||||
|
mov r10, QWORD PTR [rsp+256]
|
||||||
|
movdqa xmm7, xmm9
|
||||||
|
mov r11, QWORD PTR [rsp+264]
|
||||||
|
movdqa xmm6, xmm10
|
||||||
|
mov r9, r15
|
||||||
|
dec r14d
|
||||||
|
jne main_loop_double_sandybridge
|
||||||
|
|
||||||
|
ldmxcsr DWORD PTR [rsp+272]
|
||||||
|
movaps xmm13, XMMWORD PTR [rsp+48]
|
||||||
|
lea r11, QWORD PTR [rsp+184]
|
||||||
|
movaps xmm6, XMMWORD PTR [r11-24]
|
||||||
|
movaps xmm7, XMMWORD PTR [r11-40]
|
||||||
|
movaps xmm8, XMMWORD PTR [r11-56]
|
||||||
|
movaps xmm9, XMMWORD PTR [r11-72]
|
||||||
|
movaps xmm10, XMMWORD PTR [r11-88]
|
||||||
|
movaps xmm11, XMMWORD PTR [r11-104]
|
||||||
|
movaps xmm12, XMMWORD PTR [r11-120]
|
||||||
|
movaps xmm14, XMMWORD PTR [rsp+32]
|
||||||
|
movaps xmm15, XMMWORD PTR [rsp+16]
|
||||||
|
mov rsp, r11
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
pop rbp
|
||||||
|
pop rbx
|
||||||
|
jmp cnv2_double_mainloop_asm_sandybridge_endp
|
||||||
|
|
||||||
|
div_fix_1_sandybridge:
|
||||||
|
dec rbx
|
||||||
|
add r11, rdx
|
||||||
|
jmp div_fix_1_ret_sandybridge
|
||||||
|
|
||||||
|
div_fix_2_sandybridge:
|
||||||
|
dec rdx
|
||||||
|
add r8, r9
|
||||||
|
jmp div_fix_2_ret_sandybridge
|
||||||
|
|
||||||
|
sqrt_fix_1_sandybridge:
|
||||||
|
movq r8, xmm3
|
||||||
|
movdqa xmm0, xmm5
|
||||||
|
psrldq xmm0, 8
|
||||||
|
dec r9
|
||||||
|
mov r11d, -1022
|
||||||
|
shl r11, 32
|
||||||
|
mov rax, r9
|
||||||
|
shr r9, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rdx, r9
|
||||||
|
sub rdx, rax
|
||||||
|
lea rdx, [rdx+r11+1]
|
||||||
|
add rax, r11
|
||||||
|
imul rdx, rax
|
||||||
|
sub rdx, r8
|
||||||
|
adc r9, 0
|
||||||
|
movq xmm5, r9
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
jmp sqrt_fix_1_ret_sandybridge
|
||||||
|
|
||||||
|
sqrt_fix_2_sandybridge:
|
||||||
|
psrldq xmm3, 8
|
||||||
|
movq r11, xmm3
|
||||||
|
dec r8
|
||||||
|
mov ebx, -1022
|
||||||
|
shl rbx, 32
|
||||||
|
mov rax, r8
|
||||||
|
shr r8, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rdx, r8
|
||||||
|
sub rdx, rax
|
||||||
|
lea rdx, [rdx+rbx+1]
|
||||||
|
add rax, rbx
|
||||||
|
imul rdx, rax
|
||||||
|
sub rdx, r11
|
||||||
|
adc r8, 0
|
||||||
|
movq xmm0, r8
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
jmp sqrt_fix_2_ret_sandybridge
|
||||||
|
|
||||||
|
cnv2_double_mainloop_asm_sandybridge_endp:
|
186
src/crypto/asm/cnv2_main_loop_ivybridge.inc
Normal file
186
src/crypto/asm/cnv2_main_loop_ivybridge.inc
Normal file
|
@ -0,0 +1,186 @@
|
||||||
|
mov QWORD PTR [rsp+24], rbx
|
||||||
|
push rbp
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 80
|
||||||
|
|
||||||
|
stmxcsr DWORD PTR [rsp]
|
||||||
|
mov DWORD PTR [rsp+4], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+4]
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
mov r9, rcx
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov esi, 524288
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
mov r13d, -2147483647
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
mov r10, r8
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
movq xmm4, rax
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
mov rbx, QWORD PTR [rcx+224]
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
movq xmm0, rdx
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
movq xmm3, QWORD PTR [r9+104]
|
||||||
|
movaps XMMWORD PTR [rsp+64], xmm6
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm8
|
||||||
|
and r10d, 2097136
|
||||||
|
movq xmm5, rax
|
||||||
|
|
||||||
|
mov ax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm8, rax
|
||||||
|
mov r15, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
$main_loop_ivybridge:
|
||||||
|
lea rdx, QWORD PTR [r10+rbx]
|
||||||
|
mov ecx, r10d
|
||||||
|
mov eax, r10d
|
||||||
|
mov rdi, r15
|
||||||
|
xor ecx, 16
|
||||||
|
xor eax, 32
|
||||||
|
xor r10d, 48
|
||||||
|
movq xmm0, r11
|
||||||
|
movq xmm7, r8
|
||||||
|
punpcklqdq xmm7, xmm0
|
||||||
|
aesenc xmm6, xmm7
|
||||||
|
movq rbp, xmm6
|
||||||
|
mov r9, rbp
|
||||||
|
and r9d, 2097136
|
||||||
|
movdqu xmm2, XMMWORD PTR [rcx+rbx]
|
||||||
|
movdqu xmm1, XMMWORD PTR [rax+rbx]
|
||||||
|
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
paddq xmm2, xmm4
|
||||||
|
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
||||||
|
movdqu XMMWORD PTR [rax+rbx], xmm2
|
||||||
|
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
mov r10, r9
|
||||||
|
xor r10d, 32
|
||||||
|
movq rcx, xmm3
|
||||||
|
mov rax, rcx
|
||||||
|
shl rax, 32
|
||||||
|
xor rdi, rax
|
||||||
|
movdqa xmm0, xmm6
|
||||||
|
pxor xmm0, xmm4
|
||||||
|
movdqu XMMWORD PTR [rdx], xmm0
|
||||||
|
xor rdi, QWORD PTR [r9+rbx]
|
||||||
|
lea r14, QWORD PTR [r9+rbx]
|
||||||
|
mov r12, QWORD PTR [r14+8]
|
||||||
|
xor edx, edx
|
||||||
|
lea r9d, DWORD PTR [ecx+ecx]
|
||||||
|
add r9d, ebp
|
||||||
|
movdqa xmm0, xmm6
|
||||||
|
psrldq xmm0, 8
|
||||||
|
or r9d, r13d
|
||||||
|
movq rax, xmm0
|
||||||
|
div r9
|
||||||
|
xorps xmm3, xmm3
|
||||||
|
mov eax, eax
|
||||||
|
shl rdx, 32
|
||||||
|
add rdx, rax
|
||||||
|
lea r9, QWORD PTR [rdx+rbp]
|
||||||
|
mov r15, rdx
|
||||||
|
mov rax, r9
|
||||||
|
shr rax, 12
|
||||||
|
movq xmm0, rax
|
||||||
|
paddq xmm0, xmm8
|
||||||
|
sqrtsd xmm3, xmm0
|
||||||
|
movq rdx, xmm3
|
||||||
|
test edx, 524287
|
||||||
|
je $sqrt_fixup_ivybridge
|
||||||
|
psrlq xmm3, 19
|
||||||
|
$sqrt_fixup_ivybridge_ret:
|
||||||
|
|
||||||
|
mov ecx, r10d
|
||||||
|
mov rax, rdi
|
||||||
|
mul rbp
|
||||||
|
movq xmm2, rdx
|
||||||
|
xor rdx, [rcx+rbx]
|
||||||
|
add r8, rdx
|
||||||
|
mov QWORD PTR [r14], r8
|
||||||
|
xor r8, rdi
|
||||||
|
mov edi, r8d
|
||||||
|
and edi, 2097136
|
||||||
|
movq xmm0, rax
|
||||||
|
xor rax, [rcx+rbx+8]
|
||||||
|
add r11, rax
|
||||||
|
mov QWORD PTR [r14+8], r11
|
||||||
|
punpcklqdq xmm2, xmm0
|
||||||
|
|
||||||
|
mov r9d, r10d
|
||||||
|
xor r9d, 48
|
||||||
|
xor r10d, 16
|
||||||
|
pxor xmm2, XMMWORD PTR [r9+rbx]
|
||||||
|
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
movdqu xmm1, XMMWORD PTR [rcx+rbx]
|
||||||
|
paddq xmm2, xmm4
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
movdqa xmm5, xmm4
|
||||||
|
movdqu XMMWORD PTR [r9+rbx], xmm0
|
||||||
|
movdqa xmm4, xmm6
|
||||||
|
movdqu XMMWORD PTR [rcx+rbx], xmm2
|
||||||
|
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
movdqu xmm6, [rdi+rbx]
|
||||||
|
mov r10d, edi
|
||||||
|
xor r11, r12
|
||||||
|
dec rsi
|
||||||
|
jne $main_loop_ivybridge
|
||||||
|
|
||||||
|
ldmxcsr DWORD PTR [rsp]
|
||||||
|
mov rbx, QWORD PTR [rsp+160]
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp+64]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+48]
|
||||||
|
movaps xmm8, XMMWORD PTR [rsp+32]
|
||||||
|
add rsp, 80
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
pop rbp
|
||||||
|
jmp $cnv2_main_loop_ivybridge_endp
|
||||||
|
|
||||||
|
$sqrt_fixup_ivybridge:
|
||||||
|
dec rdx
|
||||||
|
mov r13d, -1022
|
||||||
|
shl r13, 32
|
||||||
|
mov rax, rdx
|
||||||
|
shr rdx, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rcx, rdx
|
||||||
|
sub rcx, rax
|
||||||
|
add rax, r13
|
||||||
|
not r13
|
||||||
|
sub rcx, r13
|
||||||
|
mov r13d, -2147483647
|
||||||
|
imul rcx, rax
|
||||||
|
sub rcx, r9
|
||||||
|
adc rdx, 0
|
||||||
|
movq xmm3, rdx
|
||||||
|
jmp $sqrt_fixup_ivybridge_ret
|
||||||
|
|
||||||
|
$cnv2_main_loop_ivybridge_endp:
|
183
src/crypto/asm/cnv2_main_loop_ryzen.inc
Normal file
183
src/crypto/asm/cnv2_main_loop_ryzen.inc
Normal file
|
@ -0,0 +1,183 @@
|
||||||
|
mov QWORD PTR [rsp+16], rbx
|
||||||
|
mov QWORD PTR [rsp+24], rbp
|
||||||
|
mov QWORD PTR [rsp+32], rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 64
|
||||||
|
|
||||||
|
stmxcsr DWORD PTR [rsp]
|
||||||
|
mov DWORD PTR [rsp+4], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+4]
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
mov r9, rcx
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov ebp, 524288
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
mov r10, r8
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
movq xmm3, rax
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
mov rbx, QWORD PTR [rcx+224]
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
movq xmm0, rdx
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
mov rdi, QWORD PTR [r9+104]
|
||||||
|
and r10d, 2097136
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm6
|
||||||
|
movq xmm4, rax
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+16], xmm8
|
||||||
|
xorps xmm8, xmm8
|
||||||
|
mov ax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm7, rax
|
||||||
|
mov r15, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
$main_loop_ryzen:
|
||||||
|
movdqa xmm5, XMMWORD PTR [r10+rbx]
|
||||||
|
movq xmm0, r11
|
||||||
|
movq xmm6, r8
|
||||||
|
punpcklqdq xmm6, xmm0
|
||||||
|
lea rdx, QWORD PTR [r10+rbx]
|
||||||
|
lea r9, QWORD PTR [rdi+rdi]
|
||||||
|
shl rdi, 32
|
||||||
|
|
||||||
|
mov ecx, r10d
|
||||||
|
mov eax, r10d
|
||||||
|
xor ecx, 16
|
||||||
|
xor eax, 32
|
||||||
|
xor r10d, 48
|
||||||
|
aesenc xmm5, xmm6
|
||||||
|
movdqa xmm2, XMMWORD PTR [rcx+rbx]
|
||||||
|
movdqa xmm1, XMMWORD PTR [rax+rbx]
|
||||||
|
movdqa xmm0, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm2, xmm3
|
||||||
|
paddq xmm1, xmm6
|
||||||
|
paddq xmm0, xmm4
|
||||||
|
movdqa XMMWORD PTR [rcx+rbx], xmm0
|
||||||
|
movdqa XMMWORD PTR [rax+rbx], xmm2
|
||||||
|
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
|
||||||
|
movaps xmm1, xmm8
|
||||||
|
mov rsi, r15
|
||||||
|
xor rsi, rdi
|
||||||
|
movq r14, xmm5
|
||||||
|
movdqa xmm0, xmm5
|
||||||
|
pxor xmm0, xmm3
|
||||||
|
mov r10, r14
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa XMMWORD PTR [rdx], xmm0
|
||||||
|
xor rsi, QWORD PTR [r10+rbx]
|
||||||
|
lea r12, QWORD PTR [r10+rbx]
|
||||||
|
mov r13, QWORD PTR [r10+rbx+8]
|
||||||
|
|
||||||
|
add r9d, r14d
|
||||||
|
or r9d, -2147483647
|
||||||
|
xor edx, edx
|
||||||
|
movdqa xmm0, xmm5
|
||||||
|
psrldq xmm0, 8
|
||||||
|
movq rax, xmm0
|
||||||
|
|
||||||
|
div r9
|
||||||
|
movq xmm0, rax
|
||||||
|
movq xmm1, rdx
|
||||||
|
punpckldq xmm0, xmm1
|
||||||
|
movq r15, xmm0
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
movdqa xmm2, xmm0
|
||||||
|
psrlq xmm0, 12
|
||||||
|
paddq xmm0, xmm7
|
||||||
|
sqrtsd xmm1, xmm0
|
||||||
|
movq rdi, xmm1
|
||||||
|
test rdi, 524287
|
||||||
|
je $sqrt_fixup_ryzen
|
||||||
|
shr rdi, 19
|
||||||
|
|
||||||
|
$sqrt_fixup_ryzen_ret:
|
||||||
|
mov rax, rsi
|
||||||
|
mul r14
|
||||||
|
movq xmm1, rax
|
||||||
|
movq xmm0, rdx
|
||||||
|
punpcklqdq xmm0, xmm1
|
||||||
|
|
||||||
|
mov r9d, r10d
|
||||||
|
mov ecx, r10d
|
||||||
|
xor r9d, 16
|
||||||
|
xor ecx, 32
|
||||||
|
xor r10d, 48
|
||||||
|
movdqa xmm1, XMMWORD PTR [rcx+rbx]
|
||||||
|
xor rdx, [rcx+rbx]
|
||||||
|
xor rax, [rcx+rbx+8]
|
||||||
|
movdqa xmm2, XMMWORD PTR [r9+rbx]
|
||||||
|
pxor xmm2, xmm0
|
||||||
|
paddq xmm4, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm2, xmm3
|
||||||
|
paddq xmm1, xmm6
|
||||||
|
movdqa XMMWORD PTR [r9+rbx], xmm4
|
||||||
|
movdqa XMMWORD PTR [rcx+rbx], xmm2
|
||||||
|
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
|
||||||
|
movdqa xmm4, xmm3
|
||||||
|
add r8, rdx
|
||||||
|
add r11, rax
|
||||||
|
mov QWORD PTR [r12], r8
|
||||||
|
xor r8, rsi
|
||||||
|
mov QWORD PTR [r12+8], r11
|
||||||
|
mov r10, r8
|
||||||
|
xor r11, r13
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa xmm3, xmm5
|
||||||
|
dec ebp
|
||||||
|
jne $main_loop_ryzen
|
||||||
|
|
||||||
|
ldmxcsr DWORD PTR [rsp]
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||||
|
lea r11, QWORD PTR [rsp+64]
|
||||||
|
mov rbx, QWORD PTR [r11+56]
|
||||||
|
mov rbp, QWORD PTR [r11+64]
|
||||||
|
mov rsi, QWORD PTR [r11+72]
|
||||||
|
movaps xmm8, XMMWORD PTR [r11-48]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||||
|
mov rsp, r11
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
jmp $cnv2_main_loop_ryzen_endp
|
||||||
|
|
||||||
|
$sqrt_fixup_ryzen:
|
||||||
|
movq r9, xmm2
|
||||||
|
dec rdi
|
||||||
|
mov edx, -1022
|
||||||
|
shl rdx, 32
|
||||||
|
mov rax, rdi
|
||||||
|
shr rdi, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rcx, rdi
|
||||||
|
sub rcx, rax
|
||||||
|
lea rcx, [rcx+rdx+1]
|
||||||
|
add rax, rdx
|
||||||
|
imul rcx, rax
|
||||||
|
sub rcx, r9
|
||||||
|
adc rdi, 0
|
||||||
|
jmp $sqrt_fixup_ryzen_ret
|
||||||
|
|
||||||
|
$cnv2_main_loop_ryzen_endp:
|
271
src/crypto/asm/cnv2_mainloop_soft_aes_sandybridge.inc
Normal file
271
src/crypto/asm/cnv2_mainloop_soft_aes_sandybridge.inc
Normal file
|
@ -0,0 +1,271 @@
|
||||||
|
mov QWORD PTR [rsp+8], rcx
|
||||||
|
push rbx
|
||||||
|
push rbp
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 152
|
||||||
|
|
||||||
|
stmxcsr DWORD PTR [rsp+4]
|
||||||
|
mov DWORD PTR [rsp], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp]
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
mov r10, rcx
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
mov r9, QWORD PTR [rcx+40]
|
||||||
|
xor r9, QWORD PTR [rcx+8]
|
||||||
|
movq xmm4, rax
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
mov r11, QWORD PTR [rcx+224]
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r10+72]
|
||||||
|
mov rax, QWORD PTR [r10+80]
|
||||||
|
movq xmm0, rdx
|
||||||
|
xor rax, QWORD PTR [r10+64]
|
||||||
|
|
||||||
|
movaps XMMWORD PTR [rsp+16], xmm6
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm8
|
||||||
|
movaps XMMWORD PTR [rsp+64], xmm9
|
||||||
|
movaps XMMWORD PTR [rsp+80], xmm10
|
||||||
|
movaps XMMWORD PTR [rsp+96], xmm11
|
||||||
|
movaps XMMWORD PTR [rsp+112], xmm12
|
||||||
|
movaps XMMWORD PTR [rsp+128], xmm13
|
||||||
|
|
||||||
|
movq xmm5, rax
|
||||||
|
|
||||||
|
mov ax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm8, rax
|
||||||
|
|
||||||
|
mov rax, r8
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
and eax, 2097136
|
||||||
|
movq xmm10, QWORD PTR [r10+96]
|
||||||
|
movq xmm0, rcx
|
||||||
|
mov rcx, QWORD PTR [r10+104]
|
||||||
|
xorps xmm9, xmm9
|
||||||
|
mov QWORD PTR [rsp+248], rax
|
||||||
|
movq xmm12, r11
|
||||||
|
mov QWORD PTR [rsp+240], r9
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
movq xmm13, rcx
|
||||||
|
mov r12d, 524288
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
cnv2_mainloop_soft_aes_sandybridge:
|
||||||
|
movd xmm11, r12d
|
||||||
|
mov r12, QWORD PTR [r10+272]
|
||||||
|
lea r13, QWORD PTR [rax+r11]
|
||||||
|
mov esi, DWORD PTR [r13]
|
||||||
|
movq xmm0, r9
|
||||||
|
mov r10d, DWORD PTR [r13+4]
|
||||||
|
movq xmm7, r8
|
||||||
|
mov ebp, DWORD PTR [r13+12]
|
||||||
|
mov r14d, DWORD PTR [r13+8]
|
||||||
|
mov rdx, QWORD PTR [rsp+248]
|
||||||
|
movzx ecx, sil
|
||||||
|
shr esi, 8
|
||||||
|
punpcklqdq xmm7, xmm0
|
||||||
|
mov r15d, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
mov edi, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, r14b
|
||||||
|
shr r14d, 8
|
||||||
|
mov ebx, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, bpl
|
||||||
|
shr ebp, 8
|
||||||
|
mov r9d, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
movzx ecx, r14b
|
||||||
|
shr r14d, 8
|
||||||
|
mov eax, r14d
|
||||||
|
shr eax, 8
|
||||||
|
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
add eax, 256
|
||||||
|
movzx ecx, bpl
|
||||||
|
shr ebp, 8
|
||||||
|
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
movzx ecx, sil
|
||||||
|
shr esi, 8
|
||||||
|
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
add r12, 2048
|
||||||
|
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
add r10d, 256
|
||||||
|
mov r11d, DWORD PTR [r12+rax*4]
|
||||||
|
xor r11d, DWORD PTR [r12+rcx*4]
|
||||||
|
xor r11d, r9d
|
||||||
|
movzx ecx, sil
|
||||||
|
mov r10d, DWORD PTR [r12+r10*4]
|
||||||
|
shr esi, 8
|
||||||
|
add esi, 256
|
||||||
|
xor r10d, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, bpl
|
||||||
|
xor r10d, ebx
|
||||||
|
shr ebp, 8
|
||||||
|
movd xmm1, r11d
|
||||||
|
add ebp, 256
|
||||||
|
movq r11, xmm12
|
||||||
|
mov r9d, DWORD PTR [r12+rcx*4]
|
||||||
|
xor r9d, DWORD PTR [r12+rsi*4]
|
||||||
|
mov eax, DWORD PTR [r12+rbp*4]
|
||||||
|
xor r9d, edi
|
||||||
|
movzx ecx, r14b
|
||||||
|
movd xmm0, r10d
|
||||||
|
movd xmm2, r9d
|
||||||
|
xor eax, DWORD PTR [r12+rcx*4]
|
||||||
|
mov rcx, rdx
|
||||||
|
xor eax, r15d
|
||||||
|
punpckldq xmm2, xmm1
|
||||||
|
xor rcx, 16
|
||||||
|
movd xmm6, eax
|
||||||
|
mov rax, rdx
|
||||||
|
punpckldq xmm6, xmm0
|
||||||
|
xor rax, 32
|
||||||
|
punpckldq xmm6, xmm2
|
||||||
|
xor rdx, 48
|
||||||
|
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||||
|
pxor xmm6, xmm7
|
||||||
|
paddq xmm2, xmm4
|
||||||
|
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||||
|
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||||
|
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||||
|
movq rcx, xmm13
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||||
|
movq rdi, xmm6
|
||||||
|
mov r10, rdi
|
||||||
|
and r10d, 2097136
|
||||||
|
xor edx, edx
|
||||||
|
mov rax, rcx
|
||||||
|
shl rax, 32
|
||||||
|
movq rbx, xmm10
|
||||||
|
xor rbx, rax
|
||||||
|
lea r9, QWORD PTR [rcx+rcx]
|
||||||
|
add r9d, edi
|
||||||
|
movdqa xmm0, xmm6
|
||||||
|
pxor xmm0, xmm4
|
||||||
|
mov ecx, -2147483647
|
||||||
|
movdqu XMMWORD PTR [r13], xmm0
|
||||||
|
or r9, rcx
|
||||||
|
movdqa xmm0, xmm6
|
||||||
|
movaps xmm1, xmm9
|
||||||
|
psrldq xmm0, 8
|
||||||
|
movq rax, xmm0
|
||||||
|
xor rbx, QWORD PTR [r10+r11]
|
||||||
|
lea r14, QWORD PTR [r10+r11]
|
||||||
|
mov rbp, QWORD PTR [r14+8]
|
||||||
|
div r9
|
||||||
|
shl rdx, 32
|
||||||
|
mov eax, eax
|
||||||
|
add rdx, rax
|
||||||
|
lea r9, QWORD PTR [rdx+rdi]
|
||||||
|
movq xmm10, rdx
|
||||||
|
mov rax, r9
|
||||||
|
shr rax, 12
|
||||||
|
movq xmm0, rax
|
||||||
|
paddq xmm0, xmm8
|
||||||
|
sqrtsd xmm1, xmm0
|
||||||
|
movq rdx, xmm1
|
||||||
|
test rdx, 524287
|
||||||
|
je sqrt_fixup_soft_aes_sandybridge
|
||||||
|
psrlq xmm1, 19
|
||||||
|
sqrt_fixup_soft_aes_sandybridge_ret:
|
||||||
|
|
||||||
|
mov r9, r10
|
||||||
|
movdqa xmm13, xmm1
|
||||||
|
xor r9, 16
|
||||||
|
mov rcx, r10
|
||||||
|
xor rcx, 32
|
||||||
|
xor r10, 48
|
||||||
|
mov rax, rbx
|
||||||
|
mul rdi
|
||||||
|
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||||
|
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
movq xmm0, rax
|
||||||
|
movq xmm3, rdx
|
||||||
|
xor rax, QWORD PTR [r11+rcx+8]
|
||||||
|
xor rdx, QWORD PTR [rcx+r11]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
add r8, rdx
|
||||||
|
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||||
|
pxor xmm2, xmm3
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
paddq xmm2, xmm4
|
||||||
|
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||||
|
movdqa xmm5, xmm4
|
||||||
|
mov r9, QWORD PTR [rsp+240]
|
||||||
|
movdqa xmm4, xmm6
|
||||||
|
add r9, rax
|
||||||
|
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||||
|
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||||
|
mov r10, QWORD PTR [rsp+224]
|
||||||
|
movd r12d, xmm11
|
||||||
|
mov QWORD PTR [r14], r8
|
||||||
|
xor r8, rbx
|
||||||
|
mov rax, r8
|
||||||
|
mov QWORD PTR [r14+8], r9
|
||||||
|
and eax, 2097136
|
||||||
|
xor r9, rbp
|
||||||
|
mov QWORD PTR [rsp+240], r9
|
||||||
|
mov QWORD PTR [rsp+248], rax
|
||||||
|
sub r12d, 1
|
||||||
|
jne cnv2_mainloop_soft_aes_sandybridge
|
||||||
|
|
||||||
|
ldmxcsr DWORD PTR [rsp+4]
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||||
|
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||||
|
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||||
|
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||||
|
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||||
|
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||||
|
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||||
|
|
||||||
|
add rsp, 152
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
pop rbp
|
||||||
|
pop rbx
|
||||||
|
jmp cnv2_mainloop_soft_aes_sandybridge_asm_endp
|
||||||
|
|
||||||
|
sqrt_fixup_soft_aes_sandybridge:
|
||||||
|
dec rdx
|
||||||
|
mov r15d, -1022
|
||||||
|
shl r15, 32
|
||||||
|
mov rax, rdx
|
||||||
|
shr rdx, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rcx, rdx
|
||||||
|
sub rcx, rax
|
||||||
|
lea rcx, [rcx+r15+1]
|
||||||
|
add rax, r15
|
||||||
|
imul rcx, rax
|
||||||
|
sub rcx, r9
|
||||||
|
adc rdx, 0
|
||||||
|
movq xmm1, rdx
|
||||||
|
jmp sqrt_fixup_soft_aes_sandybridge_ret
|
||||||
|
|
||||||
|
cnv2_mainloop_soft_aes_sandybridge_asm_endp:
|
|
@ -4,8 +4,9 @@
|
||||||
"threads": 0, // number of miner threads (not set or 0 enables automatic selection of optimal thread count)
|
"threads": 0, // number of miner threads (not set or 0 enables automatic selection of optimal thread count)
|
||||||
"multihash-factor": 0, // number of hash blocks to process at a time (not set or 0 enables automatic selection of optimal number of hash blocks)
|
"multihash-factor": 0, // number of hash blocks to process at a time (not set or 0 enables automatic selection of optimal number of hash blocks)
|
||||||
"multihash-thread-mask" : null, // for multihash-factors>0 only, limits multihash to given threads (mask), mask "0x3" means run multihash on thread 0 and 1 only (default: all threads)
|
"multihash-thread-mask" : null, // for multihash-factors>0 only, limits multihash to given threads (mask), mask "0x3" means run multihash on thread 0 and 1 only (default: all threads)
|
||||||
"pow-variant" : "auto", // specify the PoW variant to use: -> auto (default), 0 (v0), 1 (v1, aka monerov7, aeonv7), tube (ipbc), alloy, xtl (including autodetect for v5), msr, xhv, rto
|
"pow-variant" : "auto", // specify the PoW variant to use: -> auto (default), 0 (v0), 1 (v1, aka monerov7, aeonv7), 2 (v2, aka monerov8), tube (ipbc), alloy, xtl (including autodetect for v5), msr, xhv, rto
|
||||||
// for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations
|
// for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations
|
||||||
|
"asm-optimization" : "auto", // specify the ASM optimization to use: -> auto (default), intel, ryzen, none
|
||||||
"background": false, // true to run the miner in the background (Windows only, for *nix please use screen/tmux or systemd service instead)
|
"background": false, // true to run the miner in the background (Windows only, for *nix please use screen/tmux or systemd service instead)
|
||||||
"colors": true, // false to disable colored output
|
"colors": true, // false to disable colored output
|
||||||
"cpu-affinity": null, // set process affinity to CPU core(s), mask "0x3" for cores 0 and 1
|
"cpu-affinity": null, // set process affinity to CPU core(s), mask "0x3" for cores 0 and 1
|
||||||
|
|
|
@ -31,6 +31,12 @@
|
||||||
class ILogBackend
|
class ILogBackend
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
# ifdef APP_DEBUG
|
||||||
|
constexpr static const size_t kBufferSize = 1024;
|
||||||
|
# else
|
||||||
|
constexpr static const size_t kBufferSize = 512;
|
||||||
|
# endif
|
||||||
|
|
||||||
virtual ~ILogBackend() {}
|
virtual ~ILogBackend() {}
|
||||||
|
|
||||||
virtual void message(int level, const char* fmt, va_list args) = 0;
|
virtual void message(int level, const char* fmt, va_list args) = 0;
|
||||||
|
|
|
@ -56,19 +56,20 @@ void FileLog::message(int level, const char* fmt, va_list args)
|
||||||
localtime_r(&now, &stime);
|
localtime_r(&now, &stime);
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
auto *buf = new char[512];
|
snprintf(m_fmt, sizeof(m_fmt) - 1, "[%d-%02d-%02d %02d:%02d:%02d] %s",
|
||||||
int size = snprintf(buf, 23, "[%d-%02d-%02d %02d:%02d:%02d] ",
|
|
||||||
stime.tm_year + 1900,
|
stime.tm_year + 1900,
|
||||||
stime.tm_mon + 1,
|
stime.tm_mon + 1,
|
||||||
stime.tm_mday,
|
stime.tm_mday,
|
||||||
stime.tm_hour,
|
stime.tm_hour,
|
||||||
stime.tm_min,
|
stime.tm_min,
|
||||||
stime.tm_sec);
|
stime.tm_sec,
|
||||||
|
fmt);
|
||||||
|
|
||||||
size = vsnprintf(buf + size, 512 - size - 1, fmt, args) + size;
|
auto *buf = new char[kBufferSize];
|
||||||
|
const int size = vsnprintf(buf, kBufferSize - 1, m_fmt, args);
|
||||||
buf[size] = '\n';
|
buf[size] = '\n';
|
||||||
|
|
||||||
std::string row = std::regex_replace(std::string(buf, size+1), std::regex("\x1B\\[[0-9;]*[a-zA-Z]"), "");
|
std::string row = std::regex_replace(std::string(buf, static_cast<unsigned long>(size + 1)), std::regex("\x1B\\[[0-9;]*[a-zA-Z]"), "");
|
||||||
|
|
||||||
memcpy(buf, row.c_str(), row.length());
|
memcpy(buf, row.c_str(), row.length());
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,7 @@ private:
|
||||||
|
|
||||||
void write(char *data, size_t size);
|
void write(char *data, size_t size);
|
||||||
|
|
||||||
|
char m_fmt[256];
|
||||||
int m_file;
|
int m_file;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -138,24 +138,24 @@ bool Job::setTarget(const char *target)
|
||||||
|
|
||||||
PowVariant Job::powVariant() const
|
PowVariant Job::powVariant() const
|
||||||
{
|
{
|
||||||
if (m_powVariant == PowVariant::POW_AUTODETECT)
|
if (m_powVariant == PowVariant::POW_AUTODETECT) {
|
||||||
{
|
if (m_blob[0] > 7) {
|
||||||
return (m_blob[0] > 6 ? PowVariant::POW_V1 : PowVariant::POW_V0);
|
return PowVariant::POW_V2;
|
||||||
|
} else if (m_blob[0] > 6) {
|
||||||
|
return PowVariant::POW_V1;
|
||||||
|
} else {
|
||||||
|
return PowVariant::POW_V0;
|
||||||
}
|
}
|
||||||
else if (m_powVariant == PowVariant::POW_XTL && m_blob[0] < 4)
|
}
|
||||||
{
|
else if (m_powVariant == PowVariant::POW_XTL && m_blob[0] < 4) {
|
||||||
return POW_V1;
|
return POW_V1;
|
||||||
}
|
}
|
||||||
else if (m_powVariant == PowVariant::POW_MSR && m_blob[0] < 7)
|
else if (m_powVariant == PowVariant::POW_MSR && m_blob[0] < 7) {
|
||||||
{
|
|
||||||
return POW_V1;
|
return POW_V1;
|
||||||
}
|
}
|
||||||
else if (m_powVariant == PowVariant::POW_XHV && m_blob[0] < 3)
|
else if (m_powVariant == PowVariant::POW_XHV && m_blob[0] < 3) {
|
||||||
{
|
|
||||||
return POW_V0;
|
return POW_V0;
|
||||||
}
|
} else {
|
||||||
else
|
|
||||||
{
|
|
||||||
return m_powVariant;
|
return m_powVariant;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,13 +36,13 @@
|
||||||
#define APP_DESC "XMRigCC CPU miner"
|
#define APP_DESC "XMRigCC CPU miner"
|
||||||
#define APP_COPYRIGHT "Copyright (C) 2017- BenDr0id"
|
#define APP_COPYRIGHT "Copyright (C) 2017- BenDr0id"
|
||||||
#endif
|
#endif
|
||||||
#define APP_VERSION "1.7.0 (based on XMRig)"
|
#define APP_VERSION "1.8.0_beta1 (based on XMRig)"
|
||||||
#define APP_DOMAIN ""
|
#define APP_DOMAIN ""
|
||||||
#define APP_SITE "https://github.com/Bendr0id/xmrigCC"
|
#define APP_SITE "https://github.com/Bendr0id/xmrigCC"
|
||||||
#define APP_KIND "cpu"
|
#define APP_KIND "cpu"
|
||||||
|
|
||||||
#define APP_VER_MAJOR 1
|
#define APP_VER_MAJOR 1
|
||||||
#define APP_VER_MINOR 7
|
#define APP_VER_MINOR 8
|
||||||
#define APP_VER_BUILD 0
|
#define APP_VER_BUILD 0
|
||||||
#define APP_VER_REV 0
|
#define APP_VER_REV 0
|
||||||
|
|
||||||
|
|
|
@ -140,7 +140,7 @@ void MultiWorker::start()
|
||||||
*Job::nonce(m_state->blob + i * m_state->job.size()) = ++m_state->nonces[i];
|
*Job::nonce(m_state->blob + i * m_state->job.size()) = ++m_state->nonces[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
CryptoNight::hash(m_hashFactor, m_state->job.powVariant(), m_state->blob, m_state->job.size(), m_hash, scratchPads);
|
CryptoNight::hash(m_hashFactor, Options::i()->asmOptimization(), m_state->job.powVariant(), m_state->blob, m_state->job.size(), m_hash, scratchPads);
|
||||||
|
|
||||||
for (size_t i=0; i < m_hashFactor; ++i) {
|
for (size_t i=0; i < m_hashFactor; ++i) {
|
||||||
if (*reinterpret_cast<uint64_t *>(m_hash + 24 + i * 32) < m_state->job.target()) {
|
if (*reinterpret_cast<uint64_t *>(m_hash + 24 + i * 32) < m_state->job.target()) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue