Merge branch 'master' into develop
This commit is contained in:
commit
16caa6b76b
51 changed files with 6898 additions and 877 deletions
21
.travis.yml
Normal file
21
.travis.yml
Normal file
|
@ -0,0 +1,21 @@
|
|||
# Travis CI job: build XMRigCC on an OS X worker and smoke-test the daemon.
os: osx
osx_image: xcode8
sudo: false
language: cpp
compiler: gcc

# Refresh the Homebrew package index before anything gets installed.
before_install:
  brew update

script:
  # Build dependencies from Homebrew.
  - brew install gcc libuv libmicrohttpd
  - brew upgrade boost
  # Configure against the Homebrew libuv / OpenSSL archives and Boost root, then build.
  - cmake . -DUV_LIBRARY=/usr/local/lib/libuv.a -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DOPENSSL_SSL_LIBRARY=/usr/local/opt/openssl/lib/libssl.a -DOPENSSL_CRYPTO_LIBRARY=/usr/local/opt/openssl/lib/libcrypto.a -DBOOST_ROOT=/usr/local/lib
  - make
  # Smoke test: copy the sample config next to the binary and run the daemon's
  # --version and --help commands.
  - cp ./src/config.json .
  - ./xmrigDaemon --version
  - ./xmrigDaemon --help
|
|
@ -1,3 +1,8 @@
|
|||
# 1.8.0
|
||||
- Integration of cnv2 aka moneroV8
|
||||
- Hashrate improvement for cnv1, cn-litev1, cnv2 and xtl by adding ASM code for Intel/AMD and softaes [thx @SChernykh]
|
||||
- Fixes for OSX builds [thx @djfinch]
|
||||
- Fixed safe mode #173
|
||||
# 1.7.0
|
||||
- First official Release of XMRigCC-amd #33 #3
|
||||
- Full integration of xmrigCC-amd into XMRigCCServer/Dashboard with GPUInfo / remote logging
|
||||
|
|
|
@ -10,6 +10,7 @@ option(WITH_HTTPD "HTTP REST API" OFF)
|
|||
option(WITH_CC_CLIENT "CC Client" ON)
|
||||
option(WITH_CC_SERVER "CC Server" ON)
|
||||
option(WITH_TLS "TLS support" ON)
|
||||
option(WITH_ASM "ASM optimizations" ON)
|
||||
option(BUILD_STATIC "Build static binary" OFF)
|
||||
set(Boost_USE_STATIC_RUNTIME ON)
|
||||
set(Boost_USE_STATIC_LIBS ON)
|
||||
|
@ -128,7 +129,7 @@ find_package(UV REQUIRED)
|
|||
|
||||
if (WIN32)
|
||||
add_definitions(-DBOOST_ALL_NO_LIB)
|
||||
endif()
|
||||
endif(WIN32)
|
||||
|
||||
find_package(Boost 1.63.0 COMPONENTS system REQUIRED)
|
||||
|
||||
|
@ -144,10 +145,10 @@ if (WITH_TLS)
|
|||
set(SOURCES_SSL_TLS src/net/BoostTlsConnection.cpp)
|
||||
else()
|
||||
message(FATAL_ERROR "OpenSSL NOT found: use `-DWITH_TLS=OFF` to build without TLS support")
|
||||
endif()
|
||||
endif(OPENSSL_FOUND)
|
||||
else()
|
||||
add_definitions(/DXMRIG_NO_TLS)
|
||||
endif()
|
||||
endif(WITH_TLS)
|
||||
|
||||
if (WITH_LIBCPUID)
|
||||
add_subdirectory(src/3rdparty/libcpuid)
|
||||
|
@ -162,8 +163,8 @@ else()
|
|||
set(SOURCES_CPUID src/Cpu_arm.cpp)
|
||||
else()
|
||||
set(SOURCES_CPUID src/Cpu_stub.cpp)
|
||||
endif()
|
||||
endif()
|
||||
endif(XMRIG_ARM)
|
||||
endif(WITH_LIBCPUID)
|
||||
|
||||
CHECK_INCLUDE_FILE (syslog.h HAVE_SYSLOG_H)
|
||||
if (HAVE_SYSLOG_H)
|
||||
|
@ -179,11 +180,11 @@ if (WITH_HTTPD)
|
|||
set(HTTPD_SOURCES src/api/Httpd.h src/api/Httpd.cpp)
|
||||
else()
|
||||
message(FATAL_ERROR "microhttpd NOT found: use `-DWITH_HTTPD=OFF` to build without http deamon support")
|
||||
endif()
|
||||
endif(MHD_FOUND)
|
||||
else()
|
||||
add_definitions(/DXMRIG_NO_HTTPD)
|
||||
add_definitions(/DXMRIG_NO_API)
|
||||
endif()
|
||||
endif(WITH_HTTPD)
|
||||
|
||||
if (WITH_CC_SERVER)
|
||||
find_package(MHD)
|
||||
|
@ -192,7 +193,7 @@ if (WITH_CC_SERVER)
|
|||
include_directories(${MHD_INCLUDE_DIRS})
|
||||
else()
|
||||
message(FATAL_ERROR "microhttpd NOT found: use `-DWITH_CC_SERVER=OFF` to build without CC Server support")
|
||||
endif()
|
||||
endif(MHD_FOUND)
|
||||
|
||||
set(SOURCES_CC_SERVER
|
||||
src/cc/CCServer.cpp
|
||||
|
@ -201,12 +202,12 @@ if (WITH_CC_SERVER)
|
|||
src/cc/Httpd.cpp
|
||||
src/cc/XMRigCC.cpp
|
||||
)
|
||||
endif()
|
||||
endif(WITH_CC_SERVER)
|
||||
|
||||
if (WITH_CC_CLIENT)
|
||||
set(SOURCES_CC_CLIENT
|
||||
src/cc/CCClient.cpp)
|
||||
endif()
|
||||
endif(WITH_CC_CLIENT)
|
||||
|
||||
if (WITH_CC_SERVER OR WITH_CC_CLIENT)
|
||||
set(SOURCES_CC_COMMON
|
||||
|
@ -215,11 +216,34 @@ if (WITH_CC_SERVER OR WITH_CC_CLIENT)
|
|||
src/cc/GPUInfo.cpp)
|
||||
else()
|
||||
add_definitions(/DXMRIG_NO_CC)
|
||||
endif()
|
||||
endif(WITH_CC_SERVER OR WITH_CC_CLIENT)
|
||||
|
||||
# Optional assembler implementation of the CryptoNight main loop.
# Only enabled for non-ARM builds with 8-byte pointers; in every other case
# XMRIG_NO_ASM is defined. The selected source is built into the static
# helper library `xmrig_asm`.
if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
    if (CMAKE_C_COMPILER_ID MATCHES MSVC)
        # MSVC toolchain: MASM-syntax source.
        enable_language(ASM_MASM)
        set(XMRIG_ASM_FILE "src/crypto/asm/win/cn_main_loop.asm")
        # NOTE(review): this names a property "ASM_MASM" without a value; it looks
        # like `PROPERTY LANGUAGE ASM_MASM` was intended - confirm before changing.
        set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY ASM_MASM)
    elseif (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
        # GCC on Windows: dedicated .S variant.
        enable_language(ASM)
        set(XMRIG_ASM_FILE "src/crypto/asm/win/cn_main_loop_win_gcc.S")
        # NOTE(review): same valueless-property pattern as above ("C") - confirm intent.
        set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY C)
    else()
        # Every other non-MSVC toolchain: generic .S source.
        enable_language(ASM)
        set(XMRIG_ASM_FILE "src/crypto/asm/cn_main_loop.S")
        # NOTE(review): same valueless-property pattern as above ("C") - confirm intent.
        set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY C)
    endif()

    add_library(xmrig_asm STATIC ${XMRIG_ASM_FILE})
    set_property(TARGET xmrig_asm PROPERTY LINKER_LANGUAGE C)
else()
    add_definitions(/DXMRIG_NO_ASM)
endif(WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
|
||||
if (BUILD_STATIC)
|
||||
set(CMAKE_EXE_LINKER_FLAGS " -static")
|
||||
endif()
|
||||
endif(BUILD_STATIC)
|
||||
|
||||
include_directories(src)
|
||||
include_directories(src/3rdparty)
|
||||
|
@ -246,12 +270,16 @@ target_link_libraries(xmrigMiner xmrig_common xmrig_os_dependencies xmrig_cpuid
|
|||
|
||||
if (WITH_CC_CLIENT)
|
||||
target_link_libraries(xmrigMiner xmrig_cc_common)
|
||||
endif (WITH_CC_CLIENT)
|
||||
endif(WITH_CC_CLIENT)
|
||||
|
||||
if (WITH_TLS)
|
||||
target_link_libraries(xmrigMiner xmrig_tls ${OPENSSL_LIBRARIES} ${EXTRA_LIBS})
|
||||
target_link_libraries(xmrigMiner xmrig_tls ${OPENSSL_LIBRARIES} ${EXTRA_LIBS})
|
||||
endif (WITH_TLS)
|
||||
endif(WITH_TLS)
|
||||
|
||||
if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
target_link_libraries(xmrigMiner xmrig_asm)
|
||||
endif(WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
|
||||
add_executable(xmrigDaemon src/cc/XMRigd.cpp res/app.rc)
|
||||
set_target_properties(xmrigDaemon PROPERTIES OUTPUT_NAME ${DAEMON_EXECUTABLE_NAME})
|
||||
|
@ -269,6 +297,6 @@ if (WITH_CC_SERVER AND MHD_FOUND)
|
|||
|
||||
set_target_properties(xmrig_common_cc PROPERTIES COMPILE_FLAGS "-DXMRIG_CC_SERVER ${SHARED_FLAGS}")
|
||||
set_target_properties(xmrigCCServer PROPERTIES COMPILE_FLAGS "-DXMRIG_CC_SERVER ${SHARED_FLAGS}")
|
||||
endif()
|
||||
endif(WITH_CC_SERVER AND MHD_FOUND)
|
||||
|
||||
add_subdirectory(test EXCLUDE_FROM_ALL)
|
||||
|
|
15
README.md
15
README.md
|
@ -1,11 +1,15 @@
|
|||
# XMRigCC
|
||||
|
||||
:warning: **Confused by all the forks? Check the [Coin Configuration](https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations) guide.**
|
||||
:warning: **Monero is forking to cnv2 on block 1685555 (2018-10-18); make sure you upgrade to at least XMRigCC 1.8.0.**
|
||||
|
||||
:bulb: **This is the CPU variant of XMRigCC, if you're looking for the AMD GPU (OpenCL) variant [click here](https://github.com/Bendr0id/xmrigCC-amd/).**
|
||||
|
||||
:warning: **Confused by all the forks? Check the [Coin Configuration](https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations) guide.**
|
||||
|
||||
|
||||
[](https://ci.appveyor.com/project/Bendr0id/xmrigcc)
|
||||
[](https://hub.docker.com/r/bendr0id/xmrigcc/)
|
||||
[](https://travis-ci.com/Bendr0id/xmrigCC)
|
||||
[](https://github.com/bendr0id/xmrigCC/releases)
|
||||
[](https://github.com/bendr0id/xmrigCC/releases)
|
||||
[](https://github.com/bendr0id/xmrigCC/releases)
|
||||
|
@ -26,16 +30,17 @@ Full Windows/Linux compatible, and you can mix Linux and Windows miner on one XM
|
|||
## Additional features of XMRigCC (on top of XMRig)
|
||||
|
||||
Check the [Coin Configuration](https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations) guide
|
||||
* **NEW: Support of Cryptonight-Heavy BitTube (TUBE) v4 variant (will fork on Block 110000)**
|
||||
* **NEW: Support of Cryptonight Arto (RTO) variant**
|
||||
* **NEW: Support of Cryptonight v8 PoW changes aka CNV2 (XMR fork on Block 1685555)**
|
||||
* **Support of Cryptonight-Heavy BitTube (TUBE) v4 variant (fork on Block 110000)**
|
||||
* **Support of Cryptonight Arto (RTO) variant**
|
||||
* **Support of Cryptonight Masari (MSR) v7 variant (use variant "msr" to be ready for the fork, with autodetect)**
|
||||
* **Support of Cryptonight-Heavy Haven Protocol (XHV) v3 variant (use variant "xhv")**
|
||||
* **Support of Cryptonight Stellite (XTL) v4 variant**
|
||||
* **Support of Cryptonight Alloy (XAO) variant**
|
||||
* **Support of Cryptonight-Lite IPBC/TUBE variant**
|
||||
* **Support of Cryptonight-Heavy (Loki, Ryo, ...)**
|
||||
* **Support of Cryptonight v7 PoW changes**
|
||||
* **Support of Cryptonight-Lite v7 PoW changes**
|
||||
* **Support of Cryptonight v7 PoW changes aka CNV1**
|
||||
* **Support of Cryptonight-Lite v7 PoW changes aka CN-LiteV1**
|
||||
* Full SSL/TLS support for the whole communication: [Howto](https://github.com/Bendr0id/xmrigCC/wiki/tls)
|
||||
- XMRigCCServer Dashboard <-> Browser
|
||||
- XMRigCCServer <-> XMRigMiner
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# version format
|
||||
version: 1.7.{build}
|
||||
version: 1.8.{build}
|
||||
|
||||
# build only tags
|
||||
skip_non_tags: true
|
||||
|
@ -16,7 +16,7 @@ clone_folder: c:\xmrigCC
|
|||
|
||||
install:
|
||||
- mkdir c:\xmrigCC-deps
|
||||
- curl -sL https://github.com/Bendr0id/xmrigCC-deps/releases/download/v2/xmrigCC-deps.zip -o xmrigCC-deps.zip
|
||||
- curl -sL https://github.com/Bendr0id/xmrigCC-deps/releases/download/v3/xmrigCC-deps.zip -o xmrigCC-deps.zip
|
||||
- 7z x xmrigCC-deps.zip -o"c:\xmrigCC-deps" -y > nul
|
||||
|
||||
build_script:
|
||||
|
|
88
src/AsmOptimization.h
Normal file
88
src/AsmOptimization.h
Normal file
|
@ -0,0 +1,88 @@
|
|||
/* XMRigCC
|
||||
* Copyright 2018- BenDr0id <ben@graef.in>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __ASM_OPTIMIZATION_H__
|
||||
#define __ASM_OPTIMIZATION_H__
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
 * Which assembler-optimized code path (if any) should be used.
 *
 * The numeric order of the enumerators is significant: option parsing in
 * Options.cpp casts an index into its name table straight to this enum.
 */
enum AsmOptimization
{
    ASM_AUTODETECT,
    ASM_INTEL,
    ASM_RYZEN,
    ASM_OFF
};

/**
 * Returns the textual name of an optimization mode.
 *
 * @param asmOptimization mode to describe.
 * @return "INTEL", "RYZEN" or "OFF"; ASM_AUTODETECT and any unknown value
 *         yield "-1".
 */
inline std::string getAsmOptimizationName(AsmOptimization asmOptimization)
{
    switch (asmOptimization)
    {
        case ASM_INTEL:
            return "INTEL";
        case ASM_RYZEN:
            return "RYZEN";
        case ASM_OFF:
            return "OFF";
        case ASM_AUTODETECT:
        default:
            return "-1";
    }
}

/**
 * Maps a numeric setting to an optimization mode.
 *
 * @param optimization 0 = off, 1 = Intel, 2 = Ryzen; -1 and every other
 *                     value select autodetection.
 * @return the corresponding AsmOptimization value.
 */
inline AsmOptimization parseAsmOptimization(int optimization)
{
    switch (optimization) {
        case 0:
            return ASM_OFF;
        case 1:
            return ASM_INTEL;
        case 2:
            return ASM_RYZEN;
        case -1:
        default:
            return ASM_AUTODETECT;
    }
}

/**
 * Maps a textual setting to an optimization mode.
 *
 * Matching ignores ASCII case so that the names produced by
 * getAsmOptimizationName() ("INTEL", "RYZEN", "OFF") are parsed back to the
 * same mode instead of silently falling through to autodetection.
 *
 * @param optimization "0" / "none" / "off", "1" / "intel" or "2" / "ryzen".
 * @return the matching mode; ASM_AUTODETECT for anything else
 *         (including "auto" and "-1").
 */
inline AsmOptimization parseAsmOptimization(const std::string& optimization)
{
    // Lower-case a private copy by hand (ASCII only) to stay locale independent
    // and to avoid pulling in additional headers.
    std::string key(optimization);
    for (std::string::size_type i = 0; i < key.size(); ++i) {
        if (key[i] >= 'A' && key[i] <= 'Z') {
            key[i] = static_cast<char>(key[i] - 'A' + 'a');
        }
    }

    if (key == "0" || key == "none" || key == "off") {
        return ASM_OFF;
    }

    if (key == "1" || key == "intel") {
        return ASM_INTEL;
    }

    if (key == "2" || key == "ryzen") {
        return ASM_RYZEN;
    }

    return ASM_AUTODETECT;
}
|
||||
|
||||
|
||||
#endif /* __ASM_OPTIMIZATION_H__ */
|
|
@ -48,6 +48,7 @@ CpuImpl::CpuImpl()
|
|||
, m_sockets(1)
|
||||
, m_totalCores(0)
|
||||
, m_totalThreads(0)
|
||||
, m_asmOptimization(AsmOptimization::ASM_OFF)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -86,7 +87,7 @@ void CpuImpl::optimizeParameters(size_t& threadsCount, size_t& hashFactor,
|
|||
if (threadsCount > maximumReasonableThreadCount) {
|
||||
threadsCount = maximumReasonableThreadCount;
|
||||
}
|
||||
if (hashFactor > maximumReasonableFactor / threadsCount) {
|
||||
if (threadsCount > 0 && hashFactor > maximumReasonableFactor / threadsCount) {
|
||||
hashFactor = std::min(maximumReasonableFactor / threadsCount, maximumReasonableHashFactor);
|
||||
hashFactor = std::max(hashFactor, static_cast<size_t>(1));
|
||||
}
|
||||
|
@ -106,6 +107,7 @@ void CpuImpl::optimizeParameters(size_t& threadsCount, size_t& hashFactor,
|
|||
}
|
||||
threadsCount = std::max(threadsCount, static_cast<size_t>(1));
|
||||
}
|
||||
|
||||
if (hashFactor == 0) {
|
||||
hashFactor = std::min(maximumReasonableHashFactor, maximumReasonableFactor / threadsCount);
|
||||
hashFactor = std::max(hashFactor, static_cast<size_t>(1));
|
||||
|
@ -215,3 +217,8 @@ int Cpu::getAssignedCpuId(size_t threadId, int64_t affinityMask)
|
|||
|
||||
return cpuId;
|
||||
}
|
||||
|
||||
// Static facade: forwards to the CpuImpl singleton and returns the
// AsmOptimization value it currently holds.
AsmOptimization Cpu::asmOptimization()
{
    return CpuImpl::instance().asmOptimization();
}
|
||||
|
|
|
@ -54,6 +54,7 @@ public:
|
|||
static size_t threads();
|
||||
static size_t availableCache();
|
||||
static int getAssignedCpuId(size_t threadId, int64_t affinityMask);
|
||||
static AsmOptimization asmOptimization();
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -51,6 +51,7 @@ public:
|
|||
size_t sockets() { return m_sockets; }
|
||||
size_t threads() { return m_totalThreads; }
|
||||
size_t availableCache();
|
||||
AsmOptimization asmOptimization() { return m_asmOptimization; }
|
||||
|
||||
private:
|
||||
void initCommon();
|
||||
|
@ -63,6 +64,7 @@ private:
|
|||
size_t m_sockets;
|
||||
size_t m_totalCores;
|
||||
size_t m_totalThreads;
|
||||
AsmOptimization m_asmOptimization;
|
||||
};
|
||||
|
||||
#endif /* __CPU_IMPL_H__ */
|
||||
|
|
|
@ -80,4 +80,15 @@ void CpuImpl::initCommon()
|
|||
if (data.flags[CPU_FEATURE_BMI2]) {
|
||||
m_flags |= Cpu::BMI2;
|
||||
}
|
||||
|
||||
# ifndef XMRIG_NO_ASM
|
||||
if (data.vendor == VENDOR_AMD && data.ext_family >= 0x17) {
|
||||
m_asmOptimization = AsmOptimization::ASM_RYZEN;
|
||||
} else if (data.vendor == VENDOR_INTEL &&
|
||||
((data.ext_family >= 0x06 && data.ext_model > 0x2) ||
|
||||
(data.ext_family >= 0x06 && data.ext_model == 0x2 && data.model >= 0xA))) {
|
||||
m_asmOptimization = AsmOptimization::ASM_INTEL;
|
||||
}
|
||||
# endif
|
||||
|
||||
}
|
||||
|
|
|
@ -73,8 +73,9 @@ Options:\n"
|
|||
-k, --keepalive send keepalived for prevent timeout (need pool support)\n\
|
||||
-r, --retries=N number of times to retry before switch to backup server (default: 5)\n\
|
||||
-R, --retry-pause=N time to pause between retries (default: 5)\n\
|
||||
--pow-variant=V specificy the PoW variat to use: -> auto (default), 0 (v0), 1 (v1, aka monerov7, aeonv7), ipbc (tube), alloy, xtl (including autodetect for v5)\n\
|
||||
--pow-variant=V specificy the PoW variat to use: -> 'auto' (default), '0' (v0), '1' (v1, aka cnv7), '2' (v2, aka cnv8), 'ipbc' (tube), 'alloy', 'xtl' (including autodetect for v5)\n\
|
||||
for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations\n\
|
||||
--asm-optimization=V specificy the ASM optimization to use: -> 'auto' (default), 'intel', 'ryzen', 'off' \n\
|
||||
--multihash-factor=N number of hash blocks to process at a time (don't set or 0 enables automatic selection of optimal number of hash blocks)\n\
|
||||
--multihash-thread-mask=MASK limits multihash to given threads (mask), (default: all threads)\n\
|
||||
--cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1\n\
|
||||
|
@ -166,7 +167,7 @@ static struct option const options[] = {
|
|||
{ "userpass", 1, nullptr, 'O' },
|
||||
{ "version", 0, nullptr, 'V' },
|
||||
{ "use-tls", 0, nullptr, 1015 },
|
||||
{ "force-pow-version",1, nullptr, 1016 },
|
||||
{ "force-pow-version", 1, nullptr, 1016 },
|
||||
{ "pow-variant" ,1, nullptr, 1017 },
|
||||
{ "api-port", 1, nullptr, 4000 },
|
||||
{ "api-access-token", 1, nullptr, 4001 },
|
||||
|
@ -189,6 +190,7 @@ static struct option const options[] = {
|
|||
{ "daemonized", 0, nullptr, 4011 },
|
||||
{ "doublehash-thread-mask", 1, nullptr, 4013 },
|
||||
{ "multihash-thread-mask", 1, nullptr, 4013 },
|
||||
{ "asm-optimization", 1, nullptr, 4020 },
|
||||
{ nullptr, 0, nullptr, 0 }
|
||||
};
|
||||
|
||||
|
@ -217,6 +219,7 @@ static struct option const config_options[] = {
|
|||
{ "pow-variant", 1, nullptr, 1017 },
|
||||
{ "doublehash-thread-mask", 1, nullptr, 4013 },
|
||||
{ "multihash-thread-mask", 1, nullptr, 4013 },
|
||||
{ "asm-optimization", 1, nullptr, 4020 },
|
||||
{ nullptr, 0, nullptr, 0 }
|
||||
};
|
||||
|
||||
|
@ -282,6 +285,7 @@ constexpr static const char *pow_variant_names[] = {
|
|||
"auto",
|
||||
"0",
|
||||
"1",
|
||||
"2",
|
||||
"tube",
|
||||
"alloy",
|
||||
"xtl",
|
||||
|
@ -290,6 +294,13 @@ constexpr static const char *pow_variant_names[] = {
|
|||
"rto"
|
||||
};
|
||||
|
||||
// Values accepted for the asm-optimization option. Options::parseAsmOptimization
// casts the index of the matching entry to AsmOptimization, so the order here
// has to mirror that enum.
constexpr static const char *asm_optimization_names[] = { "auto", "intel", "ryzen", "off" };
|
||||
|
||||
Options *Options::parse(int argc, char **argv)
|
||||
{
|
||||
auto options = new Options(argc, argv);
|
||||
|
@ -342,6 +353,7 @@ Options::Options(int argc, char **argv) :
|
|||
m_algoVariant(AV0_AUTO),
|
||||
m_aesni(AESNI_AUTO),
|
||||
m_powVariant(POW_AUTODETECT),
|
||||
m_asmOptimization(ASM_AUTODETECT),
|
||||
m_hashFactor(0),
|
||||
m_apiPort(0),
|
||||
m_donateLevel(kDonateLevel),
|
||||
|
@ -400,6 +412,10 @@ Options::Options(int argc, char **argv) :
|
|||
|
||||
optimizeAlgorithmConfiguration();
|
||||
|
||||
if (m_asmOptimization == AsmOptimization::ASM_AUTODETECT) {
|
||||
m_asmOptimization = Cpu::asmOptimization();
|
||||
}
|
||||
|
||||
for (Url *url : m_pools) {
|
||||
url->applyExceptions();
|
||||
}
|
||||
|
@ -588,6 +604,9 @@ bool Options::parseArg(int key, const char *arg)
|
|||
case 4019: /* --cc-upload-config-on-startup */
|
||||
return parseBoolean(key, true);
|
||||
|
||||
case 4020: /* --asm-optimization */
|
||||
return parseAsmOptimization(arg);
|
||||
|
||||
case 't': /* --threads */
|
||||
if (strncmp(arg, "all", 3) == 0) {
|
||||
m_threads = Cpu::threads();
|
||||
|
@ -1015,11 +1034,16 @@ bool Options::parsePowVariant(const char *powVariant)
|
|||
break;
|
||||
}
|
||||
|
||||
if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "monerov7") || !strcmp(powVariant, "aeonv7") || !strcmp(powVariant, "v7"))) {
|
||||
if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "cnv1") || !strcmp(powVariant, "monerov7") || !strcmp(powVariant, "aeonv7") || !strcmp(powVariant, "v7"))) {
|
||||
m_powVariant = POW_V1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "cnv2") || !strcmp(powVariant, "monerov8") || !strcmp(powVariant, "aeonv8") || !strcmp(powVariant, "v8"))) {
|
||||
m_powVariant = POW_V2;
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == ARRAY_SIZE(pow_variant_names) - 1 && !strcmp(powVariant, "stellite")) {
|
||||
m_powVariant = POW_XTL;
|
||||
break;
|
||||
|
@ -1049,6 +1073,30 @@ bool Options::parsePowVariant(const char *powVariant)
|
|||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool Options::parseAsmOptimization(const char *asmOptimization)
|
||||
{
|
||||
for (size_t i = 0; i < ARRAY_SIZE(pow_variant_names); i++) {
|
||||
if (pow_variant_names[i] && !strcmp(asmOptimization, asm_optimization_names[i])) {
|
||||
m_asmOptimization = static_cast<AsmOptimization>(i);
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == ARRAY_SIZE(asm_optimization_names) - 1 && (!strcmp(asmOptimization, "none") || !strcmp(asmOptimization, "0"))) {
|
||||
m_asmOptimization = ASM_OFF;
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == ARRAY_SIZE(asm_optimization_names) - 1) {
|
||||
showUsage(1);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
void Options::optimizeAlgorithmConfiguration()
|
||||
{
|
||||
// backwards compatibility for configs still setting algo variant (av)
|
||||
|
@ -1123,5 +1171,3 @@ bool Options::parseCCUrl(const char* url)
|
|||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
|
||||
#include "rapidjson/fwd.h"
|
||||
#include "PowVariant.h"
|
||||
#include "AsmOptimization.h"
|
||||
|
||||
class Url;
|
||||
struct option;
|
||||
|
@ -91,6 +92,7 @@ public:
|
|||
inline const std::vector<Url*> &pools() const { return m_pools; }
|
||||
inline Algo algo() const { return m_algo; }
|
||||
inline PowVariant powVariant() const { return m_powVariant; }
|
||||
inline AsmOptimization asmOptimization() const { return m_asmOptimization; }
|
||||
inline bool aesni() const { return m_aesni == AESNI_ON; }
|
||||
inline size_t hashFactor() const { return m_hashFactor; }
|
||||
inline int apiPort() const { return m_apiPort; }
|
||||
|
@ -136,6 +138,7 @@ private:
|
|||
|
||||
bool setAlgo(const char *algo);
|
||||
bool parsePowVariant(const char *powVariant);
|
||||
bool parseAsmOptimization(const char *arg);
|
||||
|
||||
void optimizeAlgorithmConfiguration();
|
||||
|
||||
|
@ -167,6 +170,7 @@ private:
|
|||
AlgoVariant m_algoVariant;
|
||||
AesNi m_aesni;
|
||||
PowVariant m_powVariant;
|
||||
AsmOptimization m_asmOptimization;
|
||||
size_t m_hashFactor;
|
||||
int m_apiPort;
|
||||
int m_donateLevel;
|
||||
|
|
|
@ -27,6 +27,7 @@ enum PowVariant
|
|||
POW_AUTODETECT,
|
||||
POW_V0,
|
||||
POW_V1,
|
||||
POW_V2,
|
||||
POW_TUBE,
|
||||
POW_ALLOY,
|
||||
POW_XTL,
|
||||
|
@ -44,6 +45,8 @@ inline std::string getPowVariantName(PowVariant powVariant)
|
|||
return "0";
|
||||
case POW_V1:
|
||||
return "1";
|
||||
case POW_V2:
|
||||
return "2";
|
||||
case POW_TUBE:
|
||||
return "tube";
|
||||
case POW_ALLOY:
|
||||
|
@ -88,6 +91,9 @@ inline PowVariant parseVariant(int variant)
|
|||
case 1:
|
||||
powVariant = PowVariant::POW_V1;
|
||||
break;
|
||||
case 2:
|
||||
powVariant = PowVariant::POW_V2;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -104,6 +110,8 @@ inline PowVariant parseVariant(const std::string variant)
|
|||
powVariant = PowVariant::POW_V0;
|
||||
} else if (variant == "1") {
|
||||
powVariant = PowVariant::POW_V1;
|
||||
} else if (variant == "2") {
|
||||
powVariant = PowVariant::POW_V2;
|
||||
} else if (variant == "ipbc" || variant == "tube" || variant == "bittube") {
|
||||
powVariant = PowVariant::POW_TUBE;
|
||||
} else if (variant == "xao" || variant == "alloy") {
|
||||
|
|
|
@ -59,17 +59,21 @@ static void print_versions()
|
|||
static void print_cpu()
|
||||
{
|
||||
if (Options::i()->colors()) {
|
||||
Log::i()->text("\x1B[01;32m * \x1B[01;37mCPU: %s (%d) %sx64 %sAES-NI",
|
||||
Log::i()->text("\x1B[01;32m * \x1B[01;37mCPU: %s (%d) %sx64 %sAES-NI %sASM-%s",
|
||||
Cpu::brand(),
|
||||
Cpu::sockets(),
|
||||
Cpu::isX64() ? "\x1B[01;32m" : "\x1B[01;31m-",
|
||||
Cpu::hasAES() ? "\x1B[01;32m" : "\x1B[01;31m-");
|
||||
Cpu::hasAES() ? "\x1B[01;32m" : "\x1B[01;31m-",
|
||||
Options::i()->asmOptimization() != AsmOptimization::ASM_OFF ? "\x1B[01;32m" : "\x1B[01;31m",
|
||||
getAsmOptimizationName(Options::i()->asmOptimization()).c_str());
|
||||
# ifndef XMRIG_NO_LIBCPUID
|
||||
Log::i()->text("\x1B[01;32m * \x1B[01;37mCPU L2/L3: %.1f MB/%.1f MB", Cpu::l2() / 1024.0, Cpu::l3() / 1024.0);
|
||||
# endif
|
||||
}
|
||||
else {
|
||||
Log::i()->text(" * CPU: %s (%d) %sx64 %sAES-NI", Cpu::brand(), Cpu::sockets(), Cpu::isX64() ? "" : "-", Cpu::hasAES() ? "" : "-");
|
||||
Log::i()->text(" * CPU: %s (%d) %sx64 %sAES-NI ASM-%s",
|
||||
Cpu::brand(), Cpu::sockets(), Cpu::isX64() ? "" : "-", Cpu::hasAES() ? "" : "-",
|
||||
getAsmOptimizationName(Options::i()->asmOptimization()).c_str());
|
||||
# ifndef XMRIG_NO_LIBCPUID
|
||||
Log::i()->text(" * CPU L2/L3: %.1f MB/%.1f MB", Cpu::l2() / 1024.0, Cpu::l3() / 1024.0);
|
||||
# endif
|
||||
|
|
|
@ -4,8 +4,9 @@
|
|||
"threads": 0, // number of miner threads (not set or 0 enables automatic selection of optimal thread count)
|
||||
"multihash-factor": 0, // number of hash blocks to process at a time (not set or 0 enables automatic selection of optimal number of hash blocks)
|
||||
"multihash-thread-mask" : null, // for multihash-factors>0 only, limits multihash to given threads (mask), mask "0x3" means run multihash on thread 0 and 1 only (default: all threads)
|
||||
"pow-variant" : "auto", // specificy the PoW variat to use: -> auto (default), 0 (v0), 1 (v1, aka monerov7, aeonv7), tube (ipbc), alloy, xtl (including autodetect for v5), msr, xhv, rto
|
||||
"pow-variant" : "auto", // specificy the PoW variat to use: -> auto (default), 0 (v0), 1 (v1, aka monerov7, aeonv7), 2 (v2, aka monerov8), tube (ipbc), alloy, xtl (including autodetect for v5), msr, xhv, rto
|
||||
// for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations
|
||||
"asm-optimization" : "auto", // specificy the ASM optimization to use: -> auto (default), intel, ryzen, off
|
||||
"background": false, // true to run the miner in the background (Windows only, for *nix plase use screen/tmux or systemd service instead)
|
||||
"colors": true, // false to disable colored output
|
||||
"cpu-affinity": null, // set process affinity to CPU core(s), mask "0x3" for cores 0 and 1
|
||||
|
|
|
@ -34,32 +34,84 @@
|
|||
#include "crypto/CryptoNight_test.h"
|
||||
|
||||
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_aesni(PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
static void cryptonight_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
# if !defined(XMRIG_ARMv7)
|
||||
if (powVersion == PowVariant::POW_V1) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
} else if (powVersion == PowVariant::POW_ALLOY) {
|
||||
CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||
} else if (powVersion == PowVariant::POW_XTL) {
|
||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
} else if (powVersion == PowVariant::POW_MSR) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
} else if (powVersion == PowVariant::POW_RTO) {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
}else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_V2) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
#else
|
||||
if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) || (asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1)) {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_ALLOY) {
|
||||
CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||
} else if (powVersion == PowVariant::POW_XTL) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_MSR) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
} else if (powVersion == PowVariant::POW_RTO) {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
}else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
||||
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_softaes(PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
static void cryptonight_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
if (powVersion == PowVariant::POW_V1) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_V2) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_ALLOY) {
|
||||
CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||
} else if (powVersion == PowVariant::POW_XTL) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_MSR) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
} else if (powVersion == PowVariant::POW_RTO) {
|
||||
|
@ -70,10 +122,18 @@ static void cryptonight_softaes(PowVariant powVersion, const uint8_t* input, siz
|
|||
}
|
||||
|
||||
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_lite_aesni(PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
static void cryptonight_lite_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
# if !defined(XMRIG_ARMv7)
|
||||
if (powVersion == PowVariant::POW_V1) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_TUBE) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
} else {
|
||||
|
@ -83,9 +143,17 @@ static void cryptonight_lite_aesni(PowVariant powVersion, const uint8_t* input,
|
|||
}
|
||||
|
||||
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_lite_softaes(PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
static void cryptonight_lite_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
if (powVersion == PowVariant::POW_V1) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_TUBE) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
} else {
|
||||
|
@ -94,7 +162,7 @@ static void cryptonight_lite_softaes(PowVariant powVersion, const uint8_t* input
|
|||
}
|
||||
|
||||
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_heavy_aesni(PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
static void cryptonight_heavy_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
# if !defined(XMRIG_ARMv7)
|
||||
if (powVersion == PowVariant::POW_XHV) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad);
|
||||
|
@ -109,7 +177,7 @@ static void cryptonight_heavy_aesni(PowVariant powVersion, const uint8_t* input,
|
|||
}
|
||||
|
||||
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_heavy_softaes(PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
static void cryptonight_heavy_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
if (powVersion == PowVariant::POW_XHV) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad);
|
||||
}
|
||||
|
@ -121,7 +189,7 @@ static void cryptonight_heavy_softaes(PowVariant powVersion, const uint8_t* inpu
|
|||
}
|
||||
}
|
||||
|
||||
void (*cryptonight_hash_ctx[MAX_NUM_HASH_BLOCKS])(PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad);
|
||||
void (*cryptonight_hash_ctx[MAX_NUM_HASH_BLOCKS])(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad);
|
||||
|
||||
template <size_t HASH_FACTOR>
|
||||
void setCryptoNightHashMethods(Options::Algo algo, bool aesni)
|
||||
|
@ -163,13 +231,22 @@ void setCryptoNightHashMethods<0>(Options::Algo algo, bool aesni)
|
|||
|
||||
// Fills the two 256-entry byte lookup tables, installs the hash routines for
// `algo` via setCryptoNightHashMethods, and returns the result of selfTest(algo).
// NOTE(review): variant1_table / variant_xtl_table are declared `static` in the
// header guarded by __CRYPTONIGHT_H__, so every translation unit that includes
// it gets its own copy and only this file's copy is filled here — confirm that
// all readers of the tables live in this translation unit.
bool CryptoNight::init(int algo, bool aesni)
|
||||
{
|
||||
// One table entry per possible byte value.
for (int i = 0; i < 256; ++i)
|
||||
{
|
||||
// index / index_xtl are even bit offsets (0..14) into the constant 0x75310;
// the two tables differ only in the shift applied to i.
const uint64_t index = (((i >> POW_DEFAULT_INDEX_SHIFT) & 6) | (i & 1)) << 1;
|
||||
const uint64_t index_xtl = (((i >> POW_XLT_V4_INDEX_SHIFT) & 6) | (i & 1)) << 1;
|
||||
|
||||
// The selected bits, masked with 0x30, are XORed into bits 4-5 of i.
variant1_table[i] = i ^ ((0x75310 >> index) & 0x30);
|
||||
variant_xtl_table[i] = i ^ ((0x75310 >> index_xtl) & 0x30);
|
||||
}
|
||||
|
||||
setCryptoNightHashMethods<MAX_NUM_HASH_BLOCKS>(static_cast<Options::Algo>(algo), aesni);
|
||||
return selfTest(algo);
|
||||
}
|
||||
|
||||
// Forwards to the routine stored in cryptonight_hash_ctx for `factor` hash blocks.
// `factor` is 1-based (slot factor-1 is called) and is not range-checked here;
// the table is declared with MAX_NUM_HASH_BLOCKS entries, so callers must pass
// 1..MAX_NUM_HASH_BLOCKS.
void CryptoNight::hash(size_t factor, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad)
|
||||
void CryptoNight::hash(size_t factor, AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad)
|
||||
{
|
||||
cryptonight_hash_ctx[factor-1](powVersion, input, size, output, scratchPad);
|
||||
cryptonight_hash_ctx[factor-1](asmOptimization, powVersion, input, size, output, scratchPad);
|
||||
}
|
||||
|
||||
bool CryptoNight::selfTest(int algo)
|
||||
|
@ -206,203 +283,231 @@ bool CryptoNight::selfTest(int algo)
|
|||
bool resultLite = true;
|
||||
bool resultHeavy = true;
|
||||
|
||||
AsmOptimization asmOptimization = Options::i()->asmOptimization();
|
||||
|
||||
if (algo == Options::ALGO_CRYPTONIGHT_HEAVY) {
|
||||
// cn-heavy
|
||||
|
||||
cryptonight_hash_ctx[0](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy, 96) == 0;
|
||||
#endif
|
||||
|
||||
// cn-heavy haven
|
||||
|
||||
cryptonight_hash_ctx[0](PowVariant::POW_XHV, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
|
||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](PowVariant::POW_XHV, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
|
||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](PowVariant::POW_XHV, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
|
||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 96) == 0;
|
||||
#endif
|
||||
|
||||
// cn-heavy bittube
|
||||
|
||||
cryptonight_hash_ctx[0](PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_tube, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_tube, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_tube, 96) == 0;
|
||||
#endif
|
||||
|
||||
} else if (algo == Options::ALGO_CRYPTONIGHT_LITE) {
|
||||
// cn-lite v0
|
||||
|
||||
cryptonight_hash_ctx[0](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v0_lite, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v0_lite, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v0_lite, 96) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 3
|
||||
cryptonight_hash_ctx[3](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v0_lite, 128) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 4
|
||||
cryptonight_hash_ctx[4](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v0_lite, 160) == 0;
|
||||
#endif
|
||||
|
||||
// cn-lite v7 tests
|
||||
|
||||
cryptonight_hash_ctx[0](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v1_lite, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v1_lite, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v1_lite, 96) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 3
|
||||
cryptonight_hash_ctx[3](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v1_lite, 128) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 4
|
||||
cryptonight_hash_ctx[4](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v1_lite, 160) == 0;
|
||||
#endif
|
||||
|
||||
|
||||
// cn-lite ipbc tests
|
||||
|
||||
cryptonight_hash_ctx[0](PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 96) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 3
|
||||
cryptonight_hash_ctx[3](PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 128) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 4
|
||||
cryptonight_hash_ctx[4](PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 160) == 0;
|
||||
#endif
|
||||
|
||||
} else {
|
||||
// cn v0
|
||||
// cn v0 aka original
|
||||
|
||||
cryptonight_hash_ctx[0](PowVariant::POW_V0,test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V0,test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v0, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v0, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v0, 96) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 3
|
||||
cryptonight_hash_ctx[3](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v0, 128) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 4
|
||||
cryptonight_hash_ctx[4](PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v0, 160) == 0;
|
||||
#endif
|
||||
|
||||
// cn v7
|
||||
// cn v7 aka cnv1
|
||||
|
||||
cryptonight_hash_ctx[0](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v1, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v1, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v1, 96) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 3
|
||||
cryptonight_hash_ctx[3](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v1, 128) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 4
|
||||
cryptonight_hash_ctx[4](PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v1, 160) == 0;
|
||||
#endif
|
||||
|
||||
// cn xtl
|
||||
// cn v7 + xtl
|
||||
|
||||
cryptonight_hash_ctx[0](PowVariant::POW_XTL,test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_XTL,test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_xtl, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](PowVariant::POW_XTL, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_XTL, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_xtl, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](PowVariant::POW_XTL, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_XTL, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_xtl, 96) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 3
|
||||
cryptonight_hash_ctx[3](PowVariant::POW_XTL, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_XTL, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_xtl, 128) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 4
|
||||
cryptonight_hash_ctx[4](PowVariant::POW_XTL, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_XTL, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_xtl, 160) == 0;
|
||||
#endif
|
||||
|
||||
// cn v8 aka cnv2
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v2, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v2, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v2, 96) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 3
|
||||
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v2, 128) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 4
|
||||
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v2, 160) == 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < MAX_NUM_HASH_BLOCKS; ++i) {
|
||||
|
|
|
@ -25,9 +25,10 @@
|
|||
#define __CRYPTONIGHT_H__
|
||||
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "AsmOptimization.h"
|
||||
#include "Options.h"
|
||||
|
||||
#define MEMORY 2097152 /* 2 MiB */
|
||||
|
@ -38,10 +39,18 @@
|
|||
#define POW_XLT_V4_INDEX_SHIFT 4
|
||||
|
||||
// Per-hash working context.
// NOTE(review): the asm main loops address this struct through fixed byte
// offsets (the cn_litev1 loops read +192, +224, +256, +264 and +272), so field
// order and sizes must stay in sync with the asm.
struct ScratchPad {
|
||||
alignas(16) uint8_t state[208]; // 208 instead of 200 to maintain aligned to 16 byte boundaries
|
||||
alignas(16) uint8_t state[224]; // padded from 200 to 224 bytes, a multiple of 16
|
||||
// With state[224] this pointer sits at byte offset 224.
alignas(16) uint8_t* memory;
|
||||
|
||||
// Extra fields used by the asm implementation.
// Assuming 8-byte pointers the offsets are: ctx_info 232, input 256,
// variant_table 264, t_fn 272 — TODO confirm with static_asserts.
|
||||
uint8_t ctx_info[24];
|
||||
// The cn_litev1 asm loops read 8 bytes at input + 35.
const void* input;
|
||||
// Byte lookup table; the asm loops index it with a single byte value.
uint8_t* variant_table;
|
||||
// Base of the dword lookup tables read by the soft-AES asm loop (at +0, +1024, +2048, +3072 bytes).
const uint32_t* t_fn;
|
||||
};
|
||||
|
||||
alignas(64) static uint8_t variant1_table[256];
|
||||
alignas(64) static uint8_t variant_xtl_table[256];
|
||||
|
||||
class Job;
|
||||
class JobResult;
|
||||
|
@ -50,8 +59,9 @@ class CryptoNight
|
|||
{
|
||||
public:
|
||||
static bool init(int algo, bool aesni);
|
||||
static void hash(size_t factor, AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPads);
|
||||
|
||||
static void hash(size_t factor, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPads);
|
||||
public:
|
||||
|
||||
private:
|
||||
static bool selfTest(int algo);
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -84,6 +84,20 @@ const static uint8_t test_output_v1[160] = {
|
|||
};
|
||||
|
||||
|
||||
// CN V8
|
||||
// Expected self-test output for PowVariant::POW_V2 (cn v8 aka cnv2).
// CryptoNight::selfTest compares the first 32, 64, 96, 128 and 160 bytes of
// this array against the output of the 1- to 5-way hash routines.
const static uint8_t test_output_v2[160] = {
|
||||
0x97, 0x37, 0x82, 0x82, 0xcf, 0x10, 0xe7, 0xad, 0x03, 0x3f, 0x7b, 0x80, 0x74, 0xc4, 0x0e, 0x14,
|
||||
0xd0, 0x6e, 0x7f, 0x60, 0x9d, 0xdd, 0xda, 0x78, 0x76, 0x80, 0xb5, 0x8c, 0x05, 0xf4, 0x3d, 0x21,
|
||||
0x87, 0x1f, 0xcd, 0x68, 0x23, 0xf6, 0xa8, 0x79, 0xbb, 0x3f, 0x33, 0x95, 0x1c, 0x8e, 0x8e, 0x89,
|
||||
0x1d, 0x40, 0x43, 0x88, 0x0b, 0x02, 0xdf, 0xa1, 0xbb, 0x3b, 0xe4, 0x98, 0xb5, 0x0e, 0x75, 0x78,
|
||||
0xe6, 0x0d, 0x24, 0x0f, 0x65, 0x85, 0x60, 0x3a, 0x4a, 0xe5, 0x5f, 0x54, 0x9b, 0xc8, 0x79, 0x93,
|
||||
0xeb, 0x3d, 0x98, 0x2c, 0xfe, 0x9b, 0xfb, 0x15, 0xb6, 0x88, 0x21, 0x94, 0xb0, 0x05, 0x86, 0x5c,
|
||||
0x59, 0x8b, 0x93, 0x7a, 0xda, 0xd2, 0xa2, 0x14, 0xed, 0xb7, 0xc4, 0x5d, 0xa1, 0xef, 0x26, 0xf3,
|
||||
0xc7, 0x73, 0x29, 0x4d, 0xf1, 0xc8, 0x2c, 0xe0, 0xd0, 0xe9, 0xed, 0x0c, 0x70, 0x75, 0x05, 0x3e,
|
||||
0x5b, 0xf6, 0xa0, 0x6e, 0xea, 0xde, 0x87, 0x0b, 0x06, 0x29, 0x03, 0xbf, 0xb4, 0x85, 0x9d, 0x04,
|
||||
0x75, 0x1a, 0xcd, 0x1e, 0xd6, 0xaa, 0x1b, 0x05, 0x24, 0x6a, 0x2c, 0x80, 0x69, 0x68, 0xdc, 0x97
|
||||
};
|
||||
|
||||
// CN XTL
|
||||
const static uint8_t test_output_xtl[160] = {
|
||||
0x8F, 0xE5, 0xF0, 0x5F, 0x02, 0x2A, 0x61, 0x7D, 0xE5, 0x3F, 0x79, 0x36, 0x4B, 0x25, 0xCB, 0xC3,
|
||||
|
|
File diff suppressed because it is too large
Load diff
74
src/crypto/asm/cn_litev1_mainloop_sandybridge.inc
Normal file
74
src/crypto/asm/cn_litev1_mainloop_sandybridge.inc
Normal file
|
@ -0,0 +1,74 @@
|
|||
/*
 * Loop body included by cn_main_loop.S inside cn_litev1_mainloop_sandybridge_asm.
 * Input: rcx = context pointer. Reads qwords at ctx+0..+56 and ctx+192, and
 * pointers at ctx+224 (buffer base), ctx+256 (input) and ctx+264 (byte table)
 * -- presumably ScratchPad::memory / input / variant_table; confirm the layout.
 * Runs 262144 iterations; buffer offsets are masked with 1048560 (0xFFFF0).
 * The includer must reserve stack so that [rsp+8]..[rsp+32] are writable.
 */
/* Save rbx, rbp, rsi, rdi in the caller-reserved stack slots. */
mov QWORD PTR [rsp+8], rbx
|
||||
mov QWORD PTR [rsp+16], rbp
|
||||
mov QWORD PTR [rsp+24], rsi
|
||||
mov QWORD PTR [rsp+32], rdi
|
||||
push r14
|
||||
push r15
|
||||
/* Initial values: xmm3 = (ctx+16 ^ ctx+48, ctx+24 ^ ctx+56), r8 = ctx+0 ^ ctx+32, rdi = ctx+8 ^ ctx+40. */
mov rax, QWORD PTR [rcx+48]
|
||||
/* ebp = iteration counter. */
mov ebp, 262144
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
movq xmm3, rax
|
||||
mov rax, QWORD PTR [rcx+256]
|
||||
mov rdi, QWORD PTR [rcx+40]
|
||||
movq xmm0, rdx
|
||||
xor rdi, QWORD PTR [rcx+8]
|
||||
mov rdx, r8
|
||||
/* r15 = byte lookup table pointer. */
mov r15, QWORD PTR [rcx+264]
|
||||
/* rdx = first buffer offset. */
and edx, 1048560
|
||||
/* r14 = (8 bytes at input+35) ^ (qword at ctx+192); XORed into the second stored qword every iteration. */
mov r14, QWORD PTR [rax+35]
|
||||
xor r14, QWORD PTR [rcx+192]
|
||||
/* rsi = buffer base. */
mov rsi, QWORD PTR [rcx+224]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movdqu xmm2, XMMWORD PTR [rdx+rsi]
|
||||
|
||||
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
cn_litev1_mainloop_sandybridge:
|
||||
/* xmm1 = (r8, rdi); one AES round of the loaded block keyed with it. */
movq xmm0, rdi
|
||||
movq xmm1, r8
|
||||
punpcklqdq xmm1, xmm0
|
||||
aesenc xmm2, xmm1
|
||||
/* r10 = low qword of the AES result; r9 = address of the second buffer block. */
movq r10, xmm2
|
||||
mov r9d, r10d
|
||||
and r9d, 1048560
|
||||
add r9, rsi
|
||||
/* Store (AES result ^ previous xmm3) at the first offset; xmm3 becomes the AES result. */
movdqa xmm0, xmm2
|
||||
pxor xmm0, xmm3
|
||||
movdqa xmm3, xmm2
|
||||
movdqu XMMWORD PTR [rdx+rsi], xmm0
|
||||
/* Replace byte 11 of the stored block with its entry in the byte table. */
psrldq xmm0, 11
|
||||
movq rax, xmm0
|
||||
movzx eax, al
|
||||
movzx eax, BYTE PTR [rax+r15]
|
||||
mov BYTE PTR [rsi+rdx+11], al
|
||||
/* 64x64 -> 128 multiply of the second block's low qword by r10; high half adds to r8, low half to rdi. */
mov rbx, QWORD PTR [r9]
|
||||
mov r11, QWORD PTR [r9+8]
|
||||
mov rax, rbx
|
||||
mul r10
|
||||
add r8, rdx
|
||||
mov QWORD PTR [r9], r8
|
||||
add rdi, rax
|
||||
mov rax, r14
|
||||
xor rax, rdi
|
||||
mov QWORD PTR [r9+8], rax
|
||||
/* XOR in the old block contents and derive the next offset from r8. */
xor r8, rbx
|
||||
mov rdx, r8
|
||||
and edx, 1048560
|
||||
movdqu xmm2, XMMWORD PTR [rdx+rsi]
|
||||
xor rdi, r11
|
||||
dec ebp
|
||||
jne cn_litev1_mainloop_sandybridge
|
||||
|
||||
/* Restore saved registers; offsets are +16 relative to the saves because r14/r15 are still pushed. */
mov rbx, QWORD PTR [rsp+24]
|
||||
mov rbp, QWORD PTR [rsp+32]
|
||||
mov rsi, QWORD PTR [rsp+40]
|
||||
mov rdi, QWORD PTR [rsp+48]
|
||||
pop r15
|
||||
pop r14
|
166
src/crypto/asm/cn_litev1_mainloop_soft_aes_sandybridge.inc
Normal file
166
src/crypto/asm/cn_litev1_mainloop_soft_aes_sandybridge.inc
Normal file
|
@ -0,0 +1,166 @@
|
|||
/*
 * Loop body included by cn_main_loop.S inside cn_litev1_mainloop_soft_aes_sandybridge_asm.
 * Input: rcx = context pointer. Same data flow as the AES-NI cn_litev1 loop, but
 * the AES step is done with dword table lookups through the pointer at ctx+272
 * (presumably ScratchPad::t_fn; table contents are defined elsewhere).
 * Runs 262144 iterations; buffer offsets are masked with 1048560 (0xFFFF0).
 */
/* Save general-purpose registers used below. */
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
/* 72 bytes: four 16-byte xmm save slots plus one qword spill slot at [rsp+64]. */
sub rsp, 72
|
||||
|
||||
movaps XMMWORD PTR [rsp], xmm6
|
||||
movaps XMMWORD PTR [rsp+16], xmm7
|
||||
movaps XMMWORD PTR [rsp+32], xmm8
|
||||
movaps XMMWORD PTR [rsp+48], xmm9
|
||||
|
||||
/* Initial values: xmm4 = (ctx+16 ^ ctx+48, ctx+24 ^ ctx+56), r8 = ctx+0 ^ ctx+32, r13 = ctx+8 ^ ctx+40. */
mov rax, QWORD PTR [rcx+48]
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
movq xmm4, rax
|
||||
mov rax, QWORD PTR [rcx+256]
|
||||
mov r13, QWORD PTR [rcx+40]
|
||||
movq xmm0, rdx
|
||||
xor r13, QWORD PTR [rcx+8]
|
||||
mov rdx, r8
|
||||
/* rdi = buffer base (also kept in xmm8 because rdi is reused inside the loop). */
mov rdi, QWORD PTR [rcx+224]
|
||||
and edx, 1048560
|
||||
/* xmm5 = (8 bytes at input+35) ^ (qword at ctx+192). */
mov rax, QWORD PTR [rax+35]
|
||||
xor rax, QWORD PTR [rcx+192]
|
||||
movq xmm5, rax
|
||||
movq xmm8, rdi
|
||||
punpcklqdq xmm4, xmm0
|
||||
/* Spill the current buffer offset. */
mov QWORD PTR [rsp+64], rdx
|
||||
|
||||
/* xmm6 = context pointer, xmm7 = byte lookup table pointer (ctx+264). */
movq xmm6, rcx
|
||||
mov rax, QWORD PTR [rcx+264]
|
||||
movq xmm7, rax
|
||||
|
||||
/* eax = iteration counter (parked in xmm9 while the loop body uses rax). */
mov eax, 262144
|
||||
|
||||
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
cn_litev1_mainloop_soft_aes_sandybridge:
|
||||
movq xmm9, rax
|
||||
/* r12 = lookup table base; load the four dwords of the current buffer block. */
mov r12, QWORD PTR [rcx+272]
|
||||
mov esi, DWORD PTR [rdx+rdi]
|
||||
mov r10d, DWORD PTR [rdx+rdi+4]
|
||||
mov ebp, DWORD PTR [rdx+rdi+12]
|
||||
mov r14d, DWORD PTR [rdx+rdi+8]
|
||||
mov rdx, QWORD PTR [rsp+64]
|
||||
/* Table lookups byte by byte: first table at r12+0, second at r12+1024. */
movzx ecx, sil
|
||||
shr esi, 8
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
/* Advance to the third table (+2048); indices with 256 added address the fourth (+3072). */
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
add ebp, 256
|
||||
movd xmm1, r11d
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
/* Restore the buffer base into rdi. */
movq rdi, xmm8
|
||||
movzx ecx, r14b
|
||||
/* Pack the four result dwords into xmm3 and XOR with (r8, r13). */
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
punpckldq xmm2, xmm1
|
||||
movq xmm1, r8
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
xor eax, r15d
|
||||
movd xmm3, eax
|
||||
movq rax, xmm7
|
||||
punpckldq xmm3, xmm0
|
||||
movq xmm0, r13
|
||||
punpcklqdq xmm1, xmm0
|
||||
punpckldq xmm3, xmm2
|
||||
pxor xmm3, xmm1
|
||||
/* r9 = low qword of the result; r10 = offset of the second buffer block. */
movq r9, xmm3
|
||||
mov r10d, r9d
|
||||
and r10d, 1048560
|
||||
/* Store (result ^ previous xmm4) at the first offset. */
movdqa xmm0, xmm3
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [rdx+rdi], xmm0
|
||||
/* Replace byte 11 of the stored block with its entry in the byte table (rax = table pointer). */
psrldq xmm0, 11
|
||||
movq rcx, xmm0
|
||||
movzx ecx, cl
|
||||
mov cl, BYTE PTR [rcx+rax]
|
||||
mov BYTE PTR [rdi+rdx+11], cl
|
||||
/* 64x64 -> 128 multiply of the second block's low qword by the result's low qword. */
mov rbx, QWORD PTR [r10+rdi]
|
||||
mov rcx, r9
|
||||
lea r9, QWORD PTR [r10+rdi]
|
||||
mov r11, QWORD PTR [r9+8]
|
||||
mov rax, rbx
|
||||
movdqa xmm4, xmm3
|
||||
mul rcx
|
||||
/* Reload the context pointer for the next iteration. */
movq rcx, xmm6
|
||||
add r8, rdx
|
||||
add r13, rax
|
||||
movq rax, xmm5
|
||||
xor rax, r13
|
||||
mov QWORD PTR [r9], r8
|
||||
xor r8, rbx
|
||||
mov QWORD PTR [r9+8], rax
|
||||
/* Reload the iteration counter and compute the next buffer offset. */
movq rax, xmm9
|
||||
mov rdx, r8
|
||||
xor r13, r11
|
||||
and edx, 1048560
|
||||
mov QWORD PTR [rsp+64], rdx
|
||||
sub eax, 1
|
||||
jne cn_litev1_mainloop_soft_aes_sandybridge
|
||||
|
||||
/* Restore xmm6-xmm9 and the pushed registers in reverse order. */
movaps xmm6, XMMWORD PTR [rsp]
|
||||
movaps xmm7, XMMWORD PTR [rsp+16]
|
||||
movaps xmm8, XMMWORD PTR [rsp+32]
|
||||
movaps xmm9, XMMWORD PTR [rsp+48]
|
||||
|
||||
add rsp, 72
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
115
src/crypto/asm/cn_main_loop.S
Normal file
115
src/crypto/asm/cn_main_loop.S
Normal file
|
@ -0,0 +1,115 @@
|
|||
/*
 * Assembler entry points for the CryptoNight main loops. The file is run
 * through the C preprocessor (#define / #ifdef / #include are used below).
 */
/* The included .inc bodies write "ALIGN n"; map that to the assembler directive. */
#define ALIGN .align
|
||||
.intel_syntax noprefix
|
||||
/* On Apple targets exported symbols get a leading underscore. */
#ifdef __APPLE__
|
||||
# define FN_PREFIX(fn) _ ## fn
|
||||
.text
|
||||
#else
|
||||
# define FN_PREFIX(fn) fn
|
||||
.section .text
|
||||
#endif
|
||||
/* Exported loops that use the aesenc instruction path. */
.global FN_PREFIX(cnv1_mainloop_sandybridge_asm)
|
||||
.global FN_PREFIX(cn_litev1_mainloop_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
||||
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
||||
|
||||
/* Exported soft-AES (table lookup) loops. */
.global FN_PREFIX(cnv1_mainloop_soft_aes_sandybridge_asm)
|
||||
.global FN_PREFIX(cn_litev1_mainloop_soft_aes_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_soft_aes_sandybridge_asm)
|
||||
|
||||
/*
 * cnv1_mainloop_sandybridge_asm: exported wrapper around cnv1_mainloop_sandybridge.inc.
 * Reserves 48 bytes of stack (the included body stores rbx/rbp/rsi/rdi at
 * [rsp+8]..[rsp+32] before it pushes anything), copies rdi into rcx because the
 * included body reads its input pointer from rcx, then releases the stack and returns.
 */
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv1_mainloop_sandybridge_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cnv1_mainloop_sandybridge.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
/*
 * cn_litev1_mainloop_sandybridge_asm: exported wrapper around
 * cn_litev1_mainloop_sandybridge.inc. Same shape as the other single-argument entry
 * points in this file: reserve 48 bytes of stack, copy rdi into rcx, run the included
 * body, release the stack, return.
 * NOTE(review): the included body is not part of this view; presumably it uses the
 * reserved 48 bytes and rcx the same way the cnv1 body does - confirm.
 */
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cn_litev1_mainloop_sandybridge_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cn_litev1_mainloop_sandybridge.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
/*
 * cnv2_mainloop_ivybridge_asm: exported wrapper around cnv2_main_loop_ivybridge.inc.
 * Reserves 48 bytes of stack (the included body parks rbx at [rsp+24] before pushing),
 * copies rdi into rcx, the register the included body reads its input pointer from,
 * then releases the stack and returns.
 */
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cnv2_main_loop_ivybridge.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
/*
 * cnv2_mainloop_ryzen_asm: exported wrapper around cnv2_main_loop_ryzen.inc.
 * Reserves 48 bytes of stack (the included body parks rbx/rbp/rsi at
 * [rsp+16]..[rsp+32] before pushing), copies rdi into rcx, the register the included
 * body reads its input pointer from, then releases the stack and returns.
 */
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_mainloop_ryzen_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cnv2_main_loop_ryzen.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
/*
 * cnv2_double_mainloop_sandybridge_asm: exported wrapper around
 * cnv2_double_main_loop_sandybridge.inc, the only two-argument entry point here.
 * Copies rdi into rcx and rsi into rdx, the registers the included body reads its two
 * input pointers from. The 48 reserved stack bytes are used by the included body for
 * its saved MXCSR and two 64-bit spill slots (addressed there as [rsp+256]..[rsp+276]
 * after its own pushes and stack adjustment).
 */
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
mov rdx, rsi
|
||||
#include "cnv2_double_main_loop_sandybridge.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
/*
 * cnv1_mainloop_soft_aes_sandybridge_asm: exported wrapper around
 * cnv1_mainloop_soft_aes_sandybridge.inc. Reserves 48 bytes of stack, copies rdi into
 * rcx (the register the included body reads its input pointer from), runs the body,
 * releases the stack and returns. The included body keeps all of its spills inside
 * its own 72-byte frame, so the 48 bytes here only mirror the other wrappers.
 */
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv1_mainloop_soft_aes_sandybridge_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cnv1_mainloop_soft_aes_sandybridge.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
/*
 * cn_litev1_mainloop_soft_aes_sandybridge_asm: exported wrapper around
 * cn_litev1_mainloop_soft_aes_sandybridge.inc. Reserves 48 bytes of stack, copies rdi
 * into rcx, runs the included body, releases the stack and returns.
 * NOTE(review): only the tail of the included body is visible from here; presumably it
 * reads its input pointer from rcx like the cnv1 soft-AES body - confirm.
 */
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cn_litev1_mainloop_soft_aes_sandybridge_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cn_litev1_mainloop_soft_aes_sandybridge.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
/*
 * cnv2_mainloop_soft_aes_sandybridge_asm: exported wrapper around
 * cnv2_mainloop_soft_aes_sandybridge.inc. Reserves 48 bytes of stack (the included
 * body stores rcx at [rsp+8] before pushing and later spills to [rsp+240]/[rsp+248],
 * which fall inside this area), copies rdi into rcx, runs the body, releases the
 * stack and returns.
 */
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_mainloop_soft_aes_sandybridge_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cnv2_mainloop_soft_aes_sandybridge.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
74
src/crypto/asm/cnv1_mainloop_sandybridge.inc
Normal file
74
src/crypto/asm/cnv1_mainloop_sandybridge.inc
Normal file
|
@ -0,0 +1,74 @@
|
|||
/*
 * Setup for the cnv1 main loop. Input: rcx = pointer to a state block.
 * NOTE(review): the layout of that block is not visible here; offsets below are
 * described only by what is loaded from them.
 *
 * Register roles established here and used by the loop:
 *   r8, rdi  = [rcx+32]^[rcx], [rcx+40]^[rcx+8]   (also used as the AES round key)
 *   xmm3     = ([rcx+48]^[rcx+16], [rcx+56]^[rcx+24])
 *   r14      = qword at ([rcx+256] + 35) ^ [rcx+192]
 *   r15      = [rcx+264], base of a byte lookup table
 *   rsi      = [rcx+224], base of the scratchpad
 *   ebp      = 524288, loop counter
 *   rdx      = r8 & 0x1FFFF0, current scratchpad offset; xmm2 = 16 bytes at that offset
 */
/* rbx/rbp/rsi/rdi are parked in stack space above rsp that the including wrapper reserved. */
mov QWORD PTR [rsp+8], rbx
|
||||
mov QWORD PTR [rsp+16], rbp
|
||||
mov QWORD PTR [rsp+24], rsi
|
||||
mov QWORD PTR [rsp+32], rdi
|
||||
push r14
|
||||
push r15
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov ebp, 524288
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
movq xmm3, rax
|
||||
mov rax, QWORD PTR [rcx+256]
|
||||
mov rdi, QWORD PTR [rcx+40]
|
||||
movq xmm0, rdx
|
||||
xor rdi, QWORD PTR [rcx+8]
|
||||
mov rdx, r8
|
||||
mov r15, QWORD PTR [rcx+264]
|
||||
/* 2097136 = 0x1FFFF0: keeps a 16-byte-aligned offset below 2 MiB. */
and edx, 2097136
|
||||
mov r14, QWORD PTR [rax+35]
|
||||
xor r14, QWORD PTR [rcx+192]
|
||||
mov rsi, QWORD PTR [rcx+224]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movdqu xmm2, XMMWORD PTR [rdx+rsi]
|
||||
|
||||
/*
 * cnv1 main loop, executed 524288 times (counter in ebp).
 * Per iteration: one aesenc on the current scratchpad entry, write-back of the
 * result XORed with the previous AES output, a one-byte table substitution in the
 * written entry, then a 64x64->128 multiply-add against a second scratchpad entry.
 */
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
cnv1_mainloop_sandybridge:
|
||||
/* Round key = (r8, rdi); xmm2 holds the entry loaded at the end of the previous pass. */
movq xmm0, rdi
|
||||
movq xmm1, r8
|
||||
punpcklqdq xmm1, xmm0
|
||||
aesenc xmm2, xmm1
|
||||
/* r10 = low qword of the AES output; r9 = address of the second entry it selects. */
movq r10, xmm2
|
||||
mov r9d, r10d
|
||||
and r9d, 2097136
|
||||
add r9, rsi
|
||||
movdqa xmm0, xmm2
|
||||
pxor xmm0, xmm3
|
||||
/* Remember this AES output for the next iteration's XOR. */
movdqa xmm3, xmm2
|
||||
movdqu XMMWORD PTR [rdx+rsi], xmm0
|
||||
/* Take byte 11 of the value just stored, map it through the table at r15 and patch byte 11 in memory. */
psrldq xmm0, 11
|
||||
movq rax, xmm0
|
||||
movzx eax, al
|
||||
movzx eax, BYTE PTR [rax+r15]
|
||||
mov BYTE PTR [rsi+rdx+11], al
|
||||
mov rbx, QWORD PTR [r9]
|
||||
mov r11, QWORD PTR [r9+8]
|
||||
mov rax, rbx
|
||||
/* rdx:rax = [r9] * r10 (unsigned 128-bit product). */
mul r10
|
||||
add r8, rdx
|
||||
mov QWORD PTR [r9], r8
|
||||
add rdi, rax
|
||||
/* The high qword is stored XORed with r14; rdi itself keeps the un-XORed sum. */
mov rax, r14
|
||||
xor rax, rdi
|
||||
mov QWORD PTR [r9+8], rax
|
||||
xor r8, rbx
|
||||
/* Next scratchpad offset comes from the updated r8; preload that entry for the next pass. */
mov rdx, r8
|
||||
and edx, 2097136
|
||||
movdqu xmm2, XMMWORD PTR [rdx+rsi]
|
||||
xor rdi, r11
|
||||
dec ebp
|
||||
jne cnv1_mainloop_sandybridge
|
||||
|
||||
/*
 * Epilogue: reload rbx/rbp/rsi/rdi from the slots written on entry. The offsets are
 * 16 larger than at entry because r14 and r15 are still on the stack at this point.
 */
mov rbx, QWORD PTR [rsp+24]
|
||||
mov rbp, QWORD PTR [rsp+32]
|
||||
mov rsi, QWORD PTR [rsp+40]
|
||||
mov rdi, QWORD PTR [rsp+48]
|
||||
pop r15
|
||||
pop r14
|
166
src/crypto/asm/cnv1_mainloop_soft_aes_sandybridge.inc
Normal file
166
src/crypto/asm/cnv1_mainloop_soft_aes_sandybridge.inc
Normal file
|
@ -0,0 +1,166 @@
|
|||
/*
 * Setup for the cnv1 soft-AES main loop. Input: rcx = pointer to a state block.
 * NOTE(review): the layout of that block is not visible here; offsets below are
 * described only by what is loaded from them.
 *
 * Frame: 8 pushed registers + 72 bytes; [rsp]..[rsp+63] hold the caller's xmm6-xmm9,
 * [rsp+64] holds the current scratchpad offset.
 *
 * Values parked in xmm registers because the loop needs every general register:
 *   xmm4 = ([rcx+48]^[rcx+16], [rcx+56]^[rcx+24])
 *   xmm5 = qword at ([rcx+256] + 35) ^ [rcx+192]
 *   xmm6 = rcx (state pointer)      xmm7 = [rcx+264] (byte table base)
 *   xmm8 = [rcx+224] (scratchpad base)
 * r8 / r13 = [rcx+32]^[rcx] / [rcx+40]^[rcx+8]; eax = 524288, loop counter.
 */
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 72
|
||||
|
|
||||
movaps XMMWORD PTR [rsp], xmm6
|
||||
movaps XMMWORD PTR [rsp+16], xmm7
|
||||
movaps XMMWORD PTR [rsp+32], xmm8
|
||||
movaps XMMWORD PTR [rsp+48], xmm9
|
||||
|
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
movq xmm4, rax
|
||||
mov rax, QWORD PTR [rcx+256]
|
||||
mov r13, QWORD PTR [rcx+40]
|
||||
movq xmm0, rdx
|
||||
xor r13, QWORD PTR [rcx+8]
|
||||
mov rdx, r8
|
||||
mov rdi, QWORD PTR [rcx+224]
|
||||
/* 2097136 = 0x1FFFF0: keeps a 16-byte-aligned offset below 2 MiB. */
and edx, 2097136
|
||||
mov rax, QWORD PTR [rax+35]
|
||||
xor rax, QWORD PTR [rcx+192]
|
||||
movq xmm5, rax
|
||||
movq xmm8, rdi
|
||||
punpcklqdq xmm4, xmm0
|
||||
mov QWORD PTR [rsp+64], rdx
|
||||
|
|
||||
movq xmm6, rcx
|
||||
mov rax, QWORD PTR [rcx+264]
|
||||
movq xmm7, rax
|
||||
|
|
||||
mov eax, 524288
|
||||
|
||||
/*
 * cnv1 soft-AES main loop, executed 524288 times (counter travels through xmm9).
 * The first half replaces the aesenc instruction with lookups into four consecutive
 * 256-entry dword tables starting at the pointer loaded from [rcx+272] (byte offsets
 * 0, 1024, 2048 and 3072 from that pointer). The second half matches the hardware-AES
 * cnv1 loop: write-back, byte-11 table substitution, 64x64 multiply-add, advance.
 */
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
cnv1_mainloop_soft_aes_sandybridge:
|
||||
/* eax (loop counter) is parked in xmm9 so rax can be used as scratch. */
movq xmm9, rax
|
||||
mov r12, QWORD PTR [rcx+272]
|
||||
/* Load the four dwords of the current scratchpad entry. */
mov esi, DWORD PTR [rdx+rdi]
|
||||
mov r10d, DWORD PTR [rdx+rdi+4]
|
||||
mov ebp, DWORD PTR [rdx+rdi+12]
|
||||
mov r14d, DWORD PTR [rdx+rdi+8]
|
||||
mov rdx, QWORD PTR [rsp+64]
|
||||
/* Table 0 (offset 0): indexed by byte 0 of each input dword. */
movzx ecx, sil
|
||||
shr esi, 8
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
/* Table 1 (offset 1024): indexed by byte 1, taken from the next dword in rotation. */
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
/* r12 now points at table 2; adding 256 to an index selects table 3. */
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
add ebp, 256
|
||||
movd xmm1, r11d
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
/* rdi was used as scratch above; restore the scratchpad base from xmm8. */
movq rdi, xmm8
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
punpckldq xmm2, xmm1
|
||||
movq xmm1, r8
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
xor eax, r15d
|
||||
movd xmm3, eax
|
||||
movq rax, xmm7
|
||||
/* Assemble the four result dwords into xmm3 and XOR in the round key (r8, r13). */
punpckldq xmm3, xmm0
|
||||
movq xmm0, r13
|
||||
punpcklqdq xmm1, xmm0
|
||||
punpckldq xmm3, xmm2
|
||||
pxor xmm3, xmm1
|
||||
/* r9 = low qword of the round output; r10 = offset of the second entry it selects. */
movq r9, xmm3
|
||||
mov r10d, r9d
|
||||
and r10d, 2097136
|
||||
movdqa xmm0, xmm3
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [rdx+rdi], xmm0
|
||||
/* Take byte 11 of the value just stored, map it through the table at rax (from xmm7) and patch byte 11 in memory. */
psrldq xmm0, 11
|
||||
movq rcx, xmm0
|
||||
movzx ecx, cl
|
||||
mov cl, BYTE PTR [rcx+rax]
|
||||
mov BYTE PTR [rdi+rdx+11], cl
|
||||
mov rbx, QWORD PTR [r10+rdi]
|
||||
mov rcx, r9
|
||||
lea r9, QWORD PTR [r10+rdi]
|
||||
mov r11, QWORD PTR [r9+8]
|
||||
mov rax, rbx
|
||||
/* Remember this round output for the next iteration's XOR. */
movdqa xmm4, xmm3
|
||||
/* rdx:rax = [r9] * low qword of the round output. */
mul rcx
|
||||
/* Restore the state pointer into rcx for the next iteration's [rcx+272] load. */
movq rcx, xmm6
|
||||
add r8, rdx
|
||||
add r13, rax
|
||||
/* The high qword is stored XORed with the value kept in xmm5; r13 keeps the un-XORed sum. */
movq rax, xmm5
|
||||
xor rax, r13
|
||||
mov QWORD PTR [r9], r8
|
||||
xor r8, rbx
|
||||
mov QWORD PTR [r9+8], rax
|
||||
movq rax, xmm9
|
||||
mov rdx, r8
|
||||
xor r13, r11
|
||||
and edx, 2097136
|
||||
mov QWORD PTR [rsp+64], rdx
|
||||
sub eax, 1
|
||||
jne cnv1_mainloop_soft_aes_sandybridge
|
||||
|
||||
/* Epilogue: restore xmm6-xmm9 from the frame, drop the 72-byte frame, pop the eight saved registers in reverse push order. */
movaps xmm6, XMMWORD PTR [rsp]
|
||||
movaps xmm7, XMMWORD PTR [rsp+16]
|
||||
movaps xmm8, XMMWORD PTR [rsp+32]
|
||||
movaps xmm9, XMMWORD PTR [rsp+48]
|
||||
|
|
||||
add rsp, 72
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
414
src/crypto/asm/cnv2_double_main_loop_sandybridge.inc
Normal file
414
src/crypto/asm/cnv2_double_main_loop_sandybridge.inc
Normal file
|
@ -0,0 +1,414 @@
|
|||
/*
 * Setup for the two-state cnv2 main loop. Inputs: rcx and rdx, pointers to two state
 * blocks (copied to r8 and r9 below).
 * NOTE(review): the layout of those blocks is not visible here; offsets are described
 * only by what is loaded from them.
 *
 * Frame: 8 pushed registers + 184 bytes. xmm6-xmm15 are saved at [rsp+16]..[rsp+175]
 * (xmm6-xmm8 are addressed through rax = rsp at entry). Slots above the frame, in the
 * area reserved by the including wrapper: [rsp+256]/[rsp+264] = state-0 r10/r11,
 * [rsp+272] = caller's MXCSR, [rsp+276] = temporary MXCSR value.
 *
 * Register roles for the loop (state 0 / state 1):
 *   scratchpad base     r13 (also kept at [rsp])   / rsi      (from offset 224)
 *   running qword pair  r10, r11                   / rdi, rbp (offsets 32^0, 40^8)
 *   xmm7 / xmm6 = (48^16, 56^24)      xmm3 / xmm8 = (80^64, 88^72)
 *   xmm4 = (state0[96], state1[96])   xmm5 = (state0[104], state1[104])
 *   xmm15 / xmm11 = first scratchpad entry, at r9 / r8
 *   xmm13 = 0, xmm14 = 1<<52 in both lanes, xmm12 = 1023<<52 in both lanes
 *   r14d = 524288, loop counter
 */
mov rax, rsp
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 184
|
||||
|
|
||||
/*
 * Save MXCSR and load 24448 (0x5F80) for the duration of the loop.
 * NOTE(review): per the MXCSR bit layout this looks like "all exceptions masked,
 * rounding control = round up" - confirm against the Intel SDM.
 */
stmxcsr DWORD PTR [rsp+272]
|
||||
mov DWORD PTR [rsp+276], 24448
|
||||
ldmxcsr DWORD PTR [rsp+276]
|
||||
|
|
||||
mov r13, QWORD PTR [rcx+224]
|
||||
mov r9, rdx
|
||||
mov r10, QWORD PTR [rcx+32]
|
||||
mov r8, rcx
|
||||
xor r10, QWORD PTR [rcx]
|
||||
mov r14d, 524288
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
xor r11, QWORD PTR [rcx+8]
|
||||
mov rsi, QWORD PTR [rdx+224]
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov rdi, QWORD PTR [r9+32]
|
||||
xor rdi, QWORD PTR [r9]
|
||||
mov rbp, QWORD PTR [r9+40]
|
||||
xor rbp, QWORD PTR [r9+8]
|
||||
movq xmm0, rdx
|
||||
/* rax still holds rsp from entry; these three slots sit directly above the rsp-relative ones below. */
movaps XMMWORD PTR [rax-88], xmm6
|
||||
movaps XMMWORD PTR [rax-104], xmm7
|
||||
movaps XMMWORD PTR [rax-120], xmm8
|
||||
movaps XMMWORD PTR [rsp+112], xmm9
|
||||
movaps XMMWORD PTR [rsp+96], xmm10
|
||||
movaps XMMWORD PTR [rsp+80], xmm11
|
||||
movaps XMMWORD PTR [rsp+64], xmm12
|
||||
movaps XMMWORD PTR [rsp+48], xmm13
|
||||
movaps XMMWORD PTR [rsp+32], xmm14
|
||||
movaps XMMWORD PTR [rsp+16], xmm15
|
||||
mov rdx, r10
|
||||
movq xmm4, QWORD PTR [r8+96]
|
||||
/* 2097136 = 0x1FFFF0: keeps a 16-byte-aligned offset below 2 MiB. */
and edx, 2097136
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
xorps xmm13, xmm13
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r8+72]
|
||||
movq xmm5, QWORD PTR [r8+104]
|
||||
movq xmm7, rax
|
||||
|
|
||||
/* xmm14 = 1<<52 in both lanes. */
mov eax, 1
|
||||
shl rax, 52
|
||||
movq xmm14, rax
|
||||
punpcklqdq xmm14, xmm14
|
||||
|
|
||||
/* xmm12 = 1023<<52 in both lanes. */
mov eax, 1023
|
||||
shl rax, 52
|
||||
movq xmm12, rax
|
||||
punpcklqdq xmm12, xmm12
|
||||
|
|
||||
mov rax, QWORD PTR [r8+80]
|
||||
xor rax, QWORD PTR [r8+64]
|
||||
punpcklqdq xmm7, xmm0
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r9+56]
|
||||
xor rcx, QWORD PTR [r9+24]
|
||||
movq xmm3, rax
|
||||
mov rax, QWORD PTR [r9+48]
|
||||
xor rax, QWORD PTR [r9+16]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movq xmm0, rcx
|
||||
mov QWORD PTR [rsp], r13
|
||||
mov rcx, QWORD PTR [r9+88]
|
||||
xor rcx, QWORD PTR [r9+72]
|
||||
movq xmm6, rax
|
||||
mov rax, QWORD PTR [r9+80]
|
||||
xor rax, QWORD PTR [r9+64]
|
||||
punpcklqdq xmm6, xmm0
|
||||
movq xmm0, rcx
|
||||
mov QWORD PTR [rsp+256], r10
|
||||
mov rcx, rdi
|
||||
mov QWORD PTR [rsp+264], r11
|
||||
movq xmm8, rax
|
||||
and ecx, 2097136
|
||||
punpcklqdq xmm8, xmm0
|
||||
movq xmm0, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm4, xmm0
|
||||
movq xmm0, QWORD PTR [r9+104]
|
||||
/* From here on r8/r9 no longer point at the state blocks: they address the current scratchpad entries. */
lea r8, QWORD PTR [rcx+rsi]
|
||||
movdqu xmm11, XMMWORD PTR [r8]
|
||||
punpcklqdq xmm5, xmm0
|
||||
lea r9, QWORD PTR [rdx+r13]
|
||||
movdqu xmm15, XMMWORD PTR [r9]
|
||||
|
||||
/*
 * Two-state cnv2 main loop, first part (runs 524288 times, counter in r14d).
 * State 0 works on scratchpad r13 with r10/r11, xmm7, xmm3, xmm9, xmm15;
 * state 1 works on scratchpad rsi with rdi/rbp, xmm6, xmm8, xmm10, xmm11.
 * This part covers, for both states: aesenc, the add-and-rotate of the three
 * neighbouring 16-byte chunks (offsets ^16, ^32, ^48), the write-back, the state-0
 * multiply and the floating-point division for both lanes.
 */
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
main_loop_double_sandybridge:
|
||||
movdqu xmm9, xmm15
|
||||
/* eax/ebx/edx = offsets of the other three chunks in the same 64-byte group (state 0). */
mov eax, edx
|
||||
mov ebx, edx
|
||||
xor eax, 16
|
||||
xor ebx, 32
|
||||
xor edx, 48
|
||||
|
|
||||
/* State 0: round key = (r10, r11). */
movq xmm0, r11
|
||||
movq xmm2, r10
|
||||
punpcklqdq xmm2, xmm0
|
||||
aesenc xmm9, xmm2
|
||||
|
|
||||
/* chunk[^16]+xmm7 -> [^32], chunk[^32]+xmm2 -> [^48], chunk[^48]+xmm3 -> [^16]. */
movdqu xmm0, XMMWORD PTR [rax+r13]
|
||||
movdqu xmm1, XMMWORD PTR [rbx+r13]
|
||||
paddq xmm0, xmm7
|
||||
paddq xmm1, xmm2
|
||||
movdqu XMMWORD PTR [rbx+r13], xmm0
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r13]
|
||||
movdqu XMMWORD PTR [rdx+r13], xmm1
|
||||
paddq xmm0, xmm3
|
||||
movdqu XMMWORD PTR [rax+r13], xmm0
|
||||
|
|
||||
/* r11 = low qword of the state-0 AES output; edx = offset of the second entry it selects. */
movq r11, xmm9
|
||||
mov edx, r11d
|
||||
and edx, 2097136
|
||||
movdqa xmm0, xmm9
|
||||
pxor xmm0, xmm7
|
||||
movdqu XMMWORD PTR [r9], xmm0
|
||||
|
|
||||
lea rbx, QWORD PTR [rdx+r13]
|
||||
mov r10, QWORD PTR [rdx+r13]
|
||||
|
|
||||
/* State 1: same sequence with round key (rdi, rbp); note xmm11 is reused to hold that key. */
movdqu xmm10, xmm11
|
||||
movq xmm0, rbp
|
||||
movq xmm11, rdi
|
||||
punpcklqdq xmm11, xmm0
|
||||
aesenc xmm10, xmm11
|
||||
|
|
||||
mov eax, ecx
|
||||
mov r12d, ecx
|
||||
xor eax, 16
|
||||
xor r12d, 32
|
||||
xor ecx, 48
|
||||
|
|
||||
movdqu xmm0, XMMWORD PTR [rax+rsi]
|
||||
paddq xmm0, xmm6
|
||||
movdqu xmm1, XMMWORD PTR [r12+rsi]
|
||||
movdqu XMMWORD PTR [r12+rsi], xmm0
|
||||
paddq xmm1, xmm11
|
||||
movdqu xmm0, XMMWORD PTR [rcx+rsi]
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||
paddq xmm0, xmm8
|
||||
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||
|
|
||||
movq rcx, xmm10
|
||||
and ecx, 2097136
|
||||
|
|
||||
movdqa xmm0, xmm10
|
||||
pxor xmm0, xmm6
|
||||
movdqu XMMWORD PTR [r8], xmm0
|
||||
mov r12, QWORD PTR [rcx+rsi]
|
||||
|
|
||||
mov r9, QWORD PTR [rbx+8]
|
||||
|
|
||||
xor edx, 16
|
||||
mov r8d, edx
|
||||
mov r15d, edx
|
||||
|
|
||||
/* State 0: r10 ^= (low qword of xmm5 << 32) ^ low qword of xmm4, then rdx:rax = r10 * r11. */
movq rdx, xmm5
|
||||
shl rdx, 32
|
||||
movq rax, xmm4
|
||||
xor rdx, rax
|
||||
xor r10, rdx
|
||||
mov rax, r10
|
||||
mul r11
|
||||
mov r11d, r8d
|
||||
xor r11d, 48
|
||||
movq xmm0, rdx
|
||||
xor rdx, [r11+r13]
|
||||
movq xmm1, rax
|
||||
xor rax, [r11+r13+8]
|
||||
punpcklqdq xmm0, xmm1
|
||||
|
|
||||
/* Second add-and-rotate for state 0, around the newly selected entry; the product is XORed into one chunk. */
pxor xmm0, XMMWORD PTR [r8+r13]
|
||||
xor r8d, 32
|
||||
movdqu xmm1, XMMWORD PTR [r11+r13]
|
||||
paddq xmm0, xmm7
|
||||
paddq xmm1, xmm2
|
||||
movdqu XMMWORD PTR [r11+r13], xmm0
|
||||
movdqu xmm0, XMMWORD PTR [r8+r13]
|
||||
movdqu XMMWORD PTR [r8+r13], xmm1
|
||||
paddq xmm0, xmm3
|
||||
movdqu XMMWORD PTR [r15+r13], xmm0
|
||||
|
|
||||
/* State 0 running pair lives in [rsp+256]/[rsp+264]; update, store to the scratchpad, and derive the next offset. */
mov r11, QWORD PTR [rsp+256]
|
||||
add r11, rdx
|
||||
mov rdx, QWORD PTR [rsp+264]
|
||||
add rdx, rax
|
||||
mov QWORD PTR [rbx], r11
|
||||
xor r11, r10
|
||||
mov QWORD PTR [rbx+8], rdx
|
||||
xor rdx, r9
|
||||
mov QWORD PTR [rsp+256], r11
|
||||
and r11d, 2097136
|
||||
mov QWORD PTR [rsp+264], rdx
|
||||
mov QWORD PTR [rsp+8], r11
|
||||
lea r15, QWORD PTR [r11+r13]
|
||||
movdqu xmm15, XMMWORD PTR [r11+r13]
|
||||
/* r13 is temporarily repurposed as the state-1 entry address; the base is reloaded from [rsp] at the loop end. */
lea r13, QWORD PTR [rsi+rcx]
|
||||
movdqa xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
movaps xmm2, xmm13
|
||||
movq r10, xmm0
|
||||
psllq xmm5, 1
|
||||
shl r10, 32
|
||||
movdqa xmm0, xmm9
|
||||
psrldq xmm0, 8
|
||||
movdqa xmm1, xmm10
|
||||
movq r11, xmm0
|
||||
psrldq xmm1, 8
|
||||
movq r8, xmm1
|
||||
psrldq xmm4, 8
|
||||
movaps xmm0, xmm13
|
||||
movq rax, xmm4
|
||||
xor r10, rax
|
||||
movaps xmm1, xmm13
|
||||
xor r10, r12
|
||||
/* Division for both lanes: dividends are the high qwords of the AES outputs (r11, r8), halved with rounding before conversion. */
lea rax, QWORD PTR [r11+1]
|
||||
shr rax, 1
|
||||
movdqa xmm3, xmm9
|
||||
punpcklqdq xmm3, xmm10
|
||||
paddq xmm5, xmm3
|
||||
movq rdx, xmm5
|
||||
psrldq xmm5, 8
|
||||
cvtsi2sd xmm2, rax
|
||||
/* Divisors: low 32 bits of each lane of xmm5, OR 0x80000001 (top and bottom bits set). */
or edx, -2147483647
|
||||
lea rax, QWORD PTR [r8+1]
|
||||
shr rax, 1
|
||||
movq r9, xmm5
|
||||
cvtsi2sd xmm0, rax
|
||||
or r9d, -2147483647
|
||||
cvtsi2sd xmm1, rdx
|
||||
unpcklpd xmm2, xmm0
|
||||
movaps xmm0, xmm13
|
||||
cvtsi2sd xmm0, r9
|
||||
unpcklpd xmm1, xmm0
|
||||
divpd xmm2, xmm1
|
||||
/* Adding 1<<52 to each double's bit pattern raises its exponent by one (x2), undoing the halving above. */
paddq xmm2, xmm14
|
||||
cvttsd2si rax, xmm2
|
||||
psrldq xmm2, 8
|
||||
/* rbx = quotient estimate for lane 0; r11 becomes dividend - quotient*divisor. A negative result is corrected out of line. */
mov rbx, rax
|
||||
imul rax, rdx
|
||||
sub r11, rax
|
||||
js div_fix_1_sandybridge
|
||||
/*
 * Two-state cnv2 main loop, second part: lane-1 remainder, packing of quotients and
 * remainders, and the packed square root with its two exactness checks.
 * div_fix_1_sandybridge jumps back here after correcting lane 0.
 */
div_fix_1_ret_sandybridge:
|
||||
|
|
||||
/* Lane 1: rdx = quotient estimate, r8 = dividend - quotient*divisor (r9 is the lane-1 divisor). */
cvttsd2si rdx, xmm2
|
||||
mov rax, rdx
|
||||
imul rax, r9
|
||||
movd xmm2, r11d
|
||||
movd xmm4, ebx
|
||||
sub r8, rax
|
||||
js div_fix_2_sandybridge
|
||||
div_fix_2_ret_sandybridge:
|
||||
|
|
||||
/* xmm4 = (q0 | r0<<32, q1 | r1<<32): low 32 bits of quotient and remainder per lane. */
movd xmm1, r8d
|
||||
movd xmm0, edx
|
||||
punpckldq xmm2, xmm1
|
||||
punpckldq xmm4, xmm0
|
||||
punpckldq xmm4, xmm2
|
||||
paddq xmm3, xmm4
|
||||
/* Build double bit patterns from (value >> 12) + (1023<<52) and take both square roots at once. */
movdqa xmm0, xmm3
|
||||
psrlq xmm0, 12
|
||||
paddq xmm0, xmm12
|
||||
sqrtpd xmm1, xmm0
|
||||
movq r9, xmm1
|
||||
movdqa xmm5, xmm1
|
||||
/* xmm5 = result bit patterns >> 19; a lane whose low 19 bits are all zero takes the out-of-line fix-up. */
psrlq xmm5, 19
|
||||
test r9, 524287
|
||||
je sqrt_fix_1_sandybridge
|
||||
sqrt_fix_1_ret_sandybridge:
|
||||
|
|
||||
movq r9, xmm10
|
||||
psrldq xmm1, 8
|
||||
movq r8, xmm1
|
||||
test r8, 524287
|
||||
je sqrt_fix_2_sandybridge
|
||||
/*
 * Two-state cnv2 main loop, third part: state-1 multiply, second add-and-rotate on
 * scratchpad rsi, state-1 write-back, then register rotation for the next iteration.
 * sqrt_fix_2_sandybridge jumps back here.
 */
sqrt_fix_2_ret_sandybridge:
|
||||
|
|
||||
mov r12d, ecx
|
||||
mov r8d, ecx
|
||||
xor r12d, 16
|
||||
xor r8d, 32
|
||||
xor ecx, 48
|
||||
/* rdx:rax = r10 * r9, where r9 = low qword of the state-1 AES output. */
mov rax, r10
|
||||
mul r9
|
||||
movq xmm0, rax
|
||||
movq xmm3, rdx
|
||||
punpcklqdq xmm3, xmm0
|
||||
|
|
||||
movdqu xmm0, XMMWORD PTR [r12+rsi]
|
||||
pxor xmm0, xmm3
|
||||
movdqu xmm1, XMMWORD PTR [r8+rsi]
|
||||
xor rdx, [r8+rsi]
|
||||
xor rax, [r8+rsi+8]
|
||||
movdqu xmm3, XMMWORD PTR [rcx+rsi]
|
||||
paddq xmm0, xmm6
|
||||
paddq xmm1, xmm11
|
||||
paddq xmm3, xmm8
|
||||
movdqu XMMWORD PTR [r8+rsi], xmm0
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||
movdqu XMMWORD PTR [r12+rsi], xmm3
|
||||
|
|
||||
/* r13 currently holds the state-1 entry address set in the first part of the loop. */
add rdi, rdx
|
||||
mov QWORD PTR [r13], rdi
|
||||
xor rdi, r10
|
||||
mov ecx, edi
|
||||
and ecx, 2097136
|
||||
lea r8, QWORD PTR [rcx+rsi]
|
||||
|
|
||||
mov rdx, QWORD PTR [r13+8]
|
||||
add rbp, rax
|
||||
mov QWORD PTR [r13+8], rbp
|
||||
movdqu xmm11, XMMWORD PTR [rcx+rsi]
|
||||
xor rbp, rdx
|
||||
/* Reload state-0 values from their stack slots and rotate the history registers: xmm3<-xmm7<-xmm9, xmm8<-xmm6<-xmm10. */
mov r13, QWORD PTR [rsp]
|
||||
movdqa xmm3, xmm7
|
||||
mov rdx, QWORD PTR [rsp+8]
|
||||
movdqa xmm8, xmm6
|
||||
mov r10, QWORD PTR [rsp+256]
|
||||
movdqa xmm7, xmm9
|
||||
mov r11, QWORD PTR [rsp+264]
|
||||
movdqa xmm6, xmm10
|
||||
mov r9, r15
|
||||
dec r14d
|
||||
jne main_loop_double_sandybridge
|
||||
|
||||
/*
 * Epilogue: restore the caller's MXCSR and xmm6-xmm15, unwind the 184-byte frame via
 * r11, pop the saved registers, then jump over the out-of-line fix-up code to the end
 * label so execution falls back into the including wrapper.
 */
ldmxcsr DWORD PTR [rsp+272]
|
||||
movaps xmm13, XMMWORD PTR [rsp+48]
|
||||
/* r11 = rsp+184 = stack pointer as it was right after the pushes. */
lea r11, QWORD PTR [rsp+184]
|
||||
movaps xmm6, XMMWORD PTR [r11-24]
|
||||
movaps xmm7, XMMWORD PTR [r11-40]
|
||||
movaps xmm8, XMMWORD PTR [r11-56]
|
||||
movaps xmm9, XMMWORD PTR [r11-72]
|
||||
movaps xmm10, XMMWORD PTR [r11-88]
|
||||
movaps xmm11, XMMWORD PTR [r11-104]
|
||||
movaps xmm12, XMMWORD PTR [r11-120]
|
||||
movaps xmm14, XMMWORD PTR [rsp+32]
|
||||
movaps xmm15, XMMWORD PTR [rsp+16]
|
||||
mov rsp, r11
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
jmp cnv2_double_mainloop_asm_sandybridge_endp
|
||||
|
||||
/*
 * Out-of-line division corrections, reached when the remainder computed in the main
 * loop came out negative: decrement the quotient and add the divisor back to the
 * remainder, then resume the loop.
 */
/* Lane 0: quotient in rbx, remainder in r11, divisor in rdx. */
div_fix_1_sandybridge:
|
||||
dec rbx
|
||||
add r11, rdx
|
||||
jmp div_fix_1_ret_sandybridge
|
||||
|
|
||||
/* Lane 1: quotient in rdx, remainder in r8, divisor in r9. */
div_fix_2_sandybridge:
|
||||
dec rdx
|
||||
add r8, r9
|
||||
jmp div_fix_2_ret_sandybridge
|
||||
|
||||
/*
 * Out-of-line square-root corrections, reached when the low 19 bits of a sqrtpd result
 * were all zero. Each routine recomputes that lane of xmm5 in integer arithmetic: it
 * decrements the raw result, forms r = raw>>19 and s = raw>>20, evaluates
 * (r - s + c + 1) * (s + c) with c = (-1022 zero-extended to 64 bits) << 32, subtracts
 * the sqrt input, and adds the resulting borrow to r.
 * NOTE(review): the mathematical justification of this correction is not derivable
 * from this file - see the reference implementation before changing it.
 */
/* Lane 0: raw result in r9, sqrt input = low qword of xmm3; the upper lane of xmm5 is preserved via xmm0. */
sqrt_fix_1_sandybridge:
|
||||
movq r8, xmm3
|
||||
movdqa xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
dec r9
|
||||
mov r11d, -1022
|
||||
shl r11, 32
|
||||
mov rax, r9
|
||||
shr r9, 19
|
||||
shr rax, 20
|
||||
mov rdx, r9
|
||||
sub rdx, rax
|
||||
lea rdx, [rdx+r11+1]
|
||||
add rax, r11
|
||||
imul rdx, rax
|
||||
sub rdx, r8
|
||||
adc r9, 0
|
||||
movq xmm5, r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
jmp sqrt_fix_1_ret_sandybridge
|
||||
|
|
||||
/* Lane 1: raw result in r8, sqrt input = high qword of xmm3 (xmm3 is shifted in place); replaces the upper lane of xmm5. */
sqrt_fix_2_sandybridge:
|
||||
psrldq xmm3, 8
|
||||
movq r11, xmm3
|
||||
dec r8
|
||||
mov ebx, -1022
|
||||
shl rbx, 32
|
||||
mov rax, r8
|
||||
shr r8, 19
|
||||
shr rax, 20
|
||||
mov rdx, r8
|
||||
sub rdx, rax
|
||||
lea rdx, [rdx+rbx+1]
|
||||
add rax, rbx
|
||||
imul rdx, rax
|
||||
sub rdx, r11
|
||||
adc r8, 0
|
||||
movq xmm0, r8
|
||||
punpcklqdq xmm5, xmm0
|
||||
jmp sqrt_fix_2_ret_sandybridge
|
||||
|
|
||||
/* End of the included body; the epilogue jumps here to skip the fix-up code above. */
cnv2_double_mainloop_asm_sandybridge_endp:
|
186
src/crypto/asm/cnv2_main_loop_ivybridge.inc
Normal file
186
src/crypto/asm/cnv2_main_loop_ivybridge.inc
Normal file
|
@ -0,0 +1,186 @@
|
|||
/*
 * Setup for the single-state cnv2 main loop (ivybridge variant). Input: rcx = pointer
 * to a state block (copied to r9).
 * NOTE(review): the layout of that block is not visible here; offsets are described
 * only by what is loaded from them.
 *
 * Frame: rbx is parked at [rsp+24] in the wrapper-reserved area, seven registers are
 * pushed, then 80 bytes: [rsp] = caller's MXCSR, [rsp+4] = temporary MXCSR value,
 * [rsp+32]..[rsp+79] = caller's xmm8, xmm7, xmm6.
 *
 * Register roles for the loop:
 *   r8, r11 = [32]^[0], [40]^[8]        rbx = [224], scratchpad base
 *   xmm4 = ([48]^[16], [56]^[24])       xmm5 = ([80]^[64], [88]^[72])
 *   xmm3 = qword [104]                  r15  = qword [96]
 *   xmm8 = 1023<<52                     r13d = 0x80000001 (OR-ed into the divisor)
 *   r10 = r8 & 0x1FFFF0, xmm6 = scratchpad entry at that offset
 *   esi = 524288, loop counter
 */
mov QWORD PTR [rsp+24], rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 80
|
||||
|
|
||||
/*
 * Save MXCSR and load 24448 (0x5F80) for the duration of the loop.
 * NOTE(review): per the MXCSR bit layout this looks like "all exceptions masked,
 * rounding control = round up" - confirm against the Intel SDM.
 */
stmxcsr DWORD PTR [rsp]
|
||||
mov DWORD PTR [rsp+4], 24448
|
||||
ldmxcsr DWORD PTR [rsp+4]
|
||||
|
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r9, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov esi, 524288
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
mov r13d, -2147483647
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
mov r10, r8
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
movq xmm4, rax
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
xor r11, QWORD PTR [rcx+8]
|
||||
mov rbx, QWORD PTR [rcx+224]
|
||||
mov rax, QWORD PTR [r9+80]
|
||||
xor rax, QWORD PTR [r9+64]
|
||||
movq xmm0, rdx
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r9+72]
|
||||
movq xmm3, QWORD PTR [r9+104]
|
||||
movaps XMMWORD PTR [rsp+64], xmm6
|
||||
movaps XMMWORD PTR [rsp+48], xmm7
|
||||
movaps XMMWORD PTR [rsp+32], xmm8
|
||||
/* 2097136 = 0x1FFFF0: keeps a 16-byte-aligned offset below 2 MiB. */
and r10d, 2097136
|
||||
movq xmm5, rax
|
||||
|
|
||||
/* Only ax is written, but the shift by 52 discards every bit of rax above bit 11, so the result is exactly 1023<<52. */
mov ax, 1023
|
||||
shl rax, 52
|
||||
movq xmm8, rax
|
||||
mov r15, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm4, xmm0
|
||||
movq xmm0, rcx
|
||||
punpcklqdq xmm5, xmm0
|
||||
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||
|
||||
/*
 * Single-state cnv2 main loop, ivybridge variant (524288 iterations, counter in rsi).
 * Per iteration: aesenc, add-and-rotate of the three neighbouring 16-byte chunks
 * (offsets ^16, ^32, ^48), write-back, integer division, square root, 64x64 multiply,
 * a second add-and-rotate around the newly selected entry, and advance.
 */
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
$main_loop_ivybridge:
|
||||
lea rdx, QWORD PTR [r10+rbx]
|
||||
mov ecx, r10d
|
||||
mov eax, r10d
|
||||
mov rdi, r15
|
||||
xor ecx, 16
|
||||
xor eax, 32
|
||||
xor r10d, 48
|
||||
/* Round key = (r8, r11). */
movq xmm0, r11
|
||||
movq xmm7, r8
|
||||
punpcklqdq xmm7, xmm0
|
||||
aesenc xmm6, xmm7
|
||||
/* rbp = low qword of the AES output; r9 = offset of the second entry it selects. */
movq rbp, xmm6
|
||||
mov r9, rbp
|
||||
and r9d, 2097136
|
||||
/* chunk[^16]+xmm4 -> [^32], chunk[^32]+xmm7 -> [^48], chunk[^48]+xmm5 -> [^16]. */
movdqu xmm2, XMMWORD PTR [rcx+rbx]
|
||||
movdqu xmm1, XMMWORD PTR [rax+rbx]
|
||||
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm1, xmm7
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
||||
movdqu XMMWORD PTR [rax+rbx], xmm2
|
||||
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||
mov r10, r9
|
||||
xor r10d, 32
|
||||
/* rdi = r15 ^ (low qword of xmm3 << 32) ^ first qword of the second entry. */
movq rcx, xmm3
|
||||
mov rax, rcx
|
||||
shl rax, 32
|
||||
xor rdi, rax
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [rdx], xmm0
|
||||
xor rdi, QWORD PTR [r9+rbx]
|
||||
lea r14, QWORD PTR [r9+rbx]
|
||||
mov r12, QWORD PTR [r14+8]
|
||||
/* Division: dividend = high qword of the AES output (rdx cleared), divisor = (2*ecx + ebp) | 0x80000001. */
xor edx, edx
|
||||
lea r9d, DWORD PTR [ecx+ecx]
|
||||
add r9d, ebp
|
||||
movdqa xmm0, xmm6
|
||||
psrldq xmm0, 8
|
||||
or r9d, r13d
|
||||
movq rax, xmm0
|
||||
div r9
|
||||
xorps xmm3, xmm3
|
||||
/* r15 = (remainder << 32) + low 32 bits of the quotient; it feeds the next iteration's rdi. */
mov eax, eax
|
||||
shl rdx, 32
|
||||
add rdx, rax
|
||||
lea r9, QWORD PTR [rdx+rbp]
|
||||
mov r15, rdx
|
||||
/* Square root of (r9 >> 12) + (1023<<52) interpreted as a double bit pattern; r9 is kept as the fix-up's input. */
mov rax, r9
|
||||
shr rax, 12
|
||||
movq xmm0, rax
|
||||
paddq xmm0, xmm8
|
||||
sqrtsd xmm3, xmm0
|
||||
movq rdx, xmm3
|
||||
/* If the low 19 bits of the result are all zero take the out-of-line fix-up, which produces xmm3 itself. */
test edx, 524287
|
||||
je $sqrt_fixup_ivybridge
|
||||
psrlq xmm3, 19
|
||||
$sqrt_fixup_ivybridge_ret:
|
||||
|
|
||||
mov ecx, r10d
|
||||
/* rdx:rax = rdi * rbp. */
mov rax, rdi
|
||||
mul rbp
|
||||
movq xmm2, rdx
|
||||
xor rdx, [rcx+rbx]
|
||||
add r8, rdx
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rdi
|
||||
mov edi, r8d
|
||||
and edi, 2097136
|
||||
movq xmm0, rax
|
||||
xor rax, [rcx+rbx+8]
|
||||
add r11, rax
|
||||
mov QWORD PTR [r14+8], r11
|
||||
punpcklqdq xmm2, xmm0
|
||||
|
|
||||
/* Second add-and-rotate around the second entry; the product (xmm2) is XORed into one chunk first. */
mov r9d, r10d
|
||||
xor r9d, 48
|
||||
xor r10d, 16
|
||||
pxor xmm2, XMMWORD PTR [r9+rbx]
|
||||
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm0, xmm5
|
||||
movdqu xmm1, XMMWORD PTR [rcx+rbx]
|
||||
paddq xmm2, xmm4
|
||||
paddq xmm1, xmm7
|
||||
/* History rotation: xmm5 <- xmm4 <- xmm6 (current AES output). */
movdqa xmm5, xmm4
|
||||
movdqu XMMWORD PTR [r9+rbx], xmm0
|
||||
movdqa xmm4, xmm6
|
||||
movdqu XMMWORD PTR [rcx+rbx], xmm2
|
||||
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||
/* Preload the next entry and its offset for the following iteration. */
movdqu xmm6, [rdi+rbx]
|
||||
mov r10d, edi
|
||||
xor r11, r12
|
||||
dec rsi
|
||||
jne $main_loop_ivybridge
|
||||
|
||||
/*
 * Epilogue: restore the caller's MXCSR, rbx and xmm6-xmm8, drop the 80-byte frame,
 * pop the saved registers, then jump over the fix-up code to the end label.
 * [rsp+160] is the slot written as [rsp+24] on entry (7 pushes = 56 bytes, plus 80).
 */
ldmxcsr DWORD PTR [rsp]
|
||||
mov rbx, QWORD PTR [rsp+160]
|
||||
movaps xmm6, XMMWORD PTR [rsp+64]
|
||||
movaps xmm7, XMMWORD PTR [rsp+48]
|
||||
movaps xmm8, XMMWORD PTR [rsp+32]
|
||||
add rsp, 80
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
jmp $cnv2_main_loop_ivybridge_endp
|
||||
|
||||
/*
 * Out-of-line square-root correction, reached when the low 19 bits of the sqrtsd
 * result (in rdx) were all zero. Decrements the raw result, forms r = raw>>19 and
 * s = raw>>20, evaluates (r - s - ~c) * (s + c) with c = (-1022 zero-extended) << 32,
 * subtracts the sqrt input held in r9 and adds the resulting borrow to r; the final
 * r is placed in xmm3. r13 is used as scratch and reloaded with 0x80000001, the value
 * the main loop expects there.
 * NOTE(review): the mathematical justification of this correction is not derivable
 * from this file - see the reference implementation before changing it.
 */
$sqrt_fixup_ivybridge:
|
||||
dec rdx
|
||||
mov r13d, -1022
|
||||
shl r13, 32
|
||||
mov rax, rdx
|
||||
shr rdx, 19
|
||||
shr rax, 20
|
||||
mov rcx, rdx
|
||||
sub rcx, rax
|
||||
add rax, r13
|
||||
/* Subtracting ~c is the same as adding c + 1. */
not r13
|
||||
sub rcx, r13
|
||||
mov r13d, -2147483647
|
||||
imul rcx, rax
|
||||
sub rcx, r9
|
||||
adc rdx, 0
|
||||
movq xmm3, rdx
|
||||
jmp $sqrt_fixup_ivybridge_ret
|
||||
|
|
||||
/* End of the included body; the epilogue jumps here to skip the fix-up code above. */
$cnv2_main_loop_ivybridge_endp:
|
183
src/crypto/asm/cnv2_main_loop_ryzen.inc
Normal file
183
src/crypto/asm/cnv2_main_loop_ryzen.inc
Normal file
|
@ -0,0 +1,183 @@
|
|||
/*
 * Setup for the single-state cnv2 main loop (ryzen variant). Input: rcx = pointer to
 * a state block (copied to r9).
 * NOTE(review): the layout of that block is not visible here; offsets are described
 * only by what is loaded from them.
 *
 * Frame: rbx/rbp/rsi are parked at [rsp+16]..[rsp+32] in the wrapper-reserved area,
 * five registers are pushed, then 64 bytes: [rsp] = caller's MXCSR, [rsp+4] =
 * temporary MXCSR value, [rsp+16]..[rsp+63] = caller's xmm8, xmm7, xmm6.
 *
 * Register roles for the loop:
 *   r8, r11 = [32]^[0], [40]^[8]        rbx = [224], scratchpad base
 *   xmm3 = ([48]^[16], [56]^[24])       xmm4 = ([80]^[64], [88]^[72])
 *   rdi  = qword [104]                  r15  = qword [96]
 *   xmm7 = 1023<<52                     xmm8 = 0
 *   r10 = r8 & 0x1FFFF0, current scratchpad offset
 *   ebp = 524288, loop counter
 */
mov QWORD PTR [rsp+16], rbx
|
||||
mov QWORD PTR [rsp+24], rbp
|
||||
mov QWORD PTR [rsp+32], rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 64
|
||||
|
|
||||
/*
 * Save MXCSR and load 24448 (0x5F80) for the duration of the loop.
 * NOTE(review): per the MXCSR bit layout this looks like "all exceptions masked,
 * rounding control = round up" - confirm against the Intel SDM.
 */
stmxcsr DWORD PTR [rsp]
|
||||
mov DWORD PTR [rsp+4], 24448
|
||||
ldmxcsr DWORD PTR [rsp+4]
|
||||
|
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r9, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov ebp, 524288
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
mov r10, r8
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
movq xmm3, rax
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
xor r11, QWORD PTR [rcx+8]
|
||||
mov rbx, QWORD PTR [rcx+224]
|
||||
mov rax, QWORD PTR [r9+80]
|
||||
xor rax, QWORD PTR [r9+64]
|
||||
movq xmm0, rdx
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r9+72]
|
||||
mov rdi, QWORD PTR [r9+104]
|
||||
/* 2097136 = 0x1FFFF0: keeps a 16-byte-aligned offset below 2 MiB. */
and r10d, 2097136
|
||||
movaps XMMWORD PTR [rsp+48], xmm6
|
||||
movq xmm4, rax
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+16], xmm8
|
||||
xorps xmm8, xmm8
|
||||
/* Only ax is written, but the shift by 52 discards every bit of rax above bit 11, so the result is exactly 1023<<52. */
mov ax, 1023
|
||||
shl rax, 52
|
||||
movq xmm7, rax
|
||||
mov r15, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movq xmm0, rcx
|
||||
punpcklqdq xmm4, xmm0
|
||||
|
||||
/*
 * Single-state cnv2 main loop, ryzen variant (524288 iterations, counter in ebp).
 * Same sequence of steps as the ivybridge variant but scheduled differently; every
 * scratchpad access here uses movdqa, so the scratchpad base in rbx must be 16-byte
 * aligned.
 */
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
$main_loop_ryzen:
|
||||
movdqa xmm5, XMMWORD PTR [r10+rbx]
|
||||
/* Round key = (r8, r11). */
movq xmm0, r11
|
||||
movq xmm6, r8
|
||||
punpcklqdq xmm6, xmm0
|
||||
lea rdx, QWORD PTR [r10+rbx]
|
||||
/* r9 = 2*rdi (divisor seed); rdi << 32 is XORed into rsi below. */
lea r9, QWORD PTR [rdi+rdi]
|
||||
shl rdi, 32
|
||||
|
|
||||
mov ecx, r10d
|
||||
mov eax, r10d
|
||||
xor ecx, 16
|
||||
xor eax, 32
|
||||
xor r10d, 48
|
||||
aesenc xmm5, xmm6
|
||||
/* chunk[^16]+xmm3 -> [^32], chunk[^32]+xmm6 -> [^48], chunk[^48]+xmm4 -> [^16]. */
movdqa xmm2, XMMWORD PTR [rcx+rbx]
|
||||
movdqa xmm1, XMMWORD PTR [rax+rbx]
|
||||
movdqa xmm0, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm2, xmm3
|
||||
paddq xmm1, xmm6
|
||||
paddq xmm0, xmm4
|
||||
movdqa XMMWORD PTR [rcx+rbx], xmm0
|
||||
movdqa XMMWORD PTR [rax+rbx], xmm2
|
||||
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||
|
|
||||
movaps xmm1, xmm8
|
||||
mov rsi, r15
|
||||
xor rsi, rdi
|
||||
/* r14 = low qword of the AES output; r10 = offset of the second entry it selects. */
movq r14, xmm5
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm3
|
||||
mov r10, r14
|
||||
and r10d, 2097136
|
||||
movdqa XMMWORD PTR [rdx], xmm0
|
||||
xor rsi, QWORD PTR [r10+rbx]
|
||||
lea r12, QWORD PTR [r10+rbx]
|
||||
mov r13, QWORD PTR [r10+rbx+8]
|
||||
|
|
||||
/* Division: dividend = high qword of the AES output (rdx cleared), divisor = (r9d + r14d) | 0x80000001. */
add r9d, r14d
|
||||
or r9d, -2147483647
|
||||
xor edx, edx
|
||||
movdqa xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
movq rax, xmm0
|
||||
|
|
||||
div r9
|
||||
/* r15 = low 32 bits of quotient | (low 32 bits of remainder << 32); it feeds the next iteration's rsi. */
movq xmm0, rax
|
||||
movq xmm1, rdx
|
||||
punpckldq xmm0, xmm1
|
||||
movq r15, xmm0
|
||||
paddq xmm0, xmm5
|
||||
/* xmm2 keeps the sqrt input for the out-of-line fix-up. */
movdqa xmm2, xmm0
|
||||
psrlq xmm0, 12
|
||||
paddq xmm0, xmm7
|
||||
sqrtsd xmm1, xmm0
|
||||
movq rdi, xmm1
|
||||
/* If the low 19 bits of the result are all zero take the out-of-line fix-up, which produces rdi itself. */
test rdi, 524287
|
||||
je $sqrt_fixup_ryzen
|
||||
shr rdi, 19
|
||||
|
|
||||
$sqrt_fixup_ryzen_ret:
|
||||
/* rdx:rax = rsi * r14. */
mov rax, rsi
|
||||
mul r14
|
||||
movq xmm1, rax
|
||||
movq xmm0, rdx
|
||||
punpcklqdq xmm0, xmm1
|
||||
|
|
||||
/* Second add-and-rotate around the second entry; the product (xmm0) is XORed into one chunk first. */
mov r9d, r10d
|
||||
mov ecx, r10d
|
||||
xor r9d, 16
|
||||
xor ecx, 32
|
||||
xor r10d, 48
|
||||
movdqa xmm1, XMMWORD PTR [rcx+rbx]
|
||||
xor rdx, [rcx+rbx]
|
||||
xor rax, [rcx+rbx+8]
|
||||
movdqa xmm2, XMMWORD PTR [r9+rbx]
|
||||
pxor xmm2, xmm0
|
||||
paddq xmm4, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm2, xmm3
|
||||
paddq xmm1, xmm6
|
||||
movdqa XMMWORD PTR [r9+rbx], xmm4
|
||||
movdqa XMMWORD PTR [rcx+rbx], xmm2
|
||||
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||
|
|
||||
/* History rotation: xmm4 <- xmm3 <- xmm5 (current AES output). */
movdqa xmm4, xmm3
|
||||
add r8, rdx
|
||||
add r11, rax
|
||||
mov QWORD PTR [r12], r8
|
||||
xor r8, rsi
|
||||
mov QWORD PTR [r12+8], r11
|
||||
mov r10, r8
|
||||
xor r11, r13
|
||||
and r10d, 2097136
|
||||
movdqa xmm3, xmm5
|
||||
dec ebp
|
||||
jne $main_loop_ryzen
|
||||
|
||||
/*
 * Epilogue: restore the caller's MXCSR, xmm6-xmm8 and rbx/rbp/rsi, unwind the 64-byte
 * frame via r11, pop the saved registers, then jump over the fix-up code to the end
 * label. r11 = rsp+64 is the stack pointer right after the five pushes, so
 * [r11+56]/[r11+64]/[r11+72] are the slots written as [rsp+16]/[rsp+24]/[rsp+32] on
 * entry (5 pushes = 40 bytes).
 */
ldmxcsr DWORD PTR [rsp]
|
||||
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||
lea r11, QWORD PTR [rsp+64]
|
||||
mov rbx, QWORD PTR [r11+56]
|
||||
mov rbp, QWORD PTR [r11+64]
|
||||
mov rsi, QWORD PTR [r11+72]
|
||||
movaps xmm8, XMMWORD PTR [r11-48]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
mov rsp, r11
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
jmp $cnv2_main_loop_ryzen_endp
|
||||
|
||||
/*
 * Out-of-line square-root correction, reached when the low 19 bits of the sqrtsd
 * result (in rdi) were all zero. Decrements the raw result, forms r = raw>>19 and
 * s = raw>>20, evaluates (r - s + c + 1) * (s + c) with c = (-1022 zero-extended) << 32,
 * subtracts the sqrt input (low qword of xmm2) and adds the resulting borrow to r,
 * leaving the final value in rdi.
 * NOTE(review): the mathematical justification of this correction is not derivable
 * from this file - see the reference implementation before changing it.
 */
$sqrt_fixup_ryzen:
|
||||
movq r9, xmm2
|
||||
dec rdi
|
||||
mov edx, -1022
|
||||
shl rdx, 32
|
||||
mov rax, rdi
|
||||
shr rdi, 19
|
||||
shr rax, 20
|
||||
mov rcx, rdi
|
||||
sub rcx, rax
|
||||
lea rcx, [rcx+rdx+1]
|
||||
add rax, rdx
|
||||
imul rcx, rax
|
||||
sub rcx, r9
|
||||
adc rdi, 0
|
||||
jmp $sqrt_fixup_ryzen_ret
|
||||
|
|
||||
/* End of the included body; the epilogue jumps here to skip the fix-up code above. */
$cnv2_main_loop_ryzen_endp:
|
271
src/crypto/asm/cnv2_mainloop_soft_aes_sandybridge.inc
Normal file
271
src/crypto/asm/cnv2_mainloop_soft_aes_sandybridge.inc
Normal file
|
@ -0,0 +1,271 @@
|
|||
mov QWORD PTR [rsp+8], rcx
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 152
|
||||
|
||||
stmxcsr DWORD PTR [rsp+4]
|
||||
mov DWORD PTR [rsp], 24448
|
||||
ldmxcsr DWORD PTR [rsp]
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r10, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+40]
|
||||
xor r9, QWORD PTR [rcx+8]
|
||||
movq xmm4, rax
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r11, QWORD PTR [rcx+224]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r10+72]
|
||||
mov rax, QWORD PTR [r10+80]
|
||||
movq xmm0, rdx
|
||||
xor rax, QWORD PTR [r10+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+48], xmm8
|
||||
movaps XMMWORD PTR [rsp+64], xmm9
|
||||
movaps XMMWORD PTR [rsp+80], xmm10
|
||||
movaps XMMWORD PTR [rsp+96], xmm11
|
||||
movaps XMMWORD PTR [rsp+112], xmm12
|
||||
movaps XMMWORD PTR [rsp+128], xmm13
|
||||
|
||||
movq xmm5, rax
|
||||
|
||||
mov ax, 1023
|
||||
shl rax, 52
|
||||
movq xmm8, rax
|
||||
|
||||
mov rax, r8
|
||||
punpcklqdq xmm4, xmm0
|
||||
and eax, 2097136
|
||||
movq xmm10, QWORD PTR [r10+96]
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+104]
|
||||
xorps xmm9, xmm9
|
||||
mov QWORD PTR [rsp+248], rax
|
||||
movq xmm12, r11
|
||||
mov QWORD PTR [rsp+240], r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
movq xmm13, rcx
|
||||
mov r12d, 524288
|
||||
|
||||
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
/*
 * Main loop body; runs once per iteration until the counter reaches zero.
 *
 * Register juggling visible in the body:
 *   - the counter lives in r12d but is parked in xmm11 while r12 is used as
 *     the lookup-table pointer loaded from [ctx+272];
 *   - r10 (context pointer) and r11 (base pointer) are clobbered by the
 *     table lookups and restored from [rsp+224] and xmm12 respectively;
 *   - [rsp+248] holds the current block offset, [rsp+240] the carried r9.
 *
 * Per iteration the code reads the 16-byte block at rax+r11, uses each of
 * its bytes as an index into four 256-entry dword tables (r12, +1024, +2048,
 * +3072 bytes), XORs the results together and with r8:r9 to form xmm6
 * (presumably a T-table software AES round - confirm against the C
 * reference). It then rotates/adds the three neighbouring blocks at
 * offset^16/^32/^48, and performs a 64-bit "div", a "sqrtsd" (with an integer
 * fix-up path at sqrt_fixup_soft_aes_sandybridge) and a 64x64 "mul" whose
 * results update the second block and the carried state.
 */
cnv2_mainloop_soft_aes_sandybridge:
|
||||
movd xmm11, r12d
|
||||
mov r12, QWORD PTR [r10+272]
|
||||
lea r13, QWORD PTR [rax+r11]
|
||||
mov esi, DWORD PTR [r13]
|
||||
movq xmm0, r9
|
||||
mov r10d, DWORD PTR [r13+4]
|
||||
movq xmm7, r8
|
||||
mov ebp, DWORD PTR [r13+12]
|
||||
mov r14d, DWORD PTR [r13+8]
|
||||
mov rdx, QWORD PTR [rsp+248]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
movd xmm1, r11d
|
||||
add ebp, 256
|
||||
movq r11, xmm12
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
mov rcx, rdx
|
||||
xor eax, r15d
|
||||
punpckldq xmm2, xmm1
|
||||
xor rcx, 16
|
||||
movd xmm6, eax
|
||||
mov rax, rdx
|
||||
punpckldq xmm6, xmm0
|
||||
xor rax, 32
|
||||
punpckldq xmm6, xmm2
|
||||
xor rdx, 48
|
||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||
paddq xmm0, xmm5
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movq rcx, xmm13
|
||||
paddq xmm1, xmm7
|
||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||
movq rdi, xmm6
|
||||
mov r10, rdi
|
||||
and r10d, 2097136
|
||||
xor edx, edx
|
||||
mov rax, rcx
|
||||
shl rax, 32
|
||||
movq rbx, xmm10
|
||||
xor rbx, rax
|
||||
lea r9, QWORD PTR [rcx+rcx]
|
||||
add r9d, edi
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
mov ecx, -2147483647
|
||||
movdqu XMMWORD PTR [r13], xmm0
|
||||
or r9, rcx
|
||||
movdqa xmm0, xmm6
|
||||
movaps xmm1, xmm9
|
||||
psrldq xmm0, 8
|
||||
movq rax, xmm0
|
||||
xor rbx, QWORD PTR [r10+r11]
|
||||
lea r14, QWORD PTR [r10+r11]
|
||||
mov rbp, QWORD PTR [r14+8]
|
||||
div r9
|
||||
shl rdx, 32
|
||||
mov eax, eax
|
||||
add rdx, rax
|
||||
lea r9, QWORD PTR [rdx+rdi]
|
||||
movq xmm10, rdx
|
||||
mov rax, r9
|
||||
shr rax, 12
|
||||
movq xmm0, rax
|
||||
paddq xmm0, xmm8
|
||||
sqrtsd xmm1, xmm0
|
||||
movq rdx, xmm1
|
||||
test rdx, 524287
|
||||
je sqrt_fixup_soft_aes_sandybridge
|
||||
psrlq xmm1, 19
|
||||
sqrt_fixup_soft_aes_sandybridge_ret:
|
||||
|
||||
mov r9, r10
|
||||
movdqa xmm13, xmm1
|
||||
xor r9, 16
|
||||
mov rcx, r10
|
||||
xor rcx, 32
|
||||
xor r10, 48
|
||||
mov rax, rbx
|
||||
mul rdi
|
||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||
paddq xmm1, xmm7
|
||||
movq xmm0, rax
|
||||
movq xmm3, rdx
|
||||
xor rax, QWORD PTR [r11+rcx+8]
|
||||
xor rdx, QWORD PTR [rcx+r11]
|
||||
punpcklqdq xmm3, xmm0
|
||||
add r8, rdx
|
||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm2, xmm3
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqa xmm5, xmm4
|
||||
mov r9, QWORD PTR [rsp+240]
|
||||
movdqa xmm4, xmm6
|
||||
add r9, rax
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
mov r10, QWORD PTR [rsp+224]
|
||||
movd r12d, xmm11
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rbx
|
||||
mov rax, r8
|
||||
mov QWORD PTR [r14+8], r9
|
||||
and eax, 2097136
|
||||
xor r9, rbp
|
||||
mov QWORD PTR [rsp+240], r9
|
||||
mov QWORD PTR [rsp+248], rax
|
||||
sub r12d, 1
|
||||
jne cnv2_mainloop_soft_aes_sandybridge
|
||||
|
||||
/*
 * Epilogue: undo the prologue in reverse - restore MXCSR, xmm6-xmm13 and the
 * pushed general-purpose registers, then jump over the out-of-line sqrt
 * fix-up code to the end label of this include.
 */
ldmxcsr DWORD PTR [rsp+4]  /* restore the MXCSR value saved by stmxcsr in the prologue */
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||
|
||||
add rsp, 152  /* release the local area reserved by "sub rsp, 152" */
|
||||
pop r15  /* pops mirror the prologue pushes in reverse order */
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
jmp cnv2_mainloop_soft_aes_sandybridge_asm_endp  /* skip the fix-up routine that is placed after this point */
|
||||
|
||||
/*
 * Out-of-line path taken from the main loop when the low 19 bits of the raw
 * sqrtsd result (in rdx) are all zero. On entry r9 holds the integer the
 * square root was derived from. The routine recomputes the shifted result
 * in integer arithmetic, applies a carry-based +1 adjustment, leaves it in
 * xmm1 and jumps back to sqrt_fixup_soft_aes_sandybridge_ret.
 * Scratch registers: rax, rcx, rdx, r15.
 */
sqrt_fixup_soft_aes_sandybridge:
|
||||
dec rdx
|
||||
mov r15d, -1022  /* zero-extends: r15 = 0x00000000FFFFFC02 */
|
||||
shl r15, 32  /* r15 = 0xFFFFFC0200000000, i.e. -1022 * 2^32 as a signed 64-bit value */
|
||||
mov rax, rdx
|
||||
shr rdx, 19  /* rdx = candidate result (same shift the fast path applies with psrlq xmm1, 19) */
|
||||
shr rax, 20
|
||||
mov rcx, rdx
|
||||
sub rcx, rax
|
||||
lea rcx, [rcx+r15+1]
|
||||
add rax, r15
|
||||
imul rcx, rax  /* low 64 bits of the product of the two biased halves */
|
||||
sub rcx, r9  /* sets CF when the product is below the original input (unsigned) */
|
||||
adc rdx, 0  /* bump the candidate by one when CF is set */
|
||||
movq xmm1, rdx  /* hand the corrected value back in the register the fast path uses */
|
||||
jmp sqrt_fixup_soft_aes_sandybridge_ret
|
||||
|
||||
cnv2_mainloop_soft_aes_sandybridge_asm_endp:
|
70
src/crypto/asm/win/cn_litev1_mainloop_sandybridge.inc
Normal file
70
src/crypto/asm/win/cn_litev1_mainloop_sandybridge.inc
Normal file
|
@ -0,0 +1,70 @@
|
|||
; Body of the cn-lite v1 main loop using the AESENC instruction (MASM include).
; rcx points at the hashing context on entry. The including PROC supplies
; the final "ret". The loop runs 262144 times over a 1 MiB window
; (offset mask 0xFFFF0) addressed relative to rsi = [ctx+224].
mov QWORD PTR [rsp+8], rbx  ; save rbx/rbp/rsi/rdi in the four slots above the return address (presumably the Win64 home space)
|
||||
mov QWORD PTR [rsp+16], rbp
|
||||
mov QWORD PTR [rsp+24], rsi
|
||||
mov QWORD PTR [rsp+32], rdi
|
||||
push r14
|
||||
push r15
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov ebp, 262144  ; iteration counter
|
||||
xor rax, QWORD PTR [rcx+16]  ; rax = [ctx+48] ^ [ctx+16]
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]  ; rdx = [ctx+56] ^ [ctx+24]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]  ; r8 = [ctx+32] ^ [ctx+0]
|
||||
movq xmm3, rax
|
||||
mov rax, QWORD PTR [rcx+256]  ; pointer whose bytes 35..42 feed the tweak below
|
||||
mov rdi, QWORD PTR [rcx+40]
|
||||
movq xmm0, rdx
|
||||
xor rdi, QWORD PTR [rcx+8]  ; rdi = [ctx+40] ^ [ctx+8]
|
||||
mov rdx, r8
|
||||
mov r15, QWORD PTR [rcx+264]  ; r15 = base of the byte lookup table used in the loop
|
||||
and edx, 1048560  ; 0xFFFF0: 16-byte aligned offset of the first block
|
||||
mov r14, QWORD PTR [rax+35]
|
||||
xor r14, QWORD PTR [rcx+192]  ; r14 = 8 bytes at [[ctx+256]+35] ^ [ctx+192]; XORed into every second-block store
|
||||
mov rsi, QWORD PTR [rcx+224]  ; rsi = base pointer for all block accesses (presumably the scratchpad)
|
||||
punpcklqdq xmm3, xmm0  ; xmm3 = { [48]^[16], [56]^[24] }
|
||||
movdqu xmm2, XMMWORD PTR [rdx+rsi]  ; preload the first block
|
||||
|
||||
ALIGN 64
|
||||
cn_litev1_mainloop_sandybridge:
|
||||
movq xmm0, rdi
|
||||
movq xmm1, r8
|
||||
punpcklqdq xmm1, xmm0  ; xmm1 = { r8, rdi } used as the round key
|
||||
aesenc xmm2, xmm1  ; one AES round on the current block
|
||||
movq r10, xmm2  ; r10 = low qword of the result
|
||||
mov r9d, r10d
|
||||
and r9d, 1048560
|
||||
add r9, rsi  ; r9 = address of the second block, selected by the AES result
|
||||
movdqa xmm0, xmm2
|
||||
pxor xmm0, xmm3  ; XOR with the value carried in xmm3
|
||||
movdqa xmm3, xmm2  ; carry this iteration's AES result to the next one
|
||||
movdqu XMMWORD PTR [rdx+rsi], xmm0  ; write the first block back
|
||||
psrldq xmm0, 11  ; bring byte 11 of the stored value to the bottom
|
||||
movq rax, xmm0
|
||||
movzx eax, al
|
||||
movzx eax, BYTE PTR [rax+r15]  ; translate it through the byte table
|
||||
mov BYTE PTR [rsi+rdx+11], al  ; and overwrite byte 11 of the block in memory
|
||||
mov rbx, QWORD PTR [r9]  ; load both halves of the second block
|
||||
mov r11, QWORD PTR [r9+8]
|
||||
mov rax, rbx
|
||||
mul r10  ; rdx:rax = rbx * r10 (clobbers the block offset in rdx)
|
||||
add r8, rdx  ; r8 += high half
|
||||
mov QWORD PTR [r9], r8
|
||||
add rdi, rax  ; rdi += low half
|
||||
mov rax, r14
|
||||
xor rax, rdi
|
||||
mov QWORD PTR [r9+8], rax  ; second half is stored XORed with the tweak in r14
|
||||
xor r8, rbx
|
||||
mov rdx, r8
|
||||
and edx, 1048560  ; offset of the next first block
|
||||
movdqu xmm2, XMMWORD PTR [rdx+rsi]  ; preload it for the next iteration
|
||||
xor rdi, r11
|
||||
dec ebp
|
||||
jne cn_litev1_mainloop_sandybridge
|
||||
|
||||
mov rbx, QWORD PTR [rsp+24]  ; restore from the slots written on entry; offsets are +16 because of the two pushes
|
||||
mov rbp, QWORD PTR [rsp+32]
|
||||
mov rsi, QWORD PTR [rsp+40]
|
||||
mov rdi, QWORD PTR [rsp+48]
|
||||
pop r15
|
||||
pop r14
|
162
src/crypto/asm/win/cn_litev1_mainloop_soft_aes_sandybridge.inc
Normal file
162
src/crypto/asm/win/cn_litev1_mainloop_soft_aes_sandybridge.inc
Normal file
|
@ -0,0 +1,162 @@
|
|||
; Prologue of the cn-lite v1 software-AES main loop (MASM include).
; rcx points at the hashing context on entry. Saves rbx, rbp, rsi, rdi,
; r12-r15 and xmm6-xmm9, then derives the loop-carried state. Several values
; are parked in XMM registers because the loop runs out of general-purpose
; registers during the table lookups.
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 72  ; 64 bytes for xmm6-xmm9 plus one qword at [rsp+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp], xmm6
|
||||
movaps XMMWORD PTR [rsp+16], xmm7
|
||||
movaps XMMWORD PTR [rsp+32], xmm8
|
||||
movaps XMMWORD PTR [rsp+48], xmm9
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
xor rax, QWORD PTR [rcx+16]  ; rax = [ctx+48] ^ [ctx+16]
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]  ; rdx = [ctx+56] ^ [ctx+24]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]  ; r8 = [ctx+32] ^ [ctx+0]
|
||||
movq xmm4, rax
|
||||
mov rax, QWORD PTR [rcx+256]
|
||||
mov r13, QWORD PTR [rcx+40]
|
||||
movq xmm0, rdx
|
||||
xor r13, QWORD PTR [rcx+8]  ; r13 = [ctx+40] ^ [ctx+8]
|
||||
mov rdx, r8
|
||||
mov rdi, QWORD PTR [rcx+224]  ; rdi = base pointer for all block accesses (presumably the scratchpad)
|
||||
and edx, 1048560  ; 0xFFFF0: 16-byte aligned offset of the first block
|
||||
mov rax, QWORD PTR [rax+35]
|
||||
xor rax, QWORD PTR [rcx+192]  ; tweak = 8 bytes at [[ctx+256]+35] ^ [ctx+192]
|
||||
movq xmm5, rax  ; xmm5 keeps the tweak
|
||||
movq xmm8, rdi  ; xmm8 keeps the base pointer (rdi is clobbered inside the loop)
|
||||
punpcklqdq xmm4, xmm0  ; xmm4 = { [48]^[16], [56]^[24] }
|
||||
mov QWORD PTR [rsp+64], rdx  ; current block offset; reloaded into rdx inside the loop
|
||||
|
||||
movq xmm6, rcx  ; xmm6 keeps the context pointer (rcx is clobbered inside the loop)
|
||||
mov rax, QWORD PTR [rcx+264]
|
||||
movq xmm7, rax  ; xmm7 keeps the byte lookup table pointer
|
||||
|
||||
mov eax, 262144  ; iteration counter
|
||||
|
||||
ALIGN 64
|
||||
; Main loop body; runs once per iteration until the counter reaches zero.
;
; Register juggling visible in the body:
;   - the counter arrives in rax, is parked in xmm9 and fetched back right
;     before "sub eax, 1";
;   - rcx (context pointer) and rdi (base pointer) are used as scratch by the
;     table lookups and restored from xmm6 and xmm8;
;   - rdx (block offset) is reloaded from [rsp+64];
;   - xmm7 supplies the byte-table pointer and xmm5 the tweak.
;
; The four dwords of the block at [rdx+rdi] are split into bytes which index
; four 256-entry dword tables (r12 = [ctx+272], then +1024, +2048 and +3072
; bytes); the results are XORed per column and then with r8:r13 to form xmm3
; (presumably a T-table software AES round - confirm against the C
; reference). After that the iteration stores xmm3 ^ xmm4, substitutes
; byte 11 through the byte table, multiplies the second block's low qword by
; the low qword of xmm3 and updates r8/r13 from the product.
cn_litev1_mainloop_soft_aes_sandybridge:
|
||||
movq xmm9, rax
|
||||
mov r12, QWORD PTR [rcx+272]
|
||||
mov esi, DWORD PTR [rdx+rdi]
|
||||
mov r10d, DWORD PTR [rdx+rdi+4]
|
||||
mov ebp, DWORD PTR [rdx+rdi+12]
|
||||
mov r14d, DWORD PTR [rdx+rdi+8]
|
||||
mov rdx, QWORD PTR [rsp+64]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
add ebp, 256
|
||||
movd xmm1, r11d
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movq rdi, xmm8
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
punpckldq xmm2, xmm1
|
||||
movq xmm1, r8
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
xor eax, r15d
|
||||
movd xmm3, eax
|
||||
movq rax, xmm7
|
||||
punpckldq xmm3, xmm0
|
||||
movq xmm0, r13
|
||||
punpcklqdq xmm1, xmm0
|
||||
punpckldq xmm3, xmm2
|
||||
pxor xmm3, xmm1
|
||||
movq r9, xmm3
|
||||
mov r10d, r9d
|
||||
and r10d, 1048560
|
||||
movdqa xmm0, xmm3
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [rdx+rdi], xmm0
|
||||
psrldq xmm0, 11
|
||||
movq rcx, xmm0
|
||||
movzx ecx, cl
|
||||
mov cl, BYTE PTR [rcx+rax]
|
||||
mov BYTE PTR [rdi+rdx+11], cl
|
||||
mov rbx, QWORD PTR [r10+rdi]
|
||||
mov rcx, r9
|
||||
lea r9, QWORD PTR [r10+rdi]
|
||||
mov r11, QWORD PTR [r9+8]
|
||||
mov rax, rbx
|
||||
movdqa xmm4, xmm3
|
||||
mul rcx
|
||||
movq rcx, xmm6
|
||||
add r8, rdx
|
||||
add r13, rax
|
||||
movq rax, xmm5
|
||||
xor rax, r13
|
||||
mov QWORD PTR [r9], r8
|
||||
xor r8, rbx
|
||||
mov QWORD PTR [r9+8], rax
|
||||
movq rax, xmm9
|
||||
mov rdx, r8
|
||||
xor r13, r11
|
||||
and edx, 1048560
|
||||
mov QWORD PTR [rsp+64], rdx
|
||||
sub eax, 1
|
||||
jne cn_litev1_mainloop_soft_aes_sandybridge
|
||||
|
||||
; Epilogue: reload xmm6-xmm9 from the slots written in the prologue, release
; the 72-byte local area and pop the general-purpose registers in reverse
; push order. The including PROC supplies the final "ret".
movaps xmm6, XMMWORD PTR [rsp]
|
||||
movaps xmm7, XMMWORD PTR [rsp+16]
|
||||
movaps xmm8, XMMWORD PTR [rsp+32]
|
||||
movaps xmm9, XMMWORD PTR [rsp+48]
|
||||
|
||||
add rsp, 72
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
61
src/crypto/asm/win/cn_main_loop.asm
Normal file
61
src/crypto/asm/win/cn_main_loop.asm
Normal file
|
@ -0,0 +1,61 @@
|
|||
; MASM wrapper that turns each main-loop include file into an exported
; procedure. Every PROC below is just "INCLUDE <body>.inc" followed by
; "ret 0": the .inc files contain the prologue, loop and epilogue but no
; return instruction of their own. Each entry point is aligned to 64 bytes.
_TEXT_CN_MAINLOOP SEGMENT PAGE READ EXECUTE
|
||||
PUBLIC cnv1_mainloop_sandybridge_asm
|
||||
PUBLIC cn_litev1_mainloop_sandybridge_asm
|
||||
PUBLIC cnv2_mainloop_ivybridge_asm
|
||||
PUBLIC cnv2_mainloop_ryzen_asm
|
||||
PUBLIC cnv2_double_mainloop_sandybridge_asm
|
||||
|
||||
PUBLIC cnv1_mainloop_soft_aes_sandybridge_asm
|
||||
PUBLIC cn_litev1_mainloop_soft_aes_sandybridge_asm
|
||||
PUBLIC cnv2_mainloop_soft_aes_sandybridge_asm
|
||||
|
||||
; --- variants whose bodies use the AESENC instruction ---
ALIGN 64
|
||||
cnv1_mainloop_sandybridge_asm PROC
|
||||
INCLUDE cnv1_mainloop_sandybridge.inc
|
||||
ret 0
|
||||
cnv1_mainloop_sandybridge_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cn_litev1_mainloop_sandybridge_asm PROC
|
||||
INCLUDE cn_litev1_mainloop_sandybridge.inc
|
||||
ret 0
|
||||
cn_litev1_mainloop_sandybridge_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv2_mainloop_ivybridge_asm PROC
|
||||
INCLUDE cnv2_main_loop_ivybridge.inc
|
||||
ret 0
|
||||
cnv2_mainloop_ivybridge_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv2_mainloop_ryzen_asm PROC
|
||||
INCLUDE cnv2_main_loop_ryzen.inc
|
||||
ret 0
|
||||
cnv2_mainloop_ryzen_asm ENDP
|
||||
|
||||
; Two-context variant: its body reads context pointers from both rcx and rdx.
ALIGN 64
|
||||
cnv2_double_mainloop_sandybridge_asm PROC
|
||||
INCLUDE cnv2_double_main_loop_sandybridge.inc
|
||||
ret 0
|
||||
cnv2_double_mainloop_sandybridge_asm ENDP
|
||||
|
||||
; --- variants whose bodies do the AES round with table lookups ---
ALIGN 64
|
||||
cnv1_mainloop_soft_aes_sandybridge_asm PROC
|
||||
INCLUDE cnv1_mainloop_soft_aes_sandybridge.inc
|
||||
ret 0
|
||||
cnv1_mainloop_soft_aes_sandybridge_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cn_litev1_mainloop_soft_aes_sandybridge_asm PROC
|
||||
INCLUDE cn_litev1_mainloop_soft_aes_sandybridge.inc
|
||||
ret 0
|
||||
cn_litev1_mainloop_soft_aes_sandybridge_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv2_mainloop_soft_aes_sandybridge_asm PROC
|
||||
INCLUDE cnv2_mainloop_soft_aes_sandybridge.inc
|
||||
ret 0
|
||||
cnv2_mainloop_soft_aes_sandybridge_asm ENDP
|
||||
|
||||
_TEXT_CN_MAINLOOP ENDS
|
||||
END
|
54
src/crypto/asm/win/cn_main_loop_win_gcc.S
Normal file
54
src/crypto/asm/win/cn_main_loop_win_gcc.S
Normal file
|
@ -0,0 +1,54 @@
|
|||
/*
 * GNU-assembler counterpart of the MASM wrapper, for GCC builds on Windows.
 * The file is run through the C preprocessor: each exported symbol is a
 * label followed by an #include of the loop body from the parent directory
 * and a "ret 0".
 *
 * ALIGN is mapped onto the GAS ".align" directive so the shared .inc files
 * can use the MASM-style spelling. FN_PREFIX expands to its argument
 * unchanged here, i.e. the symbols are emitted without any decoration.
 */
#define ALIGN .align
|
||||
.intel_syntax noprefix
|
||||
# define FN_PREFIX(fn) fn
|
||||
.section .text
|
||||
|
||||
.global FN_PREFIX(cnv1_mainloop_sandybridge_asm)
|
||||
.global FN_PREFIX(cn_litev1_mainloop_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
||||
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
||||
|
||||
.global FN_PREFIX(cnv1_mainloop_soft_aes_sandybridge_asm)
|
||||
.global FN_PREFIX(cn_litev1_mainloop_soft_aes_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_soft_aes_sandybridge_asm)
|
||||
|
||||
/* Entry points: one 64-byte aligned label per loop variant. */
ALIGN 64
|
||||
FN_PREFIX(cnv1_mainloop_sandybridge_asm):
|
||||
#include "../cnv1_mainloop_sandybridge.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cn_litev1_mainloop_sandybridge_asm):
|
||||
#include "../cn_litev1_mainloop_sandybridge.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
|
||||
#include "../cnv2_main_loop_ivybridge.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_mainloop_ryzen_asm):
|
||||
#include "../cnv2_main_loop_ryzen.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
|
||||
#include "../cnv2_double_main_loop_sandybridge.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv1_mainloop_soft_aes_sandybridge_asm):
|
||||
#include "../cnv1_mainloop_soft_aes_sandybridge.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cn_litev1_mainloop_soft_aes_sandybridge_asm):
|
||||
#include "../cn_litev1_mainloop_soft_aes_sandybridge.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_mainloop_soft_aes_sandybridge_asm):
|
||||
#include "../cnv2_mainloop_soft_aes_sandybridge.inc"
|
||||
ret 0
|
70
src/crypto/asm/win/cnv1_mainloop_sandybridge.inc
Normal file
70
src/crypto/asm/win/cnv1_mainloop_sandybridge.inc
Normal file
|
@ -0,0 +1,70 @@
|
|||
; Body of the CryptoNight v1 main loop using the AESENC instruction (MASM
; include). rcx points at the hashing context on entry. The including PROC
; supplies the final "ret". The loop runs 524288 times over a 2 MiB window
; (offset mask 0x1FFFF0) addressed relative to rsi = [ctx+224].
mov QWORD PTR [rsp+8], rbx  ; save rbx/rbp/rsi/rdi in the four slots above the return address (presumably the Win64 home space)
|
||||
mov QWORD PTR [rsp+16], rbp
|
||||
mov QWORD PTR [rsp+24], rsi
|
||||
mov QWORD PTR [rsp+32], rdi
|
||||
push r14
|
||||
push r15
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov ebp, 524288  ; iteration counter
|
||||
xor rax, QWORD PTR [rcx+16]  ; rax = [ctx+48] ^ [ctx+16]
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]  ; rdx = [ctx+56] ^ [ctx+24]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]  ; r8 = [ctx+32] ^ [ctx+0]
|
||||
movq xmm3, rax
|
||||
mov rax, QWORD PTR [rcx+256]  ; pointer whose bytes 35..42 feed the tweak below
|
||||
mov rdi, QWORD PTR [rcx+40]
|
||||
movq xmm0, rdx
|
||||
xor rdi, QWORD PTR [rcx+8]  ; rdi = [ctx+40] ^ [ctx+8]
|
||||
mov rdx, r8
|
||||
mov r15, QWORD PTR [rcx+264]  ; r15 = base of the byte lookup table used in the loop
|
||||
and edx, 2097136  ; 0x1FFFF0: 16-byte aligned offset of the first block
|
||||
mov r14, QWORD PTR [rax+35]
|
||||
xor r14, QWORD PTR [rcx+192]  ; r14 = 8 bytes at [[ctx+256]+35] ^ [ctx+192]; XORed into every second-block store
|
||||
mov rsi, QWORD PTR [rcx+224]  ; rsi = base pointer for all block accesses (presumably the scratchpad)
|
||||
punpcklqdq xmm3, xmm0  ; xmm3 = { [48]^[16], [56]^[24] }
|
||||
movdqu xmm2, XMMWORD PTR [rdx+rsi]  ; preload the first block
|
||||
|
||||
ALIGN 64
|
||||
cnv1_mainloop_sandybridge:
|
||||
movq xmm0, rdi
|
||||
movq xmm1, r8
|
||||
punpcklqdq xmm1, xmm0  ; xmm1 = { r8, rdi } used as the round key
|
||||
aesenc xmm2, xmm1  ; one AES round on the current block
|
||||
movq r10, xmm2  ; r10 = low qword of the result
|
||||
mov r9d, r10d
|
||||
and r9d, 2097136
|
||||
add r9, rsi  ; r9 = address of the second block, selected by the AES result
|
||||
movdqa xmm0, xmm2
|
||||
pxor xmm0, xmm3  ; XOR with the value carried in xmm3
|
||||
movdqa xmm3, xmm2  ; carry this iteration's AES result to the next one
|
||||
movdqu XMMWORD PTR [rdx+rsi], xmm0  ; write the first block back
|
||||
psrldq xmm0, 11  ; bring byte 11 of the stored value to the bottom
|
||||
movq rax, xmm0
|
||||
movzx eax, al
|
||||
movzx eax, BYTE PTR [rax+r15]  ; translate it through the byte table
|
||||
mov BYTE PTR [rsi+rdx+11], al  ; and overwrite byte 11 of the block in memory
|
||||
mov rbx, QWORD PTR [r9]  ; load both halves of the second block
|
||||
mov r11, QWORD PTR [r9+8]
|
||||
mov rax, rbx
|
||||
mul r10  ; rdx:rax = rbx * r10 (clobbers the block offset in rdx)
|
||||
add r8, rdx  ; r8 += high half
|
||||
mov QWORD PTR [r9], r8
|
||||
add rdi, rax  ; rdi += low half
|
||||
mov rax, r14
|
||||
xor rax, rdi
|
||||
mov QWORD PTR [r9+8], rax  ; second half is stored XORed with the tweak in r14
|
||||
xor r8, rbx
|
||||
mov rdx, r8
|
||||
and edx, 2097136  ; offset of the next first block
|
||||
movdqu xmm2, XMMWORD PTR [rdx+rsi]  ; preload it for the next iteration
|
||||
xor rdi, r11
|
||||
dec ebp
|
||||
jne cnv1_mainloop_sandybridge
|
||||
|
||||
mov rbx, QWORD PTR [rsp+24]  ; restore from the slots written on entry; offsets are +16 because of the two pushes
|
||||
mov rbp, QWORD PTR [rsp+32]
|
||||
mov rsi, QWORD PTR [rsp+40]
|
||||
mov rdi, QWORD PTR [rsp+48]
|
||||
pop r15
|
||||
pop r14
|
162
src/crypto/asm/win/cnv1_mainloop_soft_aes_sandybridge.inc
Normal file
162
src/crypto/asm/win/cnv1_mainloop_soft_aes_sandybridge.inc
Normal file
|
@ -0,0 +1,162 @@
|
|||
; Prologue of the CryptoNight v1 software-AES main loop (MASM include).
; rcx points at the hashing context on entry. Saves rbx, rbp, rsi, rdi,
; r12-r15 and xmm6-xmm9, then derives the loop-carried state. Several values
; are parked in XMM registers because the loop runs out of general-purpose
; registers during the table lookups.
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 72  ; 64 bytes for xmm6-xmm9 plus one qword at [rsp+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp], xmm6
|
||||
movaps XMMWORD PTR [rsp+16], xmm7
|
||||
movaps XMMWORD PTR [rsp+32], xmm8
|
||||
movaps XMMWORD PTR [rsp+48], xmm9
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
xor rax, QWORD PTR [rcx+16]  ; rax = [ctx+48] ^ [ctx+16]
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]  ; rdx = [ctx+56] ^ [ctx+24]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]  ; r8 = [ctx+32] ^ [ctx+0]
|
||||
movq xmm4, rax
|
||||
mov rax, QWORD PTR [rcx+256]
|
||||
mov r13, QWORD PTR [rcx+40]
|
||||
movq xmm0, rdx
|
||||
xor r13, QWORD PTR [rcx+8]  ; r13 = [ctx+40] ^ [ctx+8]
|
||||
mov rdx, r8
|
||||
mov rdi, QWORD PTR [rcx+224]  ; rdi = base pointer for all block accesses (presumably the scratchpad)
|
||||
and edx, 2097136  ; 0x1FFFF0: 16-byte aligned offset of the first block
|
||||
mov rax, QWORD PTR [rax+35]
|
||||
xor rax, QWORD PTR [rcx+192]  ; tweak = 8 bytes at [[ctx+256]+35] ^ [ctx+192]
|
||||
movq xmm5, rax  ; xmm5 keeps the tweak
|
||||
movq xmm8, rdi  ; xmm8 keeps the base pointer (rdi is clobbered inside the loop)
|
||||
punpcklqdq xmm4, xmm0  ; xmm4 = { [48]^[16], [56]^[24] }
|
||||
mov QWORD PTR [rsp+64], rdx  ; current block offset; reloaded into rdx inside the loop
|
||||
|
||||
movq xmm6, rcx  ; xmm6 keeps the context pointer (rcx is clobbered inside the loop)
|
||||
mov rax, QWORD PTR [rcx+264]
|
||||
movq xmm7, rax  ; xmm7 keeps the byte lookup table pointer
|
||||
|
||||
mov eax, 524288  ; iteration counter
|
||||
|
||||
ALIGN 64
|
||||
; Main loop body; runs once per iteration until the counter reaches zero.
;
; Register juggling visible in the body:
;   - the counter arrives in rax, is parked in xmm9 and fetched back right
;     before "sub eax, 1";
;   - rcx (context pointer) and rdi (base pointer) are used as scratch by the
;     table lookups and restored from xmm6 and xmm8;
;   - rdx (block offset) is reloaded from [rsp+64];
;   - xmm7 supplies the byte-table pointer and xmm5 the tweak.
;
; The four dwords of the block at [rdx+rdi] are split into bytes which index
; four 256-entry dword tables (r12 = [ctx+272], then +1024, +2048 and +3072
; bytes); the results are XORed per column and then with r8:r13 to form xmm3
; (presumably a T-table software AES round - confirm against the C
; reference). After that the iteration stores xmm3 ^ xmm4, substitutes
; byte 11 through the byte table, multiplies the second block's low qword by
; the low qword of xmm3 and updates r8/r13 from the product.
cnv1_mainloop_soft_aes_sandybridge:
|
||||
movq xmm9, rax
|
||||
mov r12, QWORD PTR [rcx+272]
|
||||
mov esi, DWORD PTR [rdx+rdi]
|
||||
mov r10d, DWORD PTR [rdx+rdi+4]
|
||||
mov ebp, DWORD PTR [rdx+rdi+12]
|
||||
mov r14d, DWORD PTR [rdx+rdi+8]
|
||||
mov rdx, QWORD PTR [rsp+64]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
add ebp, 256
|
||||
movd xmm1, r11d
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movq rdi, xmm8
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
punpckldq xmm2, xmm1
|
||||
movq xmm1, r8
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
xor eax, r15d
|
||||
movd xmm3, eax
|
||||
movq rax, xmm7
|
||||
punpckldq xmm3, xmm0
|
||||
movq xmm0, r13
|
||||
punpcklqdq xmm1, xmm0
|
||||
punpckldq xmm3, xmm2
|
||||
pxor xmm3, xmm1
|
||||
movq r9, xmm3
|
||||
mov r10d, r9d
|
||||
and r10d, 2097136
|
||||
movdqa xmm0, xmm3
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [rdx+rdi], xmm0
|
||||
psrldq xmm0, 11
|
||||
movq rcx, xmm0
|
||||
movzx ecx, cl
|
||||
mov cl, BYTE PTR [rcx+rax]
|
||||
mov BYTE PTR [rdi+rdx+11], cl
|
||||
mov rbx, QWORD PTR [r10+rdi]
|
||||
mov rcx, r9
|
||||
lea r9, QWORD PTR [r10+rdi]
|
||||
mov r11, QWORD PTR [r9+8]
|
||||
mov rax, rbx
|
||||
movdqa xmm4, xmm3
|
||||
mul rcx
|
||||
movq rcx, xmm6
|
||||
add r8, rdx
|
||||
add r13, rax
|
||||
movq rax, xmm5
|
||||
xor rax, r13
|
||||
mov QWORD PTR [r9], r8
|
||||
xor r8, rbx
|
||||
mov QWORD PTR [r9+8], rax
|
||||
movq rax, xmm9
|
||||
mov rdx, r8
|
||||
xor r13, r11
|
||||
and edx, 2097136
|
||||
mov QWORD PTR [rsp+64], rdx
|
||||
sub eax, 1
|
||||
jne cnv1_mainloop_soft_aes_sandybridge
|
||||
|
||||
; Epilogue: reload xmm6-xmm9 from the slots written in the prologue, release
; the 72-byte local area and pop the general-purpose registers in reverse
; push order. The including PROC supplies the final "ret".
movaps xmm6, XMMWORD PTR [rsp]
|
||||
movaps xmm7, XMMWORD PTR [rsp+16]
|
||||
movaps xmm8, XMMWORD PTR [rsp+32]
|
||||
movaps xmm9, XMMWORD PTR [rsp+48]
|
||||
|
||||
add rsp, 72
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
410
src/crypto/asm/win/cnv2_double_main_loop_sandybridge.inc
Normal file
410
src/crypto/asm/win/cnv2_double_main_loop_sandybridge.inc
Normal file
|
@ -0,0 +1,410 @@
|
|||
; Prologue of the CNv2 double main loop (MASM include): two hashing contexts
; are processed side by side. On entry rcx points at the first context and
; rdx at the second; the same field offsets are read from both.
; Saves rbx, rbp, rsi, rdi, r12-r15 and xmm6-xmm15, switches MXCSR and
; builds the loop-carried state.
; After the pushes and "sub rsp, 184" the entry rsp equals rsp+248, so
; [rsp+256], [rsp+264], [rsp+272] and [rsp+276] are slots above the return
; address (presumably the Win64 home space).
mov rax, rsp  ; remember the entry rsp; used below to address the xmm6-xmm8 save slots
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 184
|
||||
|
||||
stmxcsr DWORD PTR [rsp+272]  ; save the caller's MXCSR; restored in the epilogue
|
||||
mov DWORD PTR [rsp+276], 24448  ; 24448 = 0x5F80: exception mask bits set, rounding-control field = 10b (round toward +infinity per the Intel SDM MXCSR layout)
|
||||
ldmxcsr DWORD PTR [rsp+276]
|
||||
|
||||
mov r13, QWORD PTR [rcx+224]  ; r13 = base pointer for the first context's blocks (presumably its scratchpad)
|
||||
mov r9, rdx  ; r9 = second context pointer (rdx is reused below)
|
||||
mov r10, QWORD PTR [rcx+32]
|
||||
mov r8, rcx  ; r8 = first context pointer (rcx is reused below)
|
||||
xor r10, QWORD PTR [rcx]  ; r10 = ctx0[32] ^ ctx0[0]
|
||||
mov r14d, 524288  ; iteration counter
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
xor r11, QWORD PTR [rcx+8]  ; r11 = ctx0[40] ^ ctx0[8]
|
||||
mov rsi, QWORD PTR [rdx+224]  ; rsi = base pointer for the second context's blocks
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]  ; rdx = ctx0[56] ^ ctx0[24]
|
||||
mov rdi, QWORD PTR [r9+32]
|
||||
xor rdi, QWORD PTR [r9]  ; rdi = ctx1[32] ^ ctx1[0]
|
||||
mov rbp, QWORD PTR [r9+40]
|
||||
xor rbp, QWORD PTR [r9+8]  ; rbp = ctx1[40] ^ ctx1[8]
|
||||
movq xmm0, rdx
|
||||
movaps XMMWORD PTR [rax-88], xmm6  ; entry rsp-88 = rsp+160; the epilogue reloads these three via r11 = rsp+184
|
||||
movaps XMMWORD PTR [rax-104], xmm7
|
||||
movaps XMMWORD PTR [rax-120], xmm8
|
||||
movaps XMMWORD PTR [rsp+112], xmm9
|
||||
movaps XMMWORD PTR [rsp+96], xmm10
|
||||
movaps XMMWORD PTR [rsp+80], xmm11
|
||||
movaps XMMWORD PTR [rsp+64], xmm12
|
||||
movaps XMMWORD PTR [rsp+48], xmm13
|
||||
movaps XMMWORD PTR [rsp+32], xmm14
|
||||
movaps XMMWORD PTR [rsp+16], xmm15
|
||||
mov rdx, r10
|
||||
movq xmm4, QWORD PTR [r8+96]  ; low lane of xmm4 = ctx0[96]; high lane filled from ctx1[96] below
|
||||
and edx, 2097136  ; 0x1FFFF0: offset of the first context's current block
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
xorps xmm13, xmm13  ; xmm13 = 0; the loop copies it to clear registers before cvtsi2sd
|
||||
xor rax, QWORD PTR [rcx+16]  ; rax = ctx0[48] ^ ctx0[16]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r8+72]  ; rcx = ctx0[88] ^ ctx0[72]
|
||||
movq xmm5, QWORD PTR [r8+104]  ; low lane of xmm5 = ctx0[104]; high lane filled from ctx1[104] below
|
||||
movq xmm7, rax
|
||||
|
||||
mov eax, 1
|
||||
shl rax, 52
|
||||
movq xmm14, rax
|
||||
punpcklqdq xmm14, xmm14  ; both lanes = 1 << 52; added to the divpd result in the loop
|
||||
|
||||
mov eax, 1023
|
||||
shl rax, 52
|
||||
movq xmm12, rax
|
||||
punpcklqdq xmm12, xmm12  ; both lanes = 0x3FF0000000000000 (bit pattern of 1.0); added before sqrtpd
|
||||
|
||||
mov rax, QWORD PTR [r8+80]
|
||||
xor rax, QWORD PTR [r8+64]  ; rax = ctx0[80] ^ ctx0[64]
|
||||
punpcklqdq xmm7, xmm0  ; xmm7 = { ctx0[48]^[16], ctx0[56]^[24] }
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r9+56]
|
||||
xor rcx, QWORD PTR [r9+24]  ; rcx = ctx1[56] ^ ctx1[24]
|
||||
movq xmm3, rax
|
||||
mov rax, QWORD PTR [r9+48]
|
||||
xor rax, QWORD PTR [r9+16]  ; rax = ctx1[48] ^ ctx1[16]
|
||||
punpcklqdq xmm3, xmm0  ; xmm3 = { ctx0[80]^[64], ctx0[88]^[72] }
|
||||
movq xmm0, rcx
|
||||
mov QWORD PTR [rsp], r13  ; keep the first base pointer; the loop reuses r13 and reloads it from here
|
||||
mov rcx, QWORD PTR [r9+88]
|
||||
xor rcx, QWORD PTR [r9+72]  ; rcx = ctx1[88] ^ ctx1[72]
|
||||
movq xmm6, rax
|
||||
mov rax, QWORD PTR [r9+80]
|
||||
xor rax, QWORD PTR [r9+64]  ; rax = ctx1[80] ^ ctx1[64]
|
||||
punpcklqdq xmm6, xmm0  ; xmm6 = { ctx1[48]^[16], ctx1[56]^[24] }
|
||||
movq xmm0, rcx
|
||||
mov QWORD PTR [rsp+256], r10  ; loop-carried copies of r10/r11 for the first context
|
||||
mov rcx, rdi
|
||||
mov QWORD PTR [rsp+264], r11
|
||||
movq xmm8, rax
|
||||
and ecx, 2097136  ; offset of the second context's current block
|
||||
punpcklqdq xmm8, xmm0  ; xmm8 = { ctx1[80]^[64], ctx1[88]^[72] }
|
||||
movq xmm0, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm4, xmm0  ; xmm4 = { ctx0[96], ctx1[96] }
|
||||
movq xmm0, QWORD PTR [r9+104]
|
||||
lea r8, QWORD PTR [rcx+rsi]  ; r8 = address of the second context's current block
|
||||
movdqu xmm11, XMMWORD PTR [r8]  ; preload it
|
||||
punpcklqdq xmm5, xmm0  ; xmm5 = { ctx0[104], ctx1[104] }
|
||||
lea r9, QWORD PTR [rdx+r13]  ; r9 = address of the first context's current block
|
||||
movdqu xmm15, XMMWORD PTR [r9]  ; preload it
|
||||
|
||||
ALIGN 64
|
||||
; Main loop body for two interleaved contexts; r14d counts the iterations
; down ("dec r14d" / "jne" at the bottom).
;
; First context:  current block preloaded in xmm15 at address r9, AES key
;                 r10:r11, base pointer r13, block offset rdx.
; Second context: current block preloaded in xmm11 at address r8, AES key
;                 rdi:rbp, base pointer rsi, block offset rcx.
;
; Each context gets one "aesenc" round (results in xmm9 / xmm10), a rotation
; with additions of the three neighbouring blocks at offset^16/^32/^48, and
; a 64x64 "mul" on the block selected by the AES result. The division and
; square-root steps are done for both contexts at once in the two lanes of
; "divpd" / "sqrtpd"; the div_fix_* and sqrt_fix_* labels are their
; out-of-line integer correction paths.
;
; Stack slots used inside the loop: [rsp] = saved r13 (r13 is temporarily
; repurposed), [rsp+8] = next offset for the first context,
; [rsp+256]/[rsp+264] = carried r10/r11 of the first context.
main_loop_double_sandybridge:
|
||||
movdqu xmm9, xmm15
|
||||
mov eax, edx
|
||||
mov ebx, edx
|
||||
xor eax, 16
|
||||
xor ebx, 32
|
||||
xor edx, 48
|
||||
|
||||
movq xmm0, r11
|
||||
movq xmm2, r10
|
||||
punpcklqdq xmm2, xmm0
|
||||
aesenc xmm9, xmm2
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [rax+r13]
|
||||
movdqu xmm1, XMMWORD PTR [rbx+r13]
|
||||
paddq xmm0, xmm7
|
||||
paddq xmm1, xmm2
|
||||
movdqu XMMWORD PTR [rbx+r13], xmm0
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r13]
|
||||
movdqu XMMWORD PTR [rdx+r13], xmm1
|
||||
paddq xmm0, xmm3
|
||||
movdqu XMMWORD PTR [rax+r13], xmm0
|
||||
|
||||
movq r11, xmm9
|
||||
mov edx, r11d
|
||||
and edx, 2097136
|
||||
movdqa xmm0, xmm9
|
||||
pxor xmm0, xmm7
|
||||
movdqu XMMWORD PTR [r9], xmm0
|
||||
|
||||
lea rbx, QWORD PTR [rdx+r13]
|
||||
mov r10, QWORD PTR [rdx+r13]
|
||||
|
||||
movdqu xmm10, xmm11
|
||||
movq xmm0, rbp
|
||||
movq xmm11, rdi
|
||||
punpcklqdq xmm11, xmm0
|
||||
aesenc xmm10, xmm11
|
||||
|
||||
mov eax, ecx
|
||||
mov r12d, ecx
|
||||
xor eax, 16
|
||||
xor r12d, 32
|
||||
xor ecx, 48
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [rax+rsi]
|
||||
paddq xmm0, xmm6
|
||||
movdqu xmm1, XMMWORD PTR [r12+rsi]
|
||||
movdqu XMMWORD PTR [r12+rsi], xmm0
|
||||
paddq xmm1, xmm11
|
||||
movdqu xmm0, XMMWORD PTR [rcx+rsi]
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||
paddq xmm0, xmm8
|
||||
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||
|
||||
movq rcx, xmm10
|
||||
and ecx, 2097136
|
||||
|
||||
movdqa xmm0, xmm10
|
||||
pxor xmm0, xmm6
|
||||
movdqu XMMWORD PTR [r8], xmm0
|
||||
mov r12, QWORD PTR [rcx+rsi]
|
||||
|
||||
mov r9, QWORD PTR [rbx+8]
|
||||
|
||||
xor edx, 16
|
||||
mov r8d, edx
|
||||
mov r15d, edx
|
||||
|
||||
movq rdx, xmm5
|
||||
shl rdx, 32
|
||||
movq rax, xmm4
|
||||
xor rdx, rax
|
||||
xor r10, rdx
|
||||
mov rax, r10
|
||||
mul r11
|
||||
mov r11d, r8d
|
||||
xor r11d, 48
|
||||
movq xmm0, rdx
|
||||
xor rdx, [r11+r13]
|
||||
movq xmm1, rax
|
||||
xor rax, [r11+r13+8]
|
||||
punpcklqdq xmm0, xmm1
|
||||
|
||||
pxor xmm0, XMMWORD PTR [r8+r13]
|
||||
xor r8d, 32
|
||||
movdqu xmm1, XMMWORD PTR [r11+r13]
|
||||
paddq xmm0, xmm7
|
||||
paddq xmm1, xmm2
|
||||
movdqu XMMWORD PTR [r11+r13], xmm0
|
||||
movdqu xmm0, XMMWORD PTR [r8+r13]
|
||||
movdqu XMMWORD PTR [r8+r13], xmm1
|
||||
paddq xmm0, xmm3
|
||||
movdqu XMMWORD PTR [r15+r13], xmm0
|
||||
|
||||
mov r11, QWORD PTR [rsp+256]
|
||||
add r11, rdx
|
||||
mov rdx, QWORD PTR [rsp+264]
|
||||
add rdx, rax
|
||||
mov QWORD PTR [rbx], r11
|
||||
xor r11, r10
|
||||
mov QWORD PTR [rbx+8], rdx
|
||||
xor rdx, r9
|
||||
mov QWORD PTR [rsp+256], r11
|
||||
and r11d, 2097136
|
||||
mov QWORD PTR [rsp+264], rdx
|
||||
mov QWORD PTR [rsp+8], r11
|
||||
lea r15, QWORD PTR [r11+r13]
|
||||
movdqu xmm15, XMMWORD PTR [r11+r13]
|
||||
lea r13, QWORD PTR [rsi+rcx]
|
||||
movdqa xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
movaps xmm2, xmm13
|
||||
movq r10, xmm0
|
||||
psllq xmm5, 1
|
||||
shl r10, 32
|
||||
movdqa xmm0, xmm9
|
||||
psrldq xmm0, 8
|
||||
movdqa xmm1, xmm10
|
||||
movq r11, xmm0
|
||||
psrldq xmm1, 8
|
||||
movq r8, xmm1
|
||||
psrldq xmm4, 8
|
||||
movaps xmm0, xmm13
|
||||
movq rax, xmm4
|
||||
xor r10, rax
|
||||
movaps xmm1, xmm13
|
||||
xor r10, r12
|
||||
lea rax, QWORD PTR [r11+1]
|
||||
shr rax, 1
|
||||
movdqa xmm3, xmm9
|
||||
punpcklqdq xmm3, xmm10
|
||||
paddq xmm5, xmm3
|
||||
movq rdx, xmm5
|
||||
psrldq xmm5, 8
|
||||
cvtsi2sd xmm2, rax
|
||||
or edx, -2147483647
|
||||
lea rax, QWORD PTR [r8+1]
|
||||
shr rax, 1
|
||||
movq r9, xmm5
|
||||
cvtsi2sd xmm0, rax
|
||||
or r9d, -2147483647
|
||||
cvtsi2sd xmm1, rdx
|
||||
unpcklpd xmm2, xmm0
|
||||
movaps xmm0, xmm13
|
||||
cvtsi2sd xmm0, r9
|
||||
unpcklpd xmm1, xmm0
|
||||
divpd xmm2, xmm1
|
||||
paddq xmm2, xmm14
|
||||
cvttsd2si rax, xmm2
|
||||
psrldq xmm2, 8
|
||||
mov rbx, rax
|
||||
imul rax, rdx
|
||||
sub r11, rax
|
||||
js div_fix_1_sandybridge
|
||||
div_fix_1_ret_sandybridge:
|
||||
|
||||
cvttsd2si rdx, xmm2
|
||||
mov rax, rdx
|
||||
imul rax, r9
|
||||
movd xmm2, r11d
|
||||
movd xmm4, ebx
|
||||
sub r8, rax
|
||||
js div_fix_2_sandybridge
|
||||
div_fix_2_ret_sandybridge:
|
||||
|
||||
movd xmm1, r8d
|
||||
movd xmm0, edx
|
||||
punpckldq xmm2, xmm1
|
||||
punpckldq xmm4, xmm0
|
||||
punpckldq xmm4, xmm2
|
||||
paddq xmm3, xmm4
|
||||
movdqa xmm0, xmm3
|
||||
psrlq xmm0, 12
|
||||
paddq xmm0, xmm12
|
||||
sqrtpd xmm1, xmm0
|
||||
movq r9, xmm1
|
||||
movdqa xmm5, xmm1
|
||||
psrlq xmm5, 19
|
||||
test r9, 524287
|
||||
je sqrt_fix_1_sandybridge
|
||||
sqrt_fix_1_ret_sandybridge:
|
||||
|
||||
movq r9, xmm10
|
||||
psrldq xmm1, 8
|
||||
movq r8, xmm1
|
||||
test r8, 524287
|
||||
je sqrt_fix_2_sandybridge
|
||||
sqrt_fix_2_ret_sandybridge:
|
||||
|
||||
mov r12d, ecx
|
||||
mov r8d, ecx
|
||||
xor r12d, 16
|
||||
xor r8d, 32
|
||||
xor ecx, 48
|
||||
mov rax, r10
|
||||
mul r9
|
||||
movq xmm0, rax
|
||||
movq xmm3, rdx
|
||||
punpcklqdq xmm3, xmm0
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [r12+rsi]
|
||||
pxor xmm0, xmm3
|
||||
movdqu xmm1, XMMWORD PTR [r8+rsi]
|
||||
xor rdx, [r8+rsi]
|
||||
xor rax, [r8+rsi+8]
|
||||
movdqu xmm3, XMMWORD PTR [rcx+rsi]
|
||||
paddq xmm0, xmm6
|
||||
paddq xmm1, xmm11
|
||||
paddq xmm3, xmm8
|
||||
movdqu XMMWORD PTR [r8+rsi], xmm0
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||
movdqu XMMWORD PTR [r12+rsi], xmm3
|
||||
|
||||
add rdi, rdx
|
||||
mov QWORD PTR [r13], rdi
|
||||
xor rdi, r10
|
||||
mov ecx, edi
|
||||
and ecx, 2097136
|
||||
lea r8, QWORD PTR [rcx+rsi]
|
||||
|
||||
mov rdx, QWORD PTR [r13+8]
|
||||
add rbp, rax
|
||||
mov QWORD PTR [r13+8], rbp
|
||||
movdqu xmm11, XMMWORD PTR [rcx+rsi]
|
||||
xor rbp, rdx
|
||||
mov r13, QWORD PTR [rsp]
|
||||
movdqa xmm3, xmm7
|
||||
mov rdx, QWORD PTR [rsp+8]
|
||||
movdqa xmm8, xmm6
|
||||
mov r10, QWORD PTR [rsp+256]
|
||||
movdqa xmm7, xmm9
|
||||
mov r11, QWORD PTR [rsp+264]
|
||||
movdqa xmm6, xmm10
|
||||
mov r9, r15
|
||||
dec r14d
|
||||
jne main_loop_double_sandybridge
|
||||
|
||||
; Epilogue: restore MXCSR, xmm6-xmm15 and the pushed general-purpose
; registers, then jump over the out-of-line fix-up routines to the end label
; of this include. r11 = rsp+184 addresses the upper save slots and becomes
; the new rsp right before the pops.
ldmxcsr DWORD PTR [rsp+272]  ; restore the MXCSR value saved by stmxcsr in the prologue
|
||||
movaps xmm13, XMMWORD PTR [rsp+48]
|
||||
lea r11, QWORD PTR [rsp+184]
|
||||
movaps xmm6, XMMWORD PTR [r11-24]  ; rsp+160: slot written via [rax-88] in the prologue
|
||||
movaps xmm7, XMMWORD PTR [r11-40]  ; rsp+144
|
||||
movaps xmm8, XMMWORD PTR [r11-56]  ; rsp+128
|
||||
movaps xmm9, XMMWORD PTR [r11-72]  ; rsp+112
|
||||
movaps xmm10, XMMWORD PTR [r11-88]  ; rsp+96
|
||||
movaps xmm11, XMMWORD PTR [r11-104]  ; rsp+80
|
||||
movaps xmm12, XMMWORD PTR [r11-120]  ; rsp+64
|
||||
movaps xmm14, XMMWORD PTR [rsp+32]
|
||||
movaps xmm15, XMMWORD PTR [rsp+16]
|
||||
mov rsp, r11  ; equivalent to "add rsp, 184"
|
||||
pop r15  ; pops mirror the prologue pushes in reverse order
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
jmp cnv2_double_mainloop_asm_sandybridge_endp  ; skip the fix-up routines placed after this point
|
||||
|
||||
; Taken via "js" when the remainder r11 = dividend - quotient*divisor went
; negative for the first lane: step the quotient (rbx) down by one and add
; the divisor (rdx) back to the remainder.
div_fix_1_sandybridge:
|
||||
dec rbx
|
||||
add r11, rdx
|
||||
jmp div_fix_1_ret_sandybridge
|
||||
|
||||
; Same correction for the second lane: the quotient is in rdx, the divisor
; in r9 and the (negative) remainder in r8.
div_fix_2_sandybridge:
|
||||
dec rdx
|
||||
add r8, r9
|
||||
jmp div_fix_2_ret_sandybridge
|
||||
|
||||
; Out-of-line path for the low lane of the sqrtpd result, taken when the low
; 19 bits of its raw bits (in r9) are all zero. The low qword of xmm3 is the
; integer the square root was derived from. Recomputes the shifted result
; in integer arithmetic with a carry-based +1 adjustment and writes it into
; the low lane of xmm5 while preserving xmm5's high lane.
; Scratch registers: rax, rdx, r8, r9, r11, xmm0.
sqrt_fix_1_sandybridge:
|
||||
movq r8, xmm3  ; r8 = original integer input for this lane
|
||||
movdqa xmm0, xmm5
|
||||
psrldq xmm0, 8  ; xmm0 = high lane of xmm5, kept for the repack below
|
||||
dec r9
|
||||
mov r11d, -1022  ; zero-extends: r11 = 0x00000000FFFFFC02
|
||||
shl r11, 32  ; r11 = 0xFFFFFC0200000000, i.e. -1022 * 2^32 as a signed 64-bit value
|
||||
mov rax, r9
|
||||
shr r9, 19  ; r9 = candidate result (same shift the fast path applies with psrlq xmm5, 19)
|
||||
shr rax, 20
|
||||
mov rdx, r9
|
||||
sub rdx, rax
|
||||
lea rdx, [rdx+r11+1]
|
||||
add rax, r11
|
||||
imul rdx, rax  ; low 64 bits of the product of the two biased halves
|
||||
sub rdx, r8  ; sets CF when the product is below the original input (unsigned)
|
||||
adc r9, 0  ; bump the candidate by one when CF is set
|
||||
movq xmm5, r9
|
||||
punpcklqdq xmm5, xmm0  ; xmm5 = { corrected low lane, previous high lane }
|
||||
jmp sqrt_fix_1_ret_sandybridge
|
||||
|
||||
; Out-of-line path for the high lane of the sqrtpd result, taken when the
; low 19 bits of its raw bits (in r8) are all zero. The high qword of xmm3
; is the integer the square root was derived from. Performs the same integer
; recomputation as sqrt_fix_1_sandybridge and writes the result into the
; high lane of xmm5, keeping the low lane.
; Scratch registers: rax, rbx, rdx, r8, r11, xmm0; xmm3 is shifted in place.
sqrt_fix_2_sandybridge:
|
||||
psrldq xmm3, 8
|
||||
movq r11, xmm3  ; r11 = original integer input for this lane
|
||||
dec r8
|
||||
mov ebx, -1022  ; zero-extends: rbx = 0x00000000FFFFFC02
|
||||
shl rbx, 32  ; rbx = 0xFFFFFC0200000000, i.e. -1022 * 2^32 as a signed 64-bit value
|
||||
mov rax, r8
|
||||
shr r8, 19  ; r8 = candidate result
|
||||
shr rax, 20
|
||||
mov rdx, r8
|
||||
sub rdx, rax
|
||||
lea rdx, [rdx+rbx+1]
|
||||
add rax, rbx
|
||||
imul rdx, rax  ; low 64 bits of the product of the two biased halves
|
||||
sub rdx, r11  ; sets CF when the product is below the original input (unsigned)
|
||||
adc r8, 0  ; bump the candidate by one when CF is set
|
||||
movq xmm0, r8
|
||||
punpcklqdq xmm5, xmm0  ; xmm5 = { existing low lane, corrected high lane }
|
||||
jmp sqrt_fix_2_ret_sandybridge
|
||||
|
||||
cnv2_double_mainloop_asm_sandybridge_endp:
|
182
src/crypto/asm/win/cnv2_main_loop_ivybridge.inc
Normal file
182
src/crypto/asm/win/cnv2_main_loop_ivybridge.inc
Normal file
|
@ -0,0 +1,182 @@
|
|||
mov QWORD PTR [rsp+24], rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 80
|
||||
|
||||
stmxcsr DWORD PTR [rsp]
|
||||
mov DWORD PTR [rsp+4], 24448
|
||||
ldmxcsr DWORD PTR [rsp+4]
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r9, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov esi, 524288
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
mov r13d, -2147483647
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
mov r10, r8
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
movq xmm4, rax
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
xor r11, QWORD PTR [rcx+8]
|
||||
mov rbx, QWORD PTR [rcx+224]
|
||||
mov rax, QWORD PTR [r9+80]
|
||||
xor rax, QWORD PTR [r9+64]
|
||||
movq xmm0, rdx
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r9+72]
|
||||
movq xmm3, QWORD PTR [r9+104]
|
||||
movaps XMMWORD PTR [rsp+64], xmm6
|
||||
movaps XMMWORD PTR [rsp+48], xmm7
|
||||
movaps XMMWORD PTR [rsp+32], xmm8
|
||||
and r10d, 2097136
|
||||
movq xmm5, rax
|
||||
|
||||
mov ax, 1023
|
||||
shl rax, 52
|
||||
movq xmm8, rax
|
||||
mov r15, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm4, xmm0
|
||||
movq xmm0, rcx
|
||||
punpcklqdq xmm5, xmm0
|
||||
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||
|
||||
ALIGN 64
|
||||
$main_loop_ivybridge:
|
||||
lea rdx, QWORD PTR [r10+rbx]
|
||||
mov ecx, r10d
|
||||
mov eax, r10d
|
||||
mov rdi, r15
|
||||
xor ecx, 16
|
||||
xor eax, 32
|
||||
xor r10d, 48
|
||||
movq xmm0, r11
|
||||
movq xmm7, r8
|
||||
punpcklqdq xmm7, xmm0
|
||||
aesenc xmm6, xmm7
|
||||
movq rbp, xmm6
|
||||
mov r9, rbp
|
||||
and r9d, 2097136
|
||||
movdqu xmm2, XMMWORD PTR [rcx+rbx]
|
||||
movdqu xmm1, XMMWORD PTR [rax+rbx]
|
||||
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm1, xmm7
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
||||
movdqu XMMWORD PTR [rax+rbx], xmm2
|
||||
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||
mov r10, r9
|
||||
xor r10d, 32
|
||||
movq rcx, xmm3
|
||||
mov rax, rcx
|
||||
shl rax, 32
|
||||
xor rdi, rax
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [rdx], xmm0
|
||||
xor rdi, QWORD PTR [r9+rbx]
|
||||
lea r14, QWORD PTR [r9+rbx]
|
||||
mov r12, QWORD PTR [r14+8]
|
||||
xor edx, edx
|
||||
lea r9d, DWORD PTR [ecx+ecx]
|
||||
add r9d, ebp
|
||||
movdqa xmm0, xmm6
|
||||
psrldq xmm0, 8
|
||||
or r9d, r13d
|
||||
movq rax, xmm0
|
||||
div r9
|
||||
xorps xmm3, xmm3
|
||||
mov eax, eax
|
||||
shl rdx, 32
|
||||
add rdx, rax
|
||||
lea r9, QWORD PTR [rdx+rbp]
|
||||
mov r15, rdx
|
||||
mov rax, r9
|
||||
shr rax, 12
|
||||
movq xmm0, rax
|
||||
paddq xmm0, xmm8
|
||||
sqrtsd xmm3, xmm0
|
||||
movq rdx, xmm3
|
||||
test edx, 524287
|
||||
je $sqrt_fixup_ivybridge
|
||||
psrlq xmm3, 19
|
||||
$sqrt_fixup_ivybridge_ret:
|
||||
|
||||
mov ecx, r10d
|
||||
mov rax, rdi
|
||||
mul rbp
|
||||
movq xmm2, rdx
|
||||
xor rdx, [rcx+rbx]
|
||||
add r8, rdx
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rdi
|
||||
mov edi, r8d
|
||||
and edi, 2097136
|
||||
movq xmm0, rax
|
||||
xor rax, [rcx+rbx+8]
|
||||
add r11, rax
|
||||
mov QWORD PTR [r14+8], r11
|
||||
punpcklqdq xmm2, xmm0
|
||||
|
||||
mov r9d, r10d
|
||||
xor r9d, 48
|
||||
xor r10d, 16
|
||||
pxor xmm2, XMMWORD PTR [r9+rbx]
|
||||
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm0, xmm5
|
||||
movdqu xmm1, XMMWORD PTR [rcx+rbx]
|
||||
paddq xmm2, xmm4
|
||||
paddq xmm1, xmm7
|
||||
movdqa xmm5, xmm4
|
||||
movdqu XMMWORD PTR [r9+rbx], xmm0
|
||||
movdqa xmm4, xmm6
|
||||
movdqu XMMWORD PTR [rcx+rbx], xmm2
|
||||
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||
movdqu xmm6, [rdi+rbx]
|
||||
mov r10d, edi
|
||||
xor r11, r12
|
||||
dec rsi
|
||||
jne $main_loop_ivybridge
|
||||
|
||||
ldmxcsr DWORD PTR [rsp]
|
||||
mov rbx, QWORD PTR [rsp+160]
|
||||
movaps xmm6, XMMWORD PTR [rsp+64]
|
||||
movaps xmm7, XMMWORD PTR [rsp+48]
|
||||
movaps xmm8, XMMWORD PTR [rsp+32]
|
||||
add rsp, 80
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
jmp $cnv2_main_loop_ivybridge_endp
|
||||
|
||||
; Slow path of the square-root step. The main loop jumps here when the low
; 19 bits of the raw sqrtsd result (copied into rdx) are all zero; otherwise
; it simply does "psrlq xmm3, 19".
; In:  rdx = raw sqrtsd result bits, r9 = 64-bit sqrt input (before the
;      shr 12 / bias add that produced the sqrtsd operand).
; Out: xmm3 = adjusted result. rax and rcx are scratch; r13 is borrowed and
;      restored before returning.
$sqrt_fixup_ivybridge:
|
||||
; Step one below the raw result before shifting.
dec rdx
|
||||
; Writing r13d zero-extends into r13; the shl then moves the 32-bit
; pattern of -1022 into the upper half of r13.
mov r13d, -1022
|
||||
shl r13, 32
|
||||
mov rax, rdx
|
||||
; rdx = (bits - 1) >> 19 (same shift as the fast path), rax = (bits - 1) >> 20.
shr rdx, 19
|
||||
shr rax, 20
|
||||
mov rcx, rdx
|
||||
sub rcx, rax
|
||||
add rax, r13
|
||||
; not + sub is equivalent to: rcx = rcx + r13 + 1 (since -(~x) == x + 1).
not r13
|
||||
sub rcx, r13
|
||||
; Restore the constant the main loop keeps in r13 for "or r9d, r13d".
mov r13d, -2147483647
|
||||
imul rcx, rax
|
||||
; The borrow from this subtract (product < r9, unsigned) is added to rdx.
sub rcx, r9
|
||||
adc rdx, 0
|
||||
; Hand the result back in xmm3, where the fast path leaves its value.
movq xmm3, rdx
|
||||
jmp $sqrt_fixup_ivybridge_ret
|
||||
|
||||
$cnv2_main_loop_ivybridge_endp:
|
179
src/crypto/asm/win/cnv2_main_loop_ryzen.inc
Normal file
179
src/crypto/asm/win/cnv2_main_loop_ryzen.inc
Normal file
|
@ -0,0 +1,179 @@
|
|||
mov QWORD PTR [rsp+16], rbx
|
||||
mov QWORD PTR [rsp+24], rbp
|
||||
mov QWORD PTR [rsp+32], rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 64
|
||||
|
||||
stmxcsr DWORD PTR [rsp]
|
||||
mov DWORD PTR [rsp+4], 24448
|
||||
ldmxcsr DWORD PTR [rsp+4]
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r9, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov ebp, 524288
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
mov r10, r8
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
movq xmm3, rax
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
xor r11, QWORD PTR [rcx+8]
|
||||
mov rbx, QWORD PTR [rcx+224]
|
||||
mov rax, QWORD PTR [r9+80]
|
||||
xor rax, QWORD PTR [r9+64]
|
||||
movq xmm0, rdx
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r9+72]
|
||||
mov rdi, QWORD PTR [r9+104]
|
||||
and r10d, 2097136
|
||||
movaps XMMWORD PTR [rsp+48], xmm6
|
||||
movq xmm4, rax
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+16], xmm8
|
||||
xorps xmm8, xmm8
|
||||
mov ax, 1023
|
||||
shl rax, 52
|
||||
movq xmm7, rax
|
||||
mov r15, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movq xmm0, rcx
|
||||
punpcklqdq xmm4, xmm0
|
||||
|
||||
ALIGN 64
|
||||
$main_loop_ryzen:
|
||||
movdqa xmm5, XMMWORD PTR [r10+rbx]
|
||||
movq xmm0, r11
|
||||
movq xmm6, r8
|
||||
punpcklqdq xmm6, xmm0
|
||||
lea rdx, QWORD PTR [r10+rbx]
|
||||
lea r9, QWORD PTR [rdi+rdi]
|
||||
shl rdi, 32
|
||||
|
||||
mov ecx, r10d
|
||||
mov eax, r10d
|
||||
xor ecx, 16
|
||||
xor eax, 32
|
||||
xor r10d, 48
|
||||
aesenc xmm5, xmm6
|
||||
movdqa xmm2, XMMWORD PTR [rcx+rbx]
|
||||
movdqa xmm1, XMMWORD PTR [rax+rbx]
|
||||
movdqa xmm0, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm2, xmm3
|
||||
paddq xmm1, xmm6
|
||||
paddq xmm0, xmm4
|
||||
movdqa XMMWORD PTR [rcx+rbx], xmm0
|
||||
movdqa XMMWORD PTR [rax+rbx], xmm2
|
||||
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||
|
||||
movaps xmm1, xmm8
|
||||
mov rsi, r15
|
||||
xor rsi, rdi
|
||||
movq r14, xmm5
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm3
|
||||
mov r10, r14
|
||||
and r10d, 2097136
|
||||
movdqa XMMWORD PTR [rdx], xmm0
|
||||
xor rsi, QWORD PTR [r10+rbx]
|
||||
lea r12, QWORD PTR [r10+rbx]
|
||||
mov r13, QWORD PTR [r10+rbx+8]
|
||||
|
||||
add r9d, r14d
|
||||
or r9d, -2147483647
|
||||
xor edx, edx
|
||||
movdqa xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
movq rax, xmm0
|
||||
|
||||
div r9
|
||||
movq xmm0, rax
|
||||
movq xmm1, rdx
|
||||
punpckldq xmm0, xmm1
|
||||
movq r15, xmm0
|
||||
paddq xmm0, xmm5
|
||||
movdqa xmm2, xmm0
|
||||
psrlq xmm0, 12
|
||||
paddq xmm0, xmm7
|
||||
sqrtsd xmm1, xmm0
|
||||
movq rdi, xmm1
|
||||
test rdi, 524287
|
||||
je $sqrt_fixup_ryzen
|
||||
shr rdi, 19
|
||||
|
||||
$sqrt_fixup_ryzen_ret:
|
||||
mov rax, rsi
|
||||
mul r14
|
||||
movq xmm1, rax
|
||||
movq xmm0, rdx
|
||||
punpcklqdq xmm0, xmm1
|
||||
|
||||
mov r9d, r10d
|
||||
mov ecx, r10d
|
||||
xor r9d, 16
|
||||
xor ecx, 32
|
||||
xor r10d, 48
|
||||
movdqa xmm1, XMMWORD PTR [rcx+rbx]
|
||||
xor rdx, [rcx+rbx]
|
||||
xor rax, [rcx+rbx+8]
|
||||
movdqa xmm2, XMMWORD PTR [r9+rbx]
|
||||
pxor xmm2, xmm0
|
||||
paddq xmm4, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm2, xmm3
|
||||
paddq xmm1, xmm6
|
||||
movdqa XMMWORD PTR [r9+rbx], xmm4
|
||||
movdqa XMMWORD PTR [rcx+rbx], xmm2
|
||||
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||
|
||||
movdqa xmm4, xmm3
|
||||
add r8, rdx
|
||||
add r11, rax
|
||||
mov QWORD PTR [r12], r8
|
||||
xor r8, rsi
|
||||
mov QWORD PTR [r12+8], r11
|
||||
mov r10, r8
|
||||
xor r11, r13
|
||||
and r10d, 2097136
|
||||
movdqa xmm3, xmm5
|
||||
dec ebp
|
||||
jne $main_loop_ryzen
|
||||
|
||||
ldmxcsr DWORD PTR [rsp]
|
||||
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||
lea r11, QWORD PTR [rsp+64]
|
||||
mov rbx, QWORD PTR [r11+56]
|
||||
mov rbp, QWORD PTR [r11+64]
|
||||
mov rsi, QWORD PTR [r11+72]
|
||||
movaps xmm8, XMMWORD PTR [r11-48]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
mov rsp, r11
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
jmp $cnv2_main_loop_ryzen_endp
|
||||
|
||||
; Slow path of the square-root step. The main loop jumps here when the low
; 19 bits of the raw sqrtsd result (copied into rdi) are all zero; otherwise
; it simply does "shr rdi, 19".
; In:  rdi = raw sqrtsd result bits, xmm2 = copy of the sqrt input taken
;      before the psrlq 12 / bias add.
; Out: rdi = adjusted result. rax, rcx, rdx and r9 are used as scratch.
$sqrt_fixup_ryzen:
|
||||
; r9 = low 64 bits of the saved sqrt input.
movq r9, xmm2
|
||||
; Step one below the raw result before shifting.
dec rdi
|
||||
; Writing edx zero-extends into rdx; the shl then moves the 32-bit
; pattern of -1022 into the upper half of rdx.
mov edx, -1022
|
||||
shl rdx, 32
|
||||
mov rax, rdi
|
||||
; rdi = (bits - 1) >> 19 (same shift as the fast path), rax = (bits - 1) >> 20.
shr rdi, 19
|
||||
shr rax, 20
|
||||
mov rcx, rdi
|
||||
sub rcx, rax
|
||||
; rcx = rcx + rdx + 1, rax = rax + rdx.
lea rcx, [rcx+rdx+1]
|
||||
add rax, rdx
|
||||
imul rcx, rax
|
||||
; The borrow from this subtract (product < r9, unsigned) is added to rdi.
sub rcx, r9
|
||||
adc rdi, 0
|
||||
jmp $sqrt_fixup_ryzen_ret
|
||||
|
||||
$cnv2_main_loop_ryzen_endp:
|
267
src/crypto/asm/win/cnv2_mainloop_soft_aes_sandybridge.inc
Normal file
267
src/crypto/asm/win/cnv2_mainloop_soft_aes_sandybridge.inc
Normal file
|
@ -0,0 +1,267 @@
|
|||
mov QWORD PTR [rsp+8], rcx
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 152
|
||||
|
||||
stmxcsr DWORD PTR [rsp+4]
|
||||
mov DWORD PTR [rsp], 24448
|
||||
ldmxcsr DWORD PTR [rsp]
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r10, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+40]
|
||||
xor r9, QWORD PTR [rcx+8]
|
||||
movq xmm4, rax
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r11, QWORD PTR [rcx+224]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r10+72]
|
||||
mov rax, QWORD PTR [r10+80]
|
||||
movq xmm0, rdx
|
||||
xor rax, QWORD PTR [r10+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+48], xmm8
|
||||
movaps XMMWORD PTR [rsp+64], xmm9
|
||||
movaps XMMWORD PTR [rsp+80], xmm10
|
||||
movaps XMMWORD PTR [rsp+96], xmm11
|
||||
movaps XMMWORD PTR [rsp+112], xmm12
|
||||
movaps XMMWORD PTR [rsp+128], xmm13
|
||||
|
||||
movq xmm5, rax
|
||||
|
||||
mov ax, 1023
|
||||
shl rax, 52
|
||||
movq xmm8, rax
|
||||
|
||||
mov rax, r8
|
||||
punpcklqdq xmm4, xmm0
|
||||
and eax, 2097136
|
||||
movq xmm10, QWORD PTR [r10+96]
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+104]
|
||||
xorps xmm9, xmm9
|
||||
mov QWORD PTR [rsp+248], rax
|
||||
movq xmm12, r11
|
||||
mov QWORD PTR [rsp+240], r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
movq xmm13, rcx
|
||||
mov r12d, 524288
|
||||
|
||||
ALIGN 64
|
||||
cnv2_mainloop_soft_aes_sandybridge:
|
||||
movd xmm11, r12d
|
||||
mov r12, QWORD PTR [r10+272]
|
||||
lea r13, QWORD PTR [rax+r11]
|
||||
mov esi, DWORD PTR [r13]
|
||||
movq xmm0, r9
|
||||
mov r10d, DWORD PTR [r13+4]
|
||||
movq xmm7, r8
|
||||
mov ebp, DWORD PTR [r13+12]
|
||||
mov r14d, DWORD PTR [r13+8]
|
||||
mov rdx, QWORD PTR [rsp+248]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
movd xmm1, r11d
|
||||
add ebp, 256
|
||||
movq r11, xmm12
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
mov rcx, rdx
|
||||
xor eax, r15d
|
||||
punpckldq xmm2, xmm1
|
||||
xor rcx, 16
|
||||
movd xmm6, eax
|
||||
mov rax, rdx
|
||||
punpckldq xmm6, xmm0
|
||||
xor rax, 32
|
||||
punpckldq xmm6, xmm2
|
||||
xor rdx, 48
|
||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||
paddq xmm0, xmm5
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movq rcx, xmm13
|
||||
paddq xmm1, xmm7
|
||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||
movq rdi, xmm6
|
||||
mov r10, rdi
|
||||
and r10d, 2097136
|
||||
xor edx, edx
|
||||
mov rax, rcx
|
||||
shl rax, 32
|
||||
movq rbx, xmm10
|
||||
xor rbx, rax
|
||||
lea r9, QWORD PTR [rcx+rcx]
|
||||
add r9d, edi
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
mov ecx, -2147483647
|
||||
movdqu XMMWORD PTR [r13], xmm0
|
||||
or r9, rcx
|
||||
movdqa xmm0, xmm6
|
||||
movaps xmm1, xmm9
|
||||
psrldq xmm0, 8
|
||||
movq rax, xmm0
|
||||
xor rbx, QWORD PTR [r10+r11]
|
||||
lea r14, QWORD PTR [r10+r11]
|
||||
mov rbp, QWORD PTR [r14+8]
|
||||
div r9
|
||||
shl rdx, 32
|
||||
mov eax, eax
|
||||
add rdx, rax
|
||||
lea r9, QWORD PTR [rdx+rdi]
|
||||
movq xmm10, rdx
|
||||
mov rax, r9
|
||||
shr rax, 12
|
||||
movq xmm0, rax
|
||||
paddq xmm0, xmm8
|
||||
sqrtsd xmm1, xmm0
|
||||
movq rdx, xmm1
|
||||
test rdx, 524287
|
||||
je sqrt_fixup_soft_aes_sandybridge
|
||||
psrlq xmm1, 19
|
||||
sqrt_fixup_soft_aes_sandybridge_ret:
|
||||
|
||||
mov r9, r10
|
||||
movdqa xmm13, xmm1
|
||||
xor r9, 16
|
||||
mov rcx, r10
|
||||
xor rcx, 32
|
||||
xor r10, 48
|
||||
mov rax, rbx
|
||||
mul rdi
|
||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||
paddq xmm1, xmm7
|
||||
movq xmm0, rax
|
||||
movq xmm3, rdx
|
||||
xor rax, QWORD PTR [r11+rcx+8]
|
||||
xor rdx, QWORD PTR [rcx+r11]
|
||||
punpcklqdq xmm3, xmm0
|
||||
add r8, rdx
|
||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm2, xmm3
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqa xmm5, xmm4
|
||||
mov r9, QWORD PTR [rsp+240]
|
||||
movdqa xmm4, xmm6
|
||||
add r9, rax
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
mov r10, QWORD PTR [rsp+224]
|
||||
movd r12d, xmm11
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rbx
|
||||
mov rax, r8
|
||||
mov QWORD PTR [r14+8], r9
|
||||
and eax, 2097136
|
||||
xor r9, rbp
|
||||
mov QWORD PTR [rsp+240], r9
|
||||
mov QWORD PTR [rsp+248], rax
|
||||
sub r12d, 1
|
||||
jne cnv2_mainloop_soft_aes_sandybridge
|
||||
|
||||
ldmxcsr DWORD PTR [rsp+4]
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||
|
||||
add rsp, 152
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
jmp cnv2_mainloop_soft_aes_sandybridge_asm_endp
|
||||
|
||||
; Slow path of the square-root step. The main loop jumps here when the low
; 19 bits of the raw sqrtsd result (copied into rdx) are all zero; otherwise
; it simply does "psrlq xmm1, 19".
; In:  rdx = raw sqrtsd result bits, r9 = 64-bit sqrt input (before the
;      shr 12 / bias add that produced the sqrtsd operand).
; Out: xmm1 = adjusted result. rax, rcx and r15 are used as scratch.
sqrt_fixup_soft_aes_sandybridge:
|
||||
; Step one below the raw result before shifting.
dec rdx
|
||||
; Writing r15d zero-extends into r15; the shl then moves the 32-bit
; pattern of -1022 into the upper half of r15.
mov r15d, -1022
|
||||
shl r15, 32
|
||||
mov rax, rdx
|
||||
; rdx = (bits - 1) >> 19 (same shift as the fast path), rax = (bits - 1) >> 20.
shr rdx, 19
|
||||
shr rax, 20
|
||||
mov rcx, rdx
|
||||
sub rcx, rax
|
||||
; rcx = rcx + r15 + 1, rax = rax + r15.
lea rcx, [rcx+r15+1]
|
||||
add rax, r15
|
||||
imul rcx, rax
|
||||
; The borrow from this subtract (product < r9, unsigned) is added to rdx.
sub rcx, r9
|
||||
adc rdx, 0
|
||||
; Hand the result back in xmm1, where the fast path leaves its value.
movq xmm1, rdx
|
||||
jmp sqrt_fixup_soft_aes_sandybridge_ret
|
||||
|
||||
cnv2_mainloop_soft_aes_sandybridge_asm_endp:
|
|
@ -4,8 +4,9 @@
|
|||
"threads": 0, // number of miner threads (not set or 0 enables automatic selection of optimal thread count)
|
||||
"multihash-factor": 0, // number of hash blocks to process at a time (not set or 0 enables automatic selection of optimal number of hash blocks)
|
||||
"multihash-thread-mask" : null, // for multihash-factors>0 only, limits multihash to given threads (mask), mask "0x3" means run multihash on thread 0 and 1 only (default: all threads)
|
||||
"pow-variant" : "auto", // specify the PoW variant to use: -> auto (default), 0 (v0), 1 (v1, aka monerov7, aeonv7), tube (ipbc), alloy, xtl (including autodetect for v5), msr, xhv, rto
|
||||
"pow-variant" : "auto", // specify the PoW variant to use: -> auto (default), 0 (v0), 1 (v1, aka monerov7, aeonv7), 2 (v2, aka monerov8), tube (ipbc), alloy, xtl (including autodetect for v5), msr, xhv, rto
|
||||
// for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations
|
||||
"asm-optimization" : "auto", // specify the ASM optimization to use: -> auto (default), intel, ryzen, off
|
||||
"background": false, // true to run the miner in the background (Windows only, for *nix please use screen/tmux or systemd service instead)
|
||||
"colors": true, // false to disable colored output
|
||||
"cpu-affinity": null, // set process affinity to CPU core(s), mask "0x3" for cores 0 and 1
|
||||
|
|
|
@ -31,6 +31,12 @@
|
|||
class ILogBackend
|
||||
{
|
||||
public:
|
||||
# ifdef APP_DEBUG
|
||||
constexpr static const size_t kBufferSize = 1024;
|
||||
# else
|
||||
constexpr static const size_t kBufferSize = 512;
|
||||
# endif
|
||||
|
||||
virtual ~ILogBackend() {}
|
||||
|
||||
virtual void message(int level, const char* fmt, va_list args) = 0;
|
||||
|
|
|
@ -56,19 +56,20 @@ void FileLog::message(int level, const char* fmt, va_list args)
|
|||
localtime_r(&now, &stime);
|
||||
# endif
|
||||
|
||||
auto *buf = new char[512];
|
||||
int size = snprintf(buf, 23, "[%d-%02d-%02d %02d:%02d:%02d] ",
|
||||
snprintf(m_fmt, sizeof(m_fmt) - 1, "[%d-%02d-%02d %02d:%02d:%02d] %s",
|
||||
stime.tm_year + 1900,
|
||||
stime.tm_mon + 1,
|
||||
stime.tm_mday,
|
||||
stime.tm_hour,
|
||||
stime.tm_min,
|
||||
stime.tm_sec);
|
||||
stime.tm_sec,
|
||||
fmt);
|
||||
|
||||
size = vsnprintf(buf + size, 512 - size - 1, fmt, args) + size;
|
||||
auto *buf = new char[kBufferSize];
|
||||
const int size = vsnprintf(buf, kBufferSize - 1, m_fmt, args);
|
||||
buf[size] = '\n';
|
||||
|
||||
std::string row = std::regex_replace(std::string(buf, size+1), std::regex("\x1B\\[[0-9;]*[a-zA-Z]"), "");
|
||||
std::string row = std::regex_replace(std::string(buf, static_cast<unsigned long>(size + 1)), std::regex("\x1B\\[[0-9;]*[a-zA-Z]"), "");
|
||||
|
||||
memcpy(buf, row.c_str(), row.length());
|
||||
|
||||
|
|
|
@ -44,6 +44,7 @@ private:
|
|||
|
||||
void write(char *data, size_t size);
|
||||
|
||||
char m_fmt[256];
|
||||
int m_file;
|
||||
};
|
||||
|
||||
|
|
|
@ -65,7 +65,7 @@ public:
|
|||
m_socket.connect(endpointIterator, boost::bind(&BoostConnection::handleConnect, this->shared_from_this(),
|
||||
boost::asio::placeholders::error));
|
||||
} else {
|
||||
notifyError(std::string("[DNS resolve] ") + error.message());
|
||||
notifyDNSError(std::string("[DNS resolve] ") + error.message());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -73,6 +73,7 @@ Client::Client(int id, const char *agent, IClientListener *listener) :
|
|||
uv_async_init(uv_default_loop(), &onConnectedAsync, Client::onConnected);
|
||||
uv_async_init(uv_default_loop(), &onReceivedAsync, Client::onReceived);
|
||||
uv_async_init(uv_default_loop(), &onErrorAsync, Client::onError);
|
||||
uv_async_init(uv_default_loop(), &onDNSErrorAsync, Client::onDNSError);
|
||||
}
|
||||
|
||||
|
||||
|
@ -96,7 +97,7 @@ void Client::connect(const Url *url)
|
|||
|
||||
void Client::connect()
|
||||
{
|
||||
LOG_DEBUG("connect");
|
||||
LOG_DEBUG("[%d] connect", m_id);
|
||||
|
||||
m_connection = establishConnection(shared_from_this(),
|
||||
m_url.useTls() ? CONNECTION_TYPE_TLS : CONNECTION_TYPE_TCP,
|
||||
|
@ -106,13 +107,15 @@ void Client::connect()
|
|||
|
||||
void Client::disconnect()
|
||||
{
|
||||
LOG_DEBUG("disconnect");
|
||||
LOG_DEBUG("[%d] disconnect", m_id);
|
||||
|
||||
uv_timer_stop(&m_keepAliveTimer);
|
||||
|
||||
m_expire = 0;
|
||||
m_failures = -1;
|
||||
|
||||
LOG_DEBUG("[%d] disconnect set m_failure to -1", m_id);
|
||||
|
||||
close();
|
||||
}
|
||||
|
||||
|
@ -330,7 +333,7 @@ int64_t Client::send(char* buf, size_t size)
|
|||
|
||||
void Client::close()
|
||||
{
|
||||
LOG_DEBUG("close");
|
||||
LOG_DEBUG("[%d] close", m_id);
|
||||
|
||||
if (m_connection) {
|
||||
m_connection->disconnect();
|
||||
|
@ -364,7 +367,8 @@ void Client::login()
|
|||
|
||||
rapidjson::Value supportedPowVariantsList(rapidjson::kArrayType);
|
||||
for (auto& supportedPowVariant : getSupportedPowVariants()) {
|
||||
supportedPowVariantsList.PushBack(rapidjson::StringRef(supportedPowVariant.c_str()), allocator);
|
||||
rapidjson::Value val(supportedPowVariant.c_str(), allocator);
|
||||
supportedPowVariantsList.PushBack(val, allocator);
|
||||
}
|
||||
|
||||
params.AddMember("supported-variants", supportedPowVariantsList, allocator);
|
||||
|
@ -517,7 +521,9 @@ void Client::parseResponse(int64_t id, const rapidjson::Value &result, const rap
|
|||
return reconnect();
|
||||
}
|
||||
|
||||
LOG_DEBUG("[%d] login set m_failure to 0", m_id);
|
||||
m_failures = 0;
|
||||
|
||||
m_listener->onLoginSuccess(this);
|
||||
m_listener->onJobReceived(this, m_job);
|
||||
return;
|
||||
|
@ -553,7 +559,7 @@ void Client::ping()
|
|||
|
||||
void Client::reconnect()
|
||||
{
|
||||
LOG_DEBUG("reconnect");
|
||||
LOG_DEBUG("[%d] reconnect", m_id);
|
||||
|
||||
close();
|
||||
|
||||
|
@ -562,11 +568,12 @@ void Client::reconnect()
|
|||
}
|
||||
|
||||
if (m_failures == -1) {
|
||||
LOG_DEBUG("reconnect -> m_failures == -1");
|
||||
LOG_DEBUG("[%d] reconnect -> m_failures == -1", m_id);
|
||||
return m_listener->onClose(this, -1);
|
||||
}
|
||||
|
||||
m_failures++;
|
||||
LOG_DEBUG("[%d] increment m_failures to: %d", m_id, m_failures);
|
||||
m_listener->onClose(this, (int) m_failures);
|
||||
|
||||
m_expire = uv_now(uv_default_loop()) + m_retryPause;
|
||||
|
@ -636,10 +643,9 @@ void Client::scheduleOnReceived(char* data, std::size_t size)
|
|||
|
||||
void Client::onError(uv_async_t *handle)
|
||||
{
|
||||
LOG_DEBUG("onError");
|
||||
|
||||
auto client = getClient(handle->data);
|
||||
if (client) {
|
||||
LOG_DEBUG("[%d] onError", client->m_id);
|
||||
client->reconnect();
|
||||
}
|
||||
}
|
||||
|
@ -655,3 +661,28 @@ void Client::scheduleOnError(const std::string &error)
|
|||
onErrorAsync.data = this;
|
||||
uv_async_send(&onErrorAsync);
|
||||
}
|
||||
|
||||
void Client::onDNSError(uv_async_t *handle)
|
||||
{
|
||||
auto client = getClient(handle->data);
|
||||
if (client) {
|
||||
if (client->m_failures == -1) {
|
||||
client->m_failures = 0;
|
||||
}
|
||||
|
||||
LOG_DEBUG("[%d] onDNSError", client->m_id);
|
||||
client->reconnect();
|
||||
}
|
||||
}
|
||||
|
||||
void Client::scheduleOnDNSError(const std::string &error)
|
||||
{
|
||||
LOG_DEBUG("scheduleOnDNSError");
|
||||
|
||||
if (!m_quiet) {
|
||||
LOG_ERR("[%s:%u] DNS Error: \"%s\"", m_url.host(), m_url.port(), error.c_str());
|
||||
}
|
||||
|
||||
onDNSErrorAsync.data = this;
|
||||
uv_async_send(&onDNSErrorAsync);
|
||||
}
|
||||
|
|
|
@ -72,6 +72,7 @@ public:
|
|||
static void onConnected(uv_async_t *handle);
|
||||
static void onReceived(uv_async_t *handle);
|
||||
static void onError(uv_async_t *handle);
|
||||
static void onDNSError(uv_async_t *handle);
|
||||
|
||||
private:
|
||||
bool isCriticalError(const char *message);
|
||||
|
@ -92,6 +93,7 @@ private:
|
|||
virtual void scheduleOnConnected();
|
||||
virtual void scheduleOnReceived(char *data, size_t size);
|
||||
virtual void scheduleOnError(const std::string &error);
|
||||
virtual void scheduleOnDNSError(const std::string &error);
|
||||
|
||||
static inline Client *getClient(void *data) { return static_cast<Client*>(data); }
|
||||
|
||||
|
@ -122,6 +124,7 @@ private:
|
|||
uv_async_t onConnectedAsync;
|
||||
uv_async_t onReceivedAsync;
|
||||
uv_async_t onErrorAsync;
|
||||
uv_async_t onDNSErrorAsync;
|
||||
|
||||
uv_timer_t m_keepAliveTimer;
|
||||
|
||||
|
|
|
@ -60,6 +60,14 @@ void Connection::notifyError(const std::string& error)
|
|||
}
|
||||
}
|
||||
|
||||
void Connection::notifyDNSError(const std::string& error)
|
||||
{
|
||||
ConnectionListener::Ptr listener = listener_.lock();
|
||||
if (listener)
|
||||
{
|
||||
listener->scheduleOnDNSError(error);
|
||||
}
|
||||
}
|
||||
|
||||
Connection::Ptr establishConnection(const ConnectionListener::Ptr& listener,
|
||||
ConnectionType type, const std::string& host, uint16_t port)
|
||||
|
|
|
@ -39,6 +39,7 @@ public:
|
|||
virtual void scheduleOnConnected() = 0;
|
||||
virtual void scheduleOnReceived(char *data, std::size_t size) = 0;
|
||||
virtual void scheduleOnError(const std::string &error) = 0;
|
||||
virtual void scheduleOnDNSError(const std::string &error) = 0;
|
||||
};
|
||||
|
||||
class Connection : private boost::noncopyable
|
||||
|
@ -61,6 +62,7 @@ public:
|
|||
void notifyConnected();
|
||||
void notifyRead(char* data, size_t size);
|
||||
void notifyError(const std::string& error);
|
||||
void notifyDNSError(const std::string& error);
|
||||
|
||||
private:
|
||||
ConnectionListener::WeakPtr listener_;
|
||||
|
|
|
@ -138,24 +138,24 @@ bool Job::setTarget(const char *target)
|
|||
|
||||
PowVariant Job::powVariant() const
|
||||
{
|
||||
if (m_powVariant == PowVariant::POW_AUTODETECT)
|
||||
{
|
||||
return (m_blob[0] > 6 ? PowVariant::POW_V1 : PowVariant::POW_V0);
|
||||
if (m_powVariant == PowVariant::POW_AUTODETECT) {
|
||||
if (m_blob[0] > 7) {
|
||||
return PowVariant::POW_V2;
|
||||
} else if (m_blob[0] > 6) {
|
||||
return PowVariant::POW_V1;
|
||||
} else {
|
||||
return PowVariant::POW_V0;
|
||||
}
|
||||
else if (m_powVariant == PowVariant::POW_XTL && m_blob[0] < 4)
|
||||
{
|
||||
}
|
||||
else if (m_powVariant == PowVariant::POW_XTL && m_blob[0] < 4) {
|
||||
return POW_V1;
|
||||
}
|
||||
else if (m_powVariant == PowVariant::POW_MSR && m_blob[0] < 7)
|
||||
{
|
||||
else if (m_powVariant == PowVariant::POW_MSR && m_blob[0] < 7) {
|
||||
return POW_V1;
|
||||
}
|
||||
else if (m_powVariant == PowVariant::POW_XHV && m_blob[0] < 3)
|
||||
{
|
||||
else if (m_powVariant == PowVariant::POW_XHV && m_blob[0] < 3) {
|
||||
return POW_V0;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
return m_powVariant;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,13 +36,13 @@
|
|||
#define APP_DESC "XMRigCC CPU miner"
|
||||
#define APP_COPYRIGHT "Copyright (C) 2017- BenDr0id"
|
||||
#endif
|
||||
#define APP_VERSION "1.7.0 (based on XMRig)"
|
||||
#define APP_VERSION "1.8.0 (based on XMRig)"
|
||||
#define APP_DOMAIN ""
|
||||
#define APP_SITE "https://github.com/Bendr0id/xmrigCC"
|
||||
#define APP_KIND "cpu"
|
||||
|
||||
#define APP_VER_MAJOR 1
|
||||
#define APP_VER_MINOR 7
|
||||
#define APP_VER_MINOR 8
|
||||
#define APP_VER_BUILD 0
|
||||
#define APP_VER_REV 0
|
||||
|
||||
|
|
|
@ -140,7 +140,7 @@ void MultiWorker::start()
|
|||
*Job::nonce(m_state->blob + i * m_state->job.size()) = ++m_state->nonces[i];
|
||||
}
|
||||
|
||||
CryptoNight::hash(m_hashFactor, m_state->job.powVariant(), m_state->blob, m_state->job.size(), m_hash, scratchPads);
|
||||
CryptoNight::hash(m_hashFactor, Options::i()->asmOptimization(), m_state->job.powVariant(), m_state->blob, m_state->job.size(), m_hash, scratchPads);
|
||||
|
||||
for (size_t i=0; i < m_hashFactor; ++i) {
|
||||
if (*reinterpret_cast<uint64_t *>(m_hash + 24 + i * 32) < m_state->job.target()) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue