#1.9.0 (#236)
# 1.9.0 - Integrated Monero CN-R variant so called CNv4, aka CN-R, aka CNv5, aka Cryptonight-R #233 (algo: "cryptonight", variant: "r") - Integrated Wownero CN-R variant #233 (algo: "cryptonight", variant: "wow") - Integrated Graft variant (algo: "cryptonight", variant: "rwz" OR variant: "graft") - Integrated X-Cash variant #234 (algo: "cryptonight", variant: "double" OR variant: "heavyx" OR variant: "xcash") - Integrated Zelerius variant (algo: "cryptonight", variant: "zls" OR variant: "zelerius") - Add miner version column to the Dashboard (version turns red when its outdated) - Fixed crash when remote logging is disabled
This commit is contained in:
parent
e48436cc91
commit
862c34b31e
50 changed files with 15657 additions and 2303 deletions
|
@ -1,3 +1,11 @@
|
|||
# 1.9.0
|
||||
- Integrated Monero CN-R variant so called CNv4, aka CN-R, aka CNv5, aka Cryptonight-R #233 (algo: "cryptonight", variant: "r")
|
||||
- Integrated Wownero CN-R variant #233 (algo: "cryptonight", variant: "wow")
|
||||
- Integrated Graft variant (algo: "cryptonight", variant: "rwz" OR variant: "graft")
|
||||
- Integrated X-Cash variant #234 (algo: "cryptonight", variant: "double" OR variant: "heavyx" OR variant: "xcash")
|
||||
- Integrated Zelerius variant (algo: "cryptonight", variant: "zls" OR variant: "zelerius")
|
||||
- Add miner version column to the Dashboard (version turns red when its outdated)
|
||||
- Fixed crash when remote logging is disabled
|
||||
# 1.8.13
|
||||
- Integrated HOSP variant (algo: "cryptonight", variant: "hosp")
|
||||
- Added ASM code/optimization for HOSP and RTO on Intel CPUs
|
||||
|
|
|
@ -54,6 +54,7 @@ set(SOURCES_CRYPTO
|
|||
src/crypto/c_jh.c
|
||||
src/crypto/c_skein.c
|
||||
src/crypto/CryptoNight.cpp
|
||||
src/crypto/CryptoNightR_gen.cpp
|
||||
)
|
||||
|
||||
set(SOURCES_COMMON
|
||||
|
@ -131,7 +132,7 @@ if (WIN32)
|
|||
add_definitions(-DBOOST_ALL_NO_LIB)
|
||||
endif(WIN32)
|
||||
|
||||
find_package(Boost 1.63.0 COMPONENTS system REQUIRED)
|
||||
find_package(Boost 1.62.0 COMPONENTS system REQUIRED)
|
||||
|
||||
include(cmake/flags.cmake)
|
||||
|
||||
|
|
|
@ -56,6 +56,40 @@ configure_file("src/crypto/asm/win/cnv2_main_loop_ryzen.inc.in" "src/crypto/asm/
|
|||
configure_file("src/crypto/asm/win/cnv2_double_main_loop_sandybridge.inc.in" "src/crypto/asm/win/cnv2_double_main_loop_fastv2_sandybridge.inc")
|
||||
configure_file("src/crypto/asm/win/cnv2_main_loop_soft_aes_sandybridge.inc.in" "src/crypto/asm/win/cnv2_main_loop_fastv2_soft_aes_sandybridge.inc")
|
||||
|
||||
# CN XCASH
|
||||
set(ALGO "xcash")
|
||||
set(ITERATIONS "1048576") #0x100000
|
||||
set(MASK "2097136") #0x1FFFF0
|
||||
|
||||
configure_file("src/crypto/asm/cnv2_main_loop_ivybridge.inc.in" "src/crypto/asm/cnv2_main_loop_xcash_ivybridge.inc")
|
||||
configure_file("src/crypto/asm/cnv2_main_loop_bulldozer.inc.in" "src/crypto/asm/cnv2_main_loop_xcash_bulldozer.inc")
|
||||
configure_file("src/crypto/asm/cnv2_main_loop_ryzen.inc.in" "src/crypto/asm/cnv2_main_loop_xcash_ryzen.inc")
|
||||
configure_file("src/crypto/asm/cnv2_double_main_loop_sandybridge.inc.in" "src/crypto/asm/cnv2_double_main_loop_xcash_sandybridge.inc")
|
||||
configure_file("src/crypto/asm/cnv2_main_loop_soft_aes_sandybridge.inc.in" "src/crypto/asm/cnv2_main_loop_xcash_soft_aes_sandybridge.inc")
|
||||
|
||||
configure_file("src/crypto/asm/win/cnv2_main_loop_ivybridge.inc.in" "src/crypto/asm/win/cnv2_main_loop_xcash_ivybridge.inc")
|
||||
configure_file("src/crypto/asm/win/cnv2_main_loop_bulldozer.inc.in" "src/crypto/asm/win/cnv2_main_loop_xcash_bulldozer.inc")
|
||||
configure_file("src/crypto/asm/win/cnv2_main_loop_ryzen.inc.in" "src/crypto/asm/win/cnv2_main_loop_xcash_ryzen.inc")
|
||||
configure_file("src/crypto/asm/win/cnv2_double_main_loop_sandybridge.inc.in" "src/crypto/asm/win/cnv2_double_main_loop_xcash_sandybridge.inc")
|
||||
configure_file("src/crypto/asm/win/cnv2_main_loop_soft_aes_sandybridge.inc.in" "src/crypto/asm/win/cnv2_main_loop_xcash_soft_aes_sandybridge.inc")
|
||||
|
||||
# CN ZELERIUS
|
||||
set(ALGO "zelerius")
|
||||
set(ITERATIONS "393216") #0x60000
|
||||
set(MASK "2097136") #0x1FFFF0
|
||||
|
||||
configure_file("src/crypto/asm/cnv2_main_loop_ivybridge.inc.in" "src/crypto/asm/cnv2_main_loop_zelerius_ivybridge.inc")
|
||||
configure_file("src/crypto/asm/cnv2_main_loop_bulldozer.inc.in" "src/crypto/asm/cnv2_main_loop_zelerius_bulldozer.inc")
|
||||
configure_file("src/crypto/asm/cnv2_main_loop_ryzen.inc.in" "src/crypto/asm/cnv2_main_loop_zelerius_ryzen.inc")
|
||||
configure_file("src/crypto/asm/cnv2_double_main_loop_sandybridge.inc.in" "src/crypto/asm/cnv2_double_main_loop_zelerius_sandybridge.inc")
|
||||
configure_file("src/crypto/asm/cnv2_main_loop_soft_aes_sandybridge.inc.in" "src/crypto/asm/cnv2_main_loop_zelerius_soft_aes_sandybridge.inc")
|
||||
|
||||
configure_file("src/crypto/asm/win/cnv2_main_loop_ivybridge.inc.in" "src/crypto/asm/win/cnv2_main_loop_zelerius_ivybridge.inc")
|
||||
configure_file("src/crypto/asm/win/cnv2_main_loop_bulldozer.inc.in" "src/crypto/asm/win/cnv2_main_loop_zelerius_bulldozer.inc")
|
||||
configure_file("src/crypto/asm/win/cnv2_main_loop_ryzen.inc.in" "src/crypto/asm/win/cnv2_main_loop_zelerius_ryzen.inc")
|
||||
configure_file("src/crypto/asm/win/cnv2_double_main_loop_sandybridge.inc.in" "src/crypto/asm/win/cnv2_double_main_loop_zelerius_sandybridge.inc")
|
||||
configure_file("src/crypto/asm/win/cnv2_main_loop_soft_aes_sandybridge.inc.in" "src/crypto/asm/win/cnv2_main_loop_zelerius_soft_aes_sandybridge.inc")
|
||||
|
||||
# CN LITE
|
||||
|
||||
set(ALGO "lite")
|
||||
|
@ -99,16 +133,19 @@ configure_file("src/crypto/asm/win/cnv2_main_loop_soft_aes_sandybridge.inc.in" "
|
|||
|
||||
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
|
||||
enable_language(ASM_MASM)
|
||||
set(XMRIG_ASM_FILE "src/crypto/asm/win/cn_main_loop.asm")
|
||||
set(XMRIG_ASM_FILE "src/crypto/asm/win/cn_main_loop.asm"
|
||||
"src/crypto/asm/win/CryptonightR_template.asm")
|
||||
set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY ASM_MASM)
|
||||
include_directories(${CMAKE_BINARY_DIR}/src/crypto/asm/win)
|
||||
else()
|
||||
enable_language(ASM)
|
||||
|
||||
if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
|
||||
set(XMRIG_ASM_FILE "src/crypto/asm/win/cn_main_loop_win_gcc.S")
|
||||
set(XMRIG_ASM_FILE "src/crypto/asm/win/cn_main_loop_win_gcc.S"
|
||||
"src/crypto/asm/win/CryptonightR_template.S")
|
||||
else()
|
||||
set(XMRIG_ASM_FILE "src/crypto/asm/cn_main_loop.S")
|
||||
set(XMRIG_ASM_FILE "src/crypto/asm/cn_main_loop.S"
|
||||
"src/crypto/asm/CryptonightR_template.S")
|
||||
endif()
|
||||
|
||||
set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY C)
|
||||
|
|
16
index.html
16
index.html
|
@ -65,6 +65,9 @@
|
|||
var currentServerTime = 0;
|
||||
var clockDrift = 0;
|
||||
|
||||
var latestVersion = 0;
|
||||
var currentVersion = 0;
|
||||
|
||||
$.fn.dataTable.ext.search.push(
|
||||
function( settings, data, dataIndex ) {
|
||||
|
||||
|
@ -108,6 +111,7 @@
|
|||
orderable: false
|
||||
},
|
||||
{data: "client_status.client_id", render: clientInfo},
|
||||
{data: "client_status.version", render: version},
|
||||
{data: "client_status.current_pool"},
|
||||
{data: "client_status.current_status", render: clientStatus},
|
||||
{data: "client_status.current_algo_name", render: algoAndPowVariantName},
|
||||
|
@ -674,6 +678,16 @@
|
|||
}
|
||||
}
|
||||
|
||||
function version( data, type, row ) {
|
||||
var clientVersion = parseInt(row.client_status.version.split('.').join(""));
|
||||
|
||||
if (latestVersion > clientVersion) {
|
||||
return '<span data-toggle="tooltip" title="Outdated"><div class="offline">' + data + '</div></span>';
|
||||
} else {
|
||||
return data;
|
||||
}
|
||||
}
|
||||
|
||||
function clientStatus( data, type, row ) {
|
||||
var lastStatus = row.client_status.last_status_update * 1000;
|
||||
|
||||
|
@ -822,6 +836,7 @@
|
|||
<tr>
|
||||
<th class="center" width="2%"><i class="fa fa-square-o" id="selectAllTop"></i></th>
|
||||
<th>Miner Id</th>
|
||||
<th>Version</th>
|
||||
<th>Pool</th>
|
||||
<th>Status</th>
|
||||
<th>Algo / PoW</th>
|
||||
|
@ -861,6 +876,7 @@
|
|||
<th></th>
|
||||
<th></th>
|
||||
<th></th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</tfoot>
|
||||
</table>
|
||||
|
|
|
@ -155,10 +155,14 @@ int App::start()
|
|||
return EINVAL;
|
||||
} else {
|
||||
if (Options::i()->colors()) {
|
||||
LOG_INFO(WHITE_BOLD("%s hash self-test... ") GREEN_BOLD("successful") ".", m_options->algoName());
|
||||
LOG_INFO(WHITE_BOLD("%s hash self-test... %s."),
|
||||
m_options->algoName(),
|
||||
Options::i()->skipSelfCheck() ? YELLOW_BOLD("skipped") : GREEN_BOLD("successful"));
|
||||
}
|
||||
else {
|
||||
LOG_INFO("%s hash self-test... successful.", m_options->algoName());
|
||||
LOG_INFO("%s hash self-test... %s.",
|
||||
m_options->algoName(),
|
||||
Options::i()->skipSelfCheck() ? "skipped" : "successful");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -30,7 +30,11 @@
|
|||
|
||||
void CpuImpl::initCommon()
|
||||
{
|
||||
memcpy(m_brand, "Unknown", 7);
|
||||
# ifdef XMRIG_ARMv8
|
||||
memcpy(m_brand, "ARMv8", 5);
|
||||
# else
|
||||
memcpy(m_brand, "ARMv7", 5);
|
||||
# endif
|
||||
|
||||
# if defined(XMRIG_ARMv8)
|
||||
m_flags |= Cpu::X86_64;
|
||||
|
|
10
src/Mem.cpp
10
src/Mem.cpp
|
@ -67,9 +67,17 @@ ScratchPadMem Mem::create(ScratchPad** scratchPads, int threadId)
|
|||
allocate(scratchPadMem, m_useHugePages);
|
||||
|
||||
for (size_t i = 0; i < getThreadHashFactor(threadId); ++i) {
|
||||
ScratchPad* scratchPad = static_cast<ScratchPad *>(_mm_malloc(sizeof(ScratchPad), 4096));
|
||||
auto* scratchPad = static_cast<ScratchPad *>(_mm_malloc(sizeof(ScratchPad), 4096));
|
||||
scratchPad->memory = scratchPadMem.memory + (i * scratchPadSize);
|
||||
|
||||
auto* p = reinterpret_cast<uint8_t*>(allocateExecutableMemory(0x4000));
|
||||
scratchPad->generated_code = reinterpret_cast<cn_mainloop_fun_ms_abi>(p);
|
||||
scratchPad->generated_code_double = reinterpret_cast<cn_mainloop_double_fun_ms_abi>(p + 0x2000);
|
||||
|
||||
scratchPad->generated_code_data.variant = PowVariant::LAST_ITEM;
|
||||
scratchPad->generated_code_data.height = (uint64_t)(-1);
|
||||
scratchPad->generated_code_double_data = scratchPad->generated_code_data;
|
||||
|
||||
scratchPads[i] = scratchPad;
|
||||
}
|
||||
|
||||
|
|
|
@ -75,6 +75,9 @@ public:
|
|||
static ScratchPadMem create(ScratchPad** scratchPads, int threadId);
|
||||
static void release(ScratchPad** scratchPads, ScratchPadMem& scratchPadMem, int threadId);
|
||||
|
||||
static void *allocateExecutableMemory(size_t size);
|
||||
static void flushInstructionCache(void *p, size_t size);
|
||||
|
||||
static inline size_t hashFactor() { return m_hashFactor; }
|
||||
static inline size_t getThreadHashFactor(int threadId)
|
||||
{
|
||||
|
|
|
@ -86,3 +86,19 @@ void Mem::release(ScratchPadMem &scratchPadMem)
|
|||
_mm_free(scratchPadMem.memory);
|
||||
}
|
||||
}
|
||||
|
||||
void *Mem::allocateExecutableMemory(size_t size)
|
||||
{
|
||||
# if defined(__APPLE__)
|
||||
return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
|
||||
# else
|
||||
return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
# endif
|
||||
}
|
||||
|
||||
void Mem::flushInstructionCache(void *p, size_t size)
|
||||
{
|
||||
# ifndef __FreeBSD__
|
||||
__builtin___clear_cache(reinterpret_cast<char*>(p), reinterpret_cast<char*>(p) + size);
|
||||
# endif
|
||||
}
|
||||
|
|
|
@ -182,4 +182,14 @@ void Mem::release(ScratchPadMem &scratchPadMem)
|
|||
else {
|
||||
_mm_free(scratchPadMem.memory);
|
||||
}
|
||||
}
|
||||
|
||||
void *Mem::allocateExecutableMemory(size_t size)
|
||||
{
|
||||
return VirtualAlloc(0, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
|
||||
}
|
||||
|
||||
void Mem::flushInstructionCache(void *p, size_t size)
|
||||
{
|
||||
::FlushInstructionCache(GetCurrentProcess(), p, size);
|
||||
}
|
|
@ -73,7 +73,7 @@ Options:\n"
|
|||
-k, --keepalive send keepalived for prevent timeout (need pool support)\n\
|
||||
-r, --retries=N number of times to retry before switch to backup server (default: 5)\n\
|
||||
-R, --retry-pause=N time to pause between retries (default: 5)\n\
|
||||
--pow-variant=V specificy the PoW variat to use: -> 'auto' (default), '0' (v0), '1' (v1, aka cnv7), '2' (v2, aka cnv8), 'ipbc' (tube), 'xao', 'xtl' (including autodetect for > v5), 'rto', 'xfh', 'upx', 'turtle', 'hosp'\n\
|
||||
--pow-variant=V specificy the PoW variat to use: \n'auto' (default), '0', '1', '2', 'ipbc', 'xao', 'xtl', 'rto', 'xfh', 'upx', 'turtle', 'hosp', 'r', 'wow', 'double (xcash)', 'zls' (zelerius), 'rwz' (graft)\n\
|
||||
for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations\n\
|
||||
--asm-optimization=V specificy the ASM optimization to use: -> 'auto' (default), 'intel', 'ryzen', 'bulldozer', 'off' \n\
|
||||
--multihash-factor=N number of hash blocks to process at a time (don't set or 0 enables automatic selection of optimal number of hash blocks)\n\
|
||||
|
@ -92,7 +92,8 @@ Options:\n"
|
|||
--api-access-token=T access token for API\n\
|
||||
--api-worker-id=ID custom worker-id for API\n\
|
||||
--reboot-cmd command/bat to execute to Reboot miner\n\
|
||||
--force-pow-variant disable pow/variant parsing from pool\n"
|
||||
--force-pow-variant skip pow/variant parsing from pool\n\
|
||||
--skip-self-check skip self check on startup\n"
|
||||
# ifndef XMRIG_NO_CC
|
||||
"\
|
||||
--cc-url=URL url of the CC Server\n\
|
||||
|
@ -179,6 +180,7 @@ static struct option const options[] = {
|
|||
{ "force-pow-variant", 0, nullptr, 1016 },
|
||||
{ "pow-variant", 1, nullptr, 1017 },
|
||||
{ "variant", 1, nullptr, 1017 },
|
||||
{ "skip-self-check", 0, nullptr, 1018 },
|
||||
{ "api-port", 1, nullptr, 4000 },
|
||||
{ "api-access-token", 1, nullptr, 4001 },
|
||||
{ "api-worker-id", 1, nullptr, 4002 },
|
||||
|
@ -237,6 +239,7 @@ static struct option const config_options[] = {
|
|||
{ "force-pow-variant", 0, nullptr, 1016 },
|
||||
{ "pow-variant", 1, nullptr, 1017 },
|
||||
{ "variant", 1, nullptr, 1017 },
|
||||
{ "skip-self-check", 0, nullptr, 1018 },
|
||||
{ "doublehash-thread-mask", 1, nullptr, 4013 },
|
||||
{ "multihash-thread-mask", 1, nullptr, 4013 },
|
||||
{ "asm-optimization", 1, nullptr, 4020 },
|
||||
|
@ -331,7 +334,10 @@ constexpr static const char *pow_variant_names[] = {
|
|||
"fast2",
|
||||
"upx",
|
||||
"turtle",
|
||||
"hosp"
|
||||
"hosp",
|
||||
"wow",
|
||||
"r",
|
||||
"xcash"
|
||||
};
|
||||
|
||||
constexpr static const char *asm_optimization_names[] = {
|
||||
|
@ -380,6 +386,7 @@ Options::Options(int argc, char **argv) :
|
|||
m_ccPushPeriodicStatus(false),
|
||||
m_ccPushZeroHashrateMiners(false),
|
||||
m_forcePowVariant(false),
|
||||
m_skipSelfCheck(false),
|
||||
m_fileName(Platform::defaultConfigName()),
|
||||
m_apiToken(nullptr),
|
||||
m_apiWorkerId(nullptr),
|
||||
|
@ -643,11 +650,14 @@ bool Options::parseArg(int key, const char *arg)
|
|||
return parseBoolean(key, true);
|
||||
|
||||
case 1016: /* --force-pow-variant */
|
||||
return parseBoolean(key, false);
|
||||
return parseBoolean(key, true);
|
||||
|
||||
case 1017: /* --pow-variant/--variant */
|
||||
return parsePowVariant(arg);
|
||||
|
||||
case 1018: /* --skip-self-check */
|
||||
return parseBoolean(key, true);
|
||||
|
||||
case 4016: /* --cc-use-tls */
|
||||
return parseBoolean(key, true);
|
||||
|
||||
|
@ -912,6 +922,10 @@ bool Options::parseBoolean(int key, bool enable)
|
|||
m_forcePowVariant = enable;
|
||||
break;
|
||||
|
||||
case 1018: /* --skip-self-check */
|
||||
m_skipSelfCheck = enable;
|
||||
break;
|
||||
|
||||
case 2000: /* --colors */
|
||||
m_colors = enable;
|
||||
break;
|
||||
|
@ -1206,6 +1220,31 @@ bool Options::parsePowVariant(const char *powVariant)
|
|||
break;
|
||||
}
|
||||
|
||||
if (i == ARRAY_SIZE(pow_variant_names) - 1 && !strcmp(powVariant, "wow")) {
|
||||
m_powVariant = POW_WOW;
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "4") || !strcmp(powVariant, "r") || !strcmp(powVariant, "cnv4") || !strcmp(powVariant, "cnv5"))) {
|
||||
m_powVariant = POW_V4;
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "xcash") || !strcmp(powVariant, "heavyx") || !strcmp(powVariant, "double"))) {
|
||||
m_powVariant = POW_DOUBLE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "zelerius") || !strcmp(powVariant, "zls") || !strcmp(powVariant, "zlx"))) {
|
||||
m_powVariant = POW_ZELERIUS;
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "rwz") || !strcmp(powVariant, "graft"))) {
|
||||
m_powVariant = POW_RWZ;
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == ARRAY_SIZE(pow_variant_names) - 1) {
|
||||
showUsage(1);
|
||||
return false;
|
||||
|
|
|
@ -84,7 +84,8 @@ public:
|
|||
inline bool ccPushZeroHashrateMiners() const { return m_ccPushZeroHashrateMiners; }
|
||||
inline bool ccUsePushover() const { return ccPushoverUser() && ccPushoverToken(); }
|
||||
inline bool ccUseTelegram() const { return ccTelegramBotToken() && ccTelegramChatId(); }
|
||||
inline bool forcePowVariant() const { return m_forcePowVariant; };
|
||||
inline bool forcePowVariant() const { return m_forcePowVariant; }
|
||||
inline bool skipSelfCheck() const { return m_skipSelfCheck; }
|
||||
inline const char *fileName() const { return m_fileName; }
|
||||
inline const char *apiToken() const { return m_apiToken; }
|
||||
inline const char *apiWorkerId() const { return m_apiWorkerId; }
|
||||
|
@ -171,6 +172,7 @@ private:
|
|||
bool m_ccPushPeriodicStatus;
|
||||
bool m_ccPushZeroHashrateMiners;
|
||||
bool m_forcePowVariant;
|
||||
bool m_skipSelfCheck;
|
||||
const char* m_fileName;
|
||||
char *m_apiToken;
|
||||
char *m_apiWorkerId;
|
||||
|
|
|
@ -39,6 +39,11 @@ enum PowVariant
|
|||
POW_UPX,
|
||||
POW_TURTLE,
|
||||
POW_HOSP,
|
||||
POW_WOW,
|
||||
POW_V4,
|
||||
POW_DOUBLE,
|
||||
POW_ZELERIUS,
|
||||
POW_RWZ,
|
||||
LAST_ITEM
|
||||
};
|
||||
|
||||
|
@ -74,6 +79,16 @@ inline std::string getPowVariantName(PowVariant powVariant)
|
|||
return "turtle";
|
||||
case POW_HOSP:
|
||||
return "hosp";
|
||||
case POW_WOW:
|
||||
return "wow";
|
||||
case POW_V4:
|
||||
return "r";
|
||||
case POW_DOUBLE:
|
||||
return "double";
|
||||
case POW_ZELERIUS:
|
||||
return "zls";
|
||||
case POW_RWZ:
|
||||
return "rwz";
|
||||
case POW_AUTODETECT:
|
||||
default:
|
||||
return "-1";
|
||||
|
@ -149,6 +164,16 @@ inline PowVariant parseVariant(const std::string variant)
|
|||
powVariant = PowVariant::POW_TURTLE;
|
||||
} else if (variant == "hosp" || variant == "hospital") {
|
||||
powVariant = PowVariant::POW_HOSP;
|
||||
} else if (variant == "wow" || variant == "wownero") {
|
||||
powVariant = PowVariant::POW_WOW;
|
||||
} else if (variant == "r" || variant == "4" || variant == "cnv4" || variant == "cnv5") {
|
||||
powVariant = PowVariant::POW_V4;
|
||||
} else if (variant == "xcash" || variant == "heavyx" || variant == "double") {
|
||||
powVariant = PowVariant::POW_DOUBLE;
|
||||
} else if (variant == "zelerius" || variant == "zls" || variant == "zlx") {
|
||||
powVariant = PowVariant::POW_ZELERIUS;
|
||||
} else if (variant == "rwz" || variant == "graft") {
|
||||
powVariant = PowVariant::POW_RWZ;
|
||||
}
|
||||
|
||||
return powVariant;
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
"threads": 0, // number of miner threads (not set or 0 enables automatic selection of optimal thread count)
|
||||
"multihash-factor": 0, // number of hash blocks to process at a time (not set or 0 enables automatic selection of optimal number of hash blocks)
|
||||
"multihash-thread-mask" : null, // for multihash-factors>0 only, limits multihash to given threads (mask), mask "0x3" means run multihash on thread 0 and 1 only (default: all threads)
|
||||
"pow-variant" : "auto", // specificy the PoW variat to use: -> auto (default), 0 (v0), 1 (v1, aka monerov7, aeonv7), 2 (v2, aka monerov8), tube (ipbc), alloy (xao), xtl (including autodetect for > v5), msr, xhv, rto, xfh, upx, turtle, hosp
|
||||
"pow-variant" : "auto", // specificy the PoW variat to use: -> auto (default), '0', '1', '2', 'ipbc', 'xao', 'xtl', 'rto', 'xfh', 'upx', 'turtle', 'hosp', 'r', 'wow', 'double (xcash)', 'zls' (zelerius), 'rwz' (graft)
|
||||
// for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations
|
||||
"asm-optimization" : "auto", // specificy the ASM optimization to use: -> auto (default), intel, ryzen, bulldozer, off
|
||||
"background": false, // true to run the miner in the background (Windows only, for *nix plase use screen/tmux or systemd service instead)
|
||||
|
@ -21,6 +21,7 @@
|
|||
"syslog": false, // use system log for output messages
|
||||
"reboot-cmd" : "", // command to execute to reboot the OS
|
||||
"force-pow-variant" : false, // force pow variant, dont parse pow/variant from pool job
|
||||
"skip-self-check" : false, // skip the self check on startup
|
||||
"pools": [
|
||||
{
|
||||
"url": "donate2.graef.in:80", // URL of mining server
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <Mem.h>
|
||||
#include "crypto/CryptoNight.h"
|
||||
|
||||
#if defined(XMRIG_ARM)
|
||||
|
@ -34,282 +35,398 @@
|
|||
#include "crypto/CryptoNight_test.h"
|
||||
|
||||
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
static void cryptonight_aesni(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
# if !defined(XMRIG_ARMv7)
|
||||
if (powVersion == PowVariant::POW_V1) {
|
||||
if (variant == PowVariant::POW_V1) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V1, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_V2) {
|
||||
} else if (variant == PowVariant::POW_V2) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
#else
|
||||
if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) ||
|
||||
(asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1) ||
|
||||
(asmOptimization == AsmOptimization::ASM_BULLDOZER && NUM_HASH_BLOCKS == 1)) {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V2, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_ALLOY) {
|
||||
CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||
} else if (powVersion == PowVariant::POW_XTL) {
|
||||
} else if (variant == PowVariant::POW_V4) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V4, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
|
||||
#else
|
||||
if (asmOptimization != AsmOptimization::ASM_OFF && NUM_HASH_BLOCKS <= 2) {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V4, NUM_HASH_BLOCKS>::hashPowV4_asm(input, size, output, scratchPad, height, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V4, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
|
||||
}
|
||||
#endif
|
||||
} else if (variant == PowVariant::POW_WOW) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_WOW, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
|
||||
#else
|
||||
if (asmOptimization != AsmOptimization::ASM_OFF && NUM_HASH_BLOCKS <= 2) {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_WOW, NUM_HASH_BLOCKS>::hashPowV4_asm(input, size, output, scratchPad, height, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_WOW, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
|
||||
}
|
||||
#endif
|
||||
} else if (variant == PowVariant::POW_ALLOY) {
|
||||
CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_ALLOY, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||
} else if (variant == PowVariant::POW_XTL) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_XTL, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
|
||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_XTL, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_XTL, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_FAST_2) {
|
||||
} else if (variant == PowVariant::POW_FAST_2) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_FAST_2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
#else
|
||||
if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) ||
|
||||
(asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1) ||
|
||||
(asmOptimization == AsmOptimization::ASM_BULLDOZER && NUM_HASH_BLOCKS == 1)) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion);
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_FAST_2, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_FAST_2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_MSR) {
|
||||
} else if (variant == PowVariant::POW_DOUBLE) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_DOUBLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
#else
|
||||
if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) ||
|
||||
(asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1) ||
|
||||
(asmOptimization == AsmOptimization::ASM_BULLDOZER && NUM_HASH_BLOCKS == 1)) {
|
||||
CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_DOUBLE, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_DOUBLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (variant == PowVariant::POW_ZELERIUS) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_ZELERIUS, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
#else
|
||||
if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) ||
|
||||
(asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1) ||
|
||||
(asmOptimization == AsmOptimization::ASM_BULLDOZER && NUM_HASH_BLOCKS == 1)) {
|
||||
CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_ZELERIUS, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_ZELERIUS, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (variant == PowVariant::POW_RWZ) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false,POW_RWZ, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
#else
|
||||
if ((asmOptimization != AsmOptimization::ASM_OFF && NUM_HASH_BLOCKS <= 2)) {
|
||||
CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_RWZ, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_RWZ, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (variant == PowVariant::POW_MSR) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_MSR, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_MSR, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_MSR, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_RTO || powVersion == PowVariant::POW_HOSP) {
|
||||
} else if (variant == PowVariant::POW_RTO) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_RTO, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_RTO, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_RTO, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_XFH) {
|
||||
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
||||
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
if (powVersion == PowVariant::POW_V1) {
|
||||
} else if (variant == PowVariant::POW_HOSP) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_HOSP, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_HOSP, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_HOSP, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_V2) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_FAST_2) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_ALLOY) {
|
||||
CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||
} else if (powVersion == PowVariant::POW_XTL) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_MSR) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_RTO || powVersion == PowVariant::POW_HOSP) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_XFH) {
|
||||
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad);
|
||||
} else if (variant == PowVariant::POW_XFH) {
|
||||
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_XFH, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||
}
|
||||
}
|
||||
|
||||
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_lite_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
# if !defined(XMRIG_ARMv7)
|
||||
if (powVersion == PowVariant::POW_V1) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_TUBE) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
} else if (powVersion == PowVariant::POW_UPX) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V0, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
||||
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_lite_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
if (powVersion == PowVariant::POW_V1) {
|
||||
static void cryptonight_softaes(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
if (variant == PowVariant::POW_V1) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V1, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (powVersion == PowVariant::POW_TUBE) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
} else if (powVersion == PowVariant::POW_UPX) {
|
||||
} else if (variant == PowVariant::POW_V2) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V2, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (variant == PowVariant::POW_V4) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V4, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
|
||||
#else
|
||||
if (asmOptimization != AsmOptimization::ASM_OFF && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V4, NUM_HASH_BLOCKS>::hashPowV4_asm(input, size, output, scratchPad, height, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V4, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
|
||||
}
|
||||
#endif
|
||||
} else if (variant == PowVariant::POW_WOW) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_WOW,NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
|
||||
#else
|
||||
if (asmOptimization != AsmOptimization::ASM_OFF && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_WOW, NUM_HASH_BLOCKS>::hashPowV4_asm(input, size, output, scratchPad, height, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_WOW, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
|
||||
}
|
||||
#endif
|
||||
} else if (variant == PowVariant::POW_FAST_2) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_FAST_2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_FAST_2, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_FAST_2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (variant == PowVariant::POW_DOUBLE) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_DOUBLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_DOUBLE, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_DOUBLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (variant == PowVariant::POW_ZELERIUS) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_ZELERIUS, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_ZELERIUS, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_ZELERIUS, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (variant == PowVariant::POW_RWZ) {
|
||||
CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_RWZ, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
} else if (variant == PowVariant::POW_ALLOY) {
|
||||
CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_ALLOY, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||
} else if (variant == PowVariant::POW_XTL) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_XTL, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_XTL, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_XTL, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (variant == PowVariant::POW_MSR) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_MSR, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_MSR, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_MSR, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (variant == PowVariant::POW_RTO) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_RTO, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_RTO, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_RTO, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (variant == PowVariant::POW_HOSP) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_HOSP, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_HOSP, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_HOSP, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (variant == PowVariant::POW_XFH) {
|
||||
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_XFH, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V0, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||
}
|
||||
}
|
||||
|
||||
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_super_lite_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
static void cryptonight_lite_aesni(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
# if !defined(XMRIG_ARMv7)
|
||||
if (variant == PowVariant::POW_V1) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_V1, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (variant == PowVariant::POW_TUBE) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_TUBE, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
} else if (variant == PowVariant::POW_UPX) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_UPX, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_UPX, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_UPX, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_V0, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
||||
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_lite_softaes(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
if (variant == PowVariant::POW_V1) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_V1, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else if (variant == PowVariant::POW_TUBE) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_TUBE, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||
} else if (variant == PowVariant::POW_UPX) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_UPX, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_UPX, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_UPX, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_V0, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||
}
|
||||
}
|
||||
|
||||
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_super_lite_aesni(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
|
||||
}
|
||||
|
||||
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_super_lite_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
static void cryptonight_super_lite_softaes(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
|
||||
}
|
||||
|
||||
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_ultra_lite_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
static void cryptonight_ultra_lite_aesni(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
# if !defined(XMRIG_ARMv7)
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, POW_TURTLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
#else
|
||||
if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) ||
|
||||
(asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1) ||
|
||||
(asmOptimization == AsmOptimization::ASM_BULLDOZER && NUM_HASH_BLOCKS == 1)) {
|
||||
CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion);
|
||||
CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, POW_TURTLE, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, POW_TURTLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
# endif
|
||||
}
|
||||
|
||||
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_ultra_lite_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
static void cryptonight_ultra_lite_softaes(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
#if defined(XMRIG_ARM)
|
||||
CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, POW_TURTLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
#else
|
||||
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||
CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion);
|
||||
CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, POW_TURTLE, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
|
||||
} else {
|
||||
CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, POW_TURTLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_heavy_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
static void cryptonight_heavy_aesni(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
# if !defined(XMRIG_ARMv7)
|
||||
if (powVersion == PowVariant::POW_XHV) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad);
|
||||
if (variant == PowVariant::POW_XHV) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, POW_XHV, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
|
||||
}
|
||||
else if (powVersion == PowVariant::POW_TUBE) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, NUM_HASH_BLOCKS>::hashHeavyTube(input, size, output, scratchPad);
|
||||
else if (variant == PowVariant::POW_TUBE) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, POW_TUBE, NUM_HASH_BLOCKS>::hashHeavyTube(input, size, output, scratchPad);
|
||||
}
|
||||
else {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, POW_V0, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
||||
template <size_t NUM_HASH_BLOCKS>
|
||||
static void cryptonight_heavy_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
if (powVersion == PowVariant::POW_XHV) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad);
|
||||
static void cryptonight_heavy_softaes(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
|
||||
if (variant == PowVariant::POW_XHV) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, POW_XHV, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
|
||||
}
|
||||
else if (powVersion == PowVariant::POW_TUBE) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, NUM_HASH_BLOCKS>::hashHeavyTube(input, size, output, scratchPad);
|
||||
else if (variant == PowVariant::POW_TUBE) {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, POW_TUBE, NUM_HASH_BLOCKS>::hashHeavyTube(input, size, output, scratchPad);
|
||||
}
|
||||
else {
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
|
||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, POW_V0, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
|
||||
}
|
||||
}
|
||||
|
||||
void (*cryptonight_hash_ctx[MAX_NUM_HASH_BLOCKS])(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad);
|
||||
void (*cryptonight_hash_ctx[MAX_NUM_HASH_BLOCKS])(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad);
|
||||
|
||||
template <size_t HASH_FACTOR>
|
||||
void setCryptoNightHashMethods(Options::Algo algo, bool aesni)
|
||||
|
@ -377,15 +494,16 @@ bool CryptoNight::init(int algo, bool aesni)
|
|||
}
|
||||
|
||||
setCryptoNightHashMethods<MAX_NUM_HASH_BLOCKS>(static_cast<Options::Algo>(algo), aesni);
|
||||
return selfTest(algo);
|
||||
|
||||
return Options::i()->skipSelfCheck() ? true : selfCheck(algo);
|
||||
}
|
||||
|
||||
void CryptoNight::hash(size_t factor, AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad)
|
||||
void CryptoNight::hash(size_t factor, AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad)
|
||||
{
|
||||
cryptonight_hash_ctx[factor-1](asmOptimization, powVersion, input, size, output, scratchPad);
|
||||
cryptonight_hash_ctx[factor-1](asmOptimization, height, variant, input, size, output, scratchPad);
|
||||
}
|
||||
|
||||
bool CryptoNight::selfTest(int algo)
|
||||
bool CryptoNight::selfCheck(int algo)
|
||||
{
|
||||
if (cryptonight_hash_ctx[0] == nullptr
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
|
@ -413,6 +531,14 @@ bool CryptoNight::selfTest(int algo)
|
|||
ScratchPad* scratchPad = static_cast<ScratchPad *>(_mm_malloc(sizeof(ScratchPad), 4096));
|
||||
scratchPad->memory = (uint8_t *) _mm_malloc(MEMORY * 6, 16);
|
||||
|
||||
auto* p = reinterpret_cast<uint8_t*>(Mem::allocateExecutableMemory(0x4000));
|
||||
scratchPad->generated_code = reinterpret_cast<cn_mainloop_fun_ms_abi>(p);
|
||||
scratchPad->generated_code_double = reinterpret_cast<cn_mainloop_double_fun_ms_abi>(p + 0x2000);
|
||||
|
||||
scratchPad->generated_code_data.variant = PowVariant::LAST_ITEM;
|
||||
scratchPad->generated_code_data.height = (uint64_t)(-1);
|
||||
scratchPad->generated_code_double_data = scratchPad->generated_code_data;
|
||||
|
||||
scratchPads[i] = scratchPad;
|
||||
}
|
||||
|
||||
|
@ -427,129 +553,128 @@ bool CryptoNight::selfTest(int algo)
|
|||
if (algo == Options::ALGO_CRYPTONIGHT_HEAVY) {
|
||||
// cn-heavy
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy, 96) == 0;
|
||||
#endif
|
||||
|
||||
// cn-heavy haven
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
|
||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
|
||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
|
||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 96) == 0;
|
||||
#endif
|
||||
|
||||
// cn-heavy bittube
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_tube, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_tube, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_tube, 96) == 0;
|
||||
#endif
|
||||
|
||||
} else if (algo == Options::ALGO_CRYPTONIGHT_LITE) {
|
||||
// cn-lite v0
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v0_lite, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v0_lite, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v0_lite, 96) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 3
|
||||
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[3](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v0_lite, 128) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 4
|
||||
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[4](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v0_lite, 160) == 0;
|
||||
#endif
|
||||
|
||||
// cn-lite v7 tests
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v1_lite, 32) == 0;
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v1_lite, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v1_lite, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v1_lite, 96) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 3
|
||||
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[3](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v1_lite, 128) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 4
|
||||
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[4](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_v1_lite, 160) == 0;
|
||||
#endif
|
||||
|
||||
|
||||
// cn-lite ibpc tests
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 96) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 3
|
||||
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[3](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 128) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 4
|
||||
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[4](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 160) == 0;
|
||||
#endif
|
||||
|
||||
// cn-lite upx
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_UPX, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_UPX, test_input, 76, output, scratchPads);
|
||||
resultLite = resultLite && memcmp(output, test_output_upx, 32) == 0;
|
||||
|
||||
} else if (algo == Options::ALGO_CRYPTONIGHT_SUPERLITE) {
|
||||
|
@ -559,123 +684,173 @@ bool CryptoNight::selfTest(int algo)
|
|||
} else if (algo == Options::ALGO_CRYPTONIGHT_ULTRALITE) {
|
||||
// cn ultralite (cnv8 + turtle)
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_TURTLE, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_TURTLE, test_input, 76, output, scratchPads);
|
||||
resultUltraLite = resultUltraLite && memcmp(output, test_output_turtle, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_TURTLE, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_TURTLE, test_input, 76, output, scratchPads);
|
||||
resultUltraLite = resultUltraLite && memcmp(output, test_output_turtle, 64) == 0;
|
||||
#endif
|
||||
} else {
|
||||
// cn v0 aka orignal
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V0,test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V0,test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v0, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v0, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v0, 96) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 3
|
||||
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[3](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v0, 128) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 4
|
||||
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[4](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v0, 160) == 0;
|
||||
#endif
|
||||
|
||||
// cn v7 aka cnv1
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v1, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v1, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v1, 96) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 3
|
||||
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[3](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v1, 128) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 4
|
||||
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[4](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v1, 160) == 0;
|
||||
#endif
|
||||
|
||||
// cnv7 + xtl
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_XTL,test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_XTL,test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_xtl, 32) == 0;
|
||||
|
||||
// cnv7 + msr aka cn-fast
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_MSR,test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_MSR,test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_msr, 32) == 0;
|
||||
|
||||
// cnv7 + alloy
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_ALLOY,test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_ALLOY,test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_alloy, 32) == 0;
|
||||
|
||||
// cnv7 + hosp/rto
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_HOSP,test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_HOSP,test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_hosp, 32) == 0;
|
||||
|
||||
// cnv8 aka cnv2
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v2, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v2, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v2, 96) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 3
|
||||
cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[3](asmOptimization, 0, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v2, 128) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 4
|
||||
cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[4](asmOptimization, 0, PowVariant::POW_V2, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v2, 160) == 0;
|
||||
#endif
|
||||
|
||||
// cn xfh aka cn-heavy-superfast
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_XFH, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_XFH, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_xfh, 32) == 0;
|
||||
|
||||
// cnv8 + xtl aka cn-fast2
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_FAST_2, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_FAST_2, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_xtl_v9, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_FAST_2, test_input, 76, output, scratchPads);
|
||||
cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_FAST_2, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_xtl_v9, 64) == 0;
|
||||
#endif
|
||||
|
||||
// cnv8 + xcash
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_DOUBLE, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_xcash, 32) == 0;
|
||||
|
||||
// cnv8 + zelerius
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_ZELERIUS, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_zelerius, 32) == 0;
|
||||
|
||||
// cnv8 + rwz
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_RWZ, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_rwz, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_RWZ, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_rwz, 64) == 0;
|
||||
#endif
|
||||
|
||||
// cnv9 aka cnv4 aka cnv5 aka cnr
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, 10000, PowVariant::POW_V4, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v4, 32) == 0;
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 1
|
||||
cryptonight_hash_ctx[1](asmOptimization, 10000, PowVariant::POW_V4, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v4, 64) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 2
|
||||
cryptonight_hash_ctx[2](asmOptimization, 10000, PowVariant::POW_V4, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v4, 96) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 3
|
||||
cryptonight_hash_ctx[3](asmOptimization, 10000, PowVariant::POW_V4, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v4, 128) == 0;
|
||||
#endif
|
||||
|
||||
#if MAX_NUM_HASH_BLOCKS > 4
|
||||
cryptonight_hash_ctx[4](asmOptimization, 10000, PowVariant::POW_V4, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v4, 160) == 0;
|
||||
#endif
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, 10001, PowVariant::POW_V4, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v4_1, 32) == 0;
|
||||
|
||||
cryptonight_hash_ctx[0](asmOptimization, 10002, PowVariant::POW_V4, test_input, 76, output, scratchPads);
|
||||
result = result && memcmp(output, test_output_v4_2, 32) == 0;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < MAX_NUM_HASH_BLOCKS; ++i) {
|
||||
|
|
|
@ -42,8 +42,25 @@
|
|||
#define POW_DEFAULT_INDEX_SHIFT 3
|
||||
#define POW_XLT_V4_INDEX_SHIFT 4
|
||||
|
||||
#if defined _MSC_VER || defined XMRIG_ARM
|
||||
#define ABI_ATTRIBUTE
|
||||
#else
|
||||
#define ABI_ATTRIBUTE __attribute__((ms_abi))
|
||||
#endif
|
||||
|
||||
struct ScratchPad;
|
||||
typedef void(*cn_mainloop_fun_ms_abi)(ScratchPad*) ABI_ATTRIBUTE;
|
||||
typedef void(*cn_mainloop_double_fun_ms_abi)(ScratchPad*, ScratchPad*) ABI_ATTRIBUTE;
|
||||
|
||||
struct cryptonight_r_data {
|
||||
int variant;
|
||||
uint64_t height;
|
||||
|
||||
bool match(const int v, const uint64_t h) const { return (v == variant) && (h == height); }
|
||||
};
|
||||
|
||||
struct ScratchPad {
|
||||
alignas(16) uint8_t state[224]; // 224 instead of 200 to maintain aligned to 16 byte boundaries
|
||||
alignas(16) uint8_t state[224];
|
||||
alignas(16) uint8_t* memory;
|
||||
|
||||
// Additional stuff for asm impl
|
||||
|
@ -51,6 +68,11 @@ struct ScratchPad {
|
|||
const void* input;
|
||||
uint8_t* variant_table;
|
||||
const uint32_t* t_fn;
|
||||
|
||||
cn_mainloop_fun_ms_abi generated_code;
|
||||
cn_mainloop_double_fun_ms_abi generated_code_double;
|
||||
cryptonight_r_data generated_code_data;
|
||||
cryptonight_r_data generated_code_double_data;
|
||||
};
|
||||
|
||||
alignas(64) static uint8_t variant1_table[256];
|
||||
|
@ -63,12 +85,12 @@ class CryptoNight
|
|||
{
|
||||
public:
|
||||
static bool init(int algo, bool aesni);
|
||||
static void hash(size_t factor, AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPads);
|
||||
static void hash(size_t factor, AsmOptimization asmOptimization, uint64_t height, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPads);
|
||||
|
||||
public:
|
||||
|
||||
private:
|
||||
static bool selfTest(int algo);
|
||||
static bool selfCheck(int algo);
|
||||
};
|
||||
|
||||
|
||||
|
|
190
src/crypto/CryptoNightR_gen.cpp
Normal file
190
src/crypto/CryptoNightR_gen.cpp
Normal file
|
@ -0,0 +1,190 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <cstring>
|
||||
|
||||
typedef void(*void_func)();
|
||||
|
||||
#include "crypto/asm/CryptonightR_template.h"
|
||||
#include "Mem.h"
|
||||
|
||||
#if !defined XMRIG_ARM && !defined XMRIG_NO_ASM
|
||||
|
||||
#include "crypto/CryptoNight_x86.h"
|
||||
|
||||
static inline void add_code(uint8_t* &p, void (*p1)(), void (*p2)())
|
||||
{
|
||||
const ptrdiff_t size = reinterpret_cast<const uint8_t*>(p2) - reinterpret_cast<const uint8_t*>(p1);
|
||||
if (size > 0) {
|
||||
memcpy(p, reinterpret_cast<void*>(p1), size);
|
||||
p += size;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void add_random_math(uint8_t* &p, const V4_Instruction* code, int code_size, const void_func* instructions, const void_func* instructions_mov, bool is_64_bit, AsmOptimization ASM)
|
||||
{
|
||||
uint32_t prev_rot_src = (uint32_t)(-1);
|
||||
|
||||
for (int i = 0;; ++i) {
|
||||
const V4_Instruction inst = code[i];
|
||||
if (inst.opcode == RET) {
|
||||
break;
|
||||
}
|
||||
|
||||
uint8_t opcode = (inst.opcode == MUL) ? inst.opcode : (inst.opcode + 2);
|
||||
uint8_t dst_index = inst.dst_index;
|
||||
uint8_t src_index = inst.src_index;
|
||||
|
||||
const uint32_t a = inst.dst_index;
|
||||
const uint32_t b = inst.src_index;
|
||||
const uint8_t c = opcode | (dst_index << V4_OPCODE_BITS) | (((src_index == 8) ? dst_index : src_index) << (V4_OPCODE_BITS + V4_DST_INDEX_BITS));
|
||||
|
||||
switch (inst.opcode) {
|
||||
case ROR:
|
||||
case ROL:
|
||||
if (b != prev_rot_src) {
|
||||
prev_rot_src = b;
|
||||
add_code(p, instructions_mov[c], instructions_mov[c + 1]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (a == prev_rot_src) {
|
||||
prev_rot_src = (uint32_t)(-1);
|
||||
}
|
||||
|
||||
void_func begin = instructions[c];
|
||||
|
||||
if ((ASM = ASM_BULLDOZER) && (inst.opcode == MUL) && !is_64_bit) {
|
||||
// AMD Bulldozer has latency 4 for 32-bit IMUL and 6 for 64-bit IMUL
|
||||
// Always use 32-bit IMUL for AMD Bulldozer in 32-bit mode - skip prefix 0x48 and change 0x49 to 0x41
|
||||
uint8_t* prefix = reinterpret_cast<uint8_t*>(begin);
|
||||
|
||||
if (*prefix == 0x49) {
|
||||
*(p++) = 0x41;
|
||||
}
|
||||
|
||||
begin = reinterpret_cast<void_func>(prefix + 1);
|
||||
}
|
||||
|
||||
add_code(p, begin, instructions[c + 1]);
|
||||
|
||||
if (inst.opcode == ADD) {
|
||||
*(uint32_t*)(p - sizeof(uint32_t) - (is_64_bit ? 3 : 0)) = inst.C;
|
||||
if (is_64_bit) {
|
||||
prev_rot_src = (uint32_t)(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void wow_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM)
|
||||
{
|
||||
uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
|
||||
uint8_t* p = p0;
|
||||
|
||||
add_code(p, CryptonightWOW_template_part1, CryptonightWOW_template_part2);
|
||||
add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||
add_code(p, CryptonightWOW_template_part2, CryptonightWOW_template_part3);
|
||||
*(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightWOW_template_mainloop) - ((const uint8_t*)CryptonightWOW_template_part1)) - (p - p0));
|
||||
add_code(p, CryptonightWOW_template_part3, CryptonightWOW_template_end);
|
||||
|
||||
Mem::flushInstructionCache(machine_code, p - p0);
|
||||
}
|
||||
|
||||
void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM)
|
||||
{
|
||||
uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
|
||||
uint8_t* p = p0;
|
||||
|
||||
add_code(p, CryptonightR_template_part1, CryptonightR_template_part2);
|
||||
add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||
add_code(p, CryptonightR_template_part2, CryptonightR_template_part3);
|
||||
*(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_template_mainloop) - ((const uint8_t*)CryptonightR_template_part1)) - (p - p0));
|
||||
add_code(p, CryptonightR_template_part3, CryptonightR_template_end);
|
||||
|
||||
Mem::flushInstructionCache(machine_code, p - p0);
|
||||
}
|
||||
|
||||
void wow_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM)
|
||||
{
|
||||
uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
|
||||
uint8_t* p = p0;
|
||||
|
||||
add_code(p, CryptonightWOW_template_double_part1, CryptonightWOW_template_double_part2);
|
||||
add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||
add_code(p, CryptonightWOW_template_double_part2, CryptonightWOW_template_double_part3);
|
||||
add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||
add_code(p, CryptonightWOW_template_double_part3, CryptonightWOW_template_double_part4);
|
||||
*(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightWOW_template_double_mainloop) - ((const uint8_t*)CryptonightWOW_template_double_part1)) - (p - p0));
|
||||
add_code(p, CryptonightWOW_template_double_part4, CryptonightWOW_template_double_end);
|
||||
|
||||
Mem::flushInstructionCache(machine_code, p - p0);
|
||||
}
|
||||
|
||||
void v4_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM)
|
||||
{
|
||||
uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
|
||||
uint8_t* p = p0;
|
||||
|
||||
add_code(p, CryptonightR_template_double_part1, CryptonightR_template_double_part2);
|
||||
add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||
add_code(p, CryptonightR_template_double_part2, CryptonightR_template_double_part3);
|
||||
add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||
add_code(p, CryptonightR_template_double_part3, CryptonightR_template_double_part4);
|
||||
*(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_template_double_mainloop) - ((const uint8_t*)CryptonightR_template_double_part1)) - (p - p0));
|
||||
add_code(p, CryptonightR_template_double_part4, CryptonightR_template_double_end);
|
||||
|
||||
Mem::flushInstructionCache(machine_code, p - p0);
|
||||
}
|
||||
|
||||
void wow_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM)
|
||||
{
|
||||
uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
|
||||
uint8_t* p = p0;
|
||||
|
||||
add_code(p, CryptonightWOW_soft_aes_template_part1, CryptonightWOW_soft_aes_template_part2);
|
||||
add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||
add_code(p, CryptonightWOW_soft_aes_template_part2, CryptonightWOW_soft_aes_template_part3);
|
||||
*(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightWOW_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightWOW_soft_aes_template_part1)) - (p - p0));
|
||||
add_code(p, CryptonightWOW_soft_aes_template_part3, CryptonightWOW_soft_aes_template_end);
|
||||
|
||||
Mem::flushInstructionCache(machine_code, p - p0);
|
||||
}
|
||||
|
||||
void v4_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM)
|
||||
{
|
||||
uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
|
||||
uint8_t* p = p0;
|
||||
|
||||
add_code(p, CryptonightR_soft_aes_template_part1, CryptonightR_soft_aes_template_part2);
|
||||
add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||
add_code(p, CryptonightR_soft_aes_template_part2, CryptonightR_soft_aes_template_part3);
|
||||
*(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightR_soft_aes_template_part1)) - (p - p0));
|
||||
add_code(p, CryptonightR_soft_aes_template_part3, CryptonightR_soft_aes_template_end);
|
||||
|
||||
Mem::flushInstructionCache(machine_code, p - p0);
|
||||
}
|
||||
#endif
|
File diff suppressed because it is too large
Load diff
|
@ -138,6 +138,53 @@ const static uint8_t test_output_xtl_v9[64] = {
|
|||
0xF1, 0xF0, 0x55, 0x34, 0x15, 0x29, 0x93, 0x04, 0x2D, 0xED, 0xD2, 0x33, 0x50, 0x6E, 0xBE, 0x25
|
||||
};
|
||||
|
||||
// CN XCASH
|
||||
const static uint8_t test_output_xcash[32] = {
|
||||
0xAE, 0xFB, 0xB3, 0xF0, 0xCC, 0x88, 0x04, 0x6D, 0x11, 0x9F, 0x6C, 0x54, 0xB9, 0x6D, 0x90, 0xC9,
|
||||
0xE8, 0x84, 0xEA, 0x3B, 0x59, 0x83, 0xA6, 0x0D, 0x50, 0xA4, 0x2D, 0x7D, 0x3E, 0xBE, 0x48, 0x21
|
||||
};
|
||||
|
||||
// CN ZELERIUS
|
||||
const static uint8_t test_output_zelerius[32] = {
|
||||
0x51, 0x6E, 0x33, 0xC6, 0xE4, 0x46, 0xAB, 0xBC, 0xCD, 0xAD, 0x18, 0xC0, 0x4C, 0xD9, 0xA2, 0x5E,
|
||||
0x64, 0x10, 0x28, 0x53, 0xB2, 0x0A, 0x42, 0xDF, 0xDE, 0xAA, 0x8B, 0x59, 0x9E, 0xCF, 0x40, 0xE2
|
||||
};
|
||||
|
||||
// CN RWZ
|
||||
const static uint8_t test_output_rwz[64] = {
|
||||
0x5f, 0x56, 0xc6, 0xb0, 0x99, 0x6b, 0xa2, 0x3e, 0x0b, 0xba, 0x07, 0x29, 0xc9, 0x90, 0x74, 0x85,
|
||||
0x5a, 0x10, 0xe3, 0x08, 0x7f, 0xdb, 0xfe, 0x94, 0x75, 0x33, 0x54, 0x73, 0x76, 0xf0, 0x75, 0xb8,
|
||||
0x8b, 0x70, 0x43, 0x9a, 0xfc, 0xf5, 0xeb, 0x15, 0xbb, 0xf9, 0xad, 0x9d, 0x2a, 0xbd, 0x72, 0x52,
|
||||
0x49, 0x54, 0x0b, 0x91, 0xea, 0x61, 0x7f, 0x98, 0x7d, 0x39, 0x17, 0xb7, 0xd7, 0x65, 0xff, 0x75
|
||||
};
|
||||
|
||||
// CN V9 aka CN V4/V5 aka CN-R (height 10000)
|
||||
const static uint8_t test_output_v4[160] = {
|
||||
0x90, 0x20, 0x14, 0x86, 0x1E, 0xCD, 0x01, 0xC5, 0x43, 0xB5, 0x61, 0xFA, 0xC8, 0x3D, 0xFF, 0x7D,
|
||||
0x76, 0x67, 0xC2, 0xD7, 0xB3, 0xD4, 0xE3, 0x4B, 0x4C, 0x7E, 0x6D, 0x04, 0x31, 0x79, 0xE6, 0x96,
|
||||
0xEA, 0xF4, 0x14, 0x76, 0x38, 0x94, 0x7C, 0xCE, 0x02, 0x50, 0x7A, 0x31, 0xB8, 0x4D, 0xDD, 0x3B,
|
||||
0x92, 0xAA, 0xC6, 0x49, 0xA1, 0x64, 0xA1, 0xA8, 0x7C, 0xD9, 0x43, 0x14, 0xC5, 0x12, 0x86, 0x61,
|
||||
0x0A, 0x18, 0xBD, 0x11, 0x36, 0x06, 0x31, 0x0D, 0x9D, 0xC0, 0x8C, 0x41, 0x88, 0xCB, 0x7C, 0xE9,
|
||||
0x5D, 0xD2, 0xBA, 0xA5, 0xFB, 0x0D, 0x2B, 0xA6, 0x6E, 0x7C, 0x78, 0x72, 0x38, 0xFE, 0x53, 0x17,
|
||||
0x1A, 0x96, 0x89, 0x0E, 0x14, 0xFF, 0x34, 0x42, 0xC0, 0x5A, 0xAB, 0xC0, 0x3F, 0x39, 0x4E, 0x43,
|
||||
0x91, 0x38, 0x67, 0x79, 0x5B, 0xAE, 0xCC, 0xA7, 0xDB, 0x4C, 0xFE, 0x8B, 0x75, 0x76, 0x1F, 0xC4,
|
||||
0x98, 0x71, 0xE6, 0xC1, 0x08, 0x9D, 0xED, 0xCC, 0x47, 0xC3, 0xF3, 0x7A, 0xA9, 0x4A, 0x3A, 0xB9,
|
||||
0xAC, 0xB8, 0x5C, 0x9F, 0xCC, 0xCB, 0xC1, 0x93, 0x9E, 0xC6, 0x6D, 0xCC, 0x45, 0xF4, 0xBA, 0xBD
|
||||
};
|
||||
|
||||
// CN V9 aka CN V4/V5 aka CN-R (height 10001)
|
||||
const static uint8_t test_output_v4_1[32] = {
|
||||
0x82, 0x58, 0x7D, 0x63, 0x7B, 0x6C, 0x0C, 0x96, 0x6A, 0x50, 0xF6, 0xC0, 0xAB, 0xB5, 0xEA, 0x1A,
|
||||
0x58, 0x2B, 0xEA, 0x7E, 0xF0, 0x2F, 0x3C, 0xA1, 0x7C, 0x1C, 0x7C, 0x2E, 0xF9, 0xE5, 0x66, 0xF2
|
||||
};
|
||||
|
||||
// CN V9 aka CN V4/V5 aka CN-R (height 10002)
|
||||
const static uint8_t test_output_v4_2[32] = {
|
||||
0x64, 0xB2, 0x4E, 0x48, 0x4A, 0x28, 0xBF, 0x11, 0xC4, 0x8A, 0x68, 0xE7, 0xB7, 0x4B, 0xFD, 0xA7,
|
||||
0xFB, 0x95, 0x66, 0x05, 0x0C, 0xF7, 0xFA, 0xA7, 0x4B, 0xD9, 0x18, 0x59, 0x88, 0x7F, 0x47, 0xA2
|
||||
};
|
||||
|
||||
|
||||
// CN-LITE
|
||||
const static uint8_t test_output_v0_lite[160] = {
|
||||
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
|
||||
|
|
File diff suppressed because it is too large
Load diff
279
src/crypto/asm/CryptonightR_soft_aes_template.inc
Normal file
279
src/crypto/asm/CryptonightR_soft_aes_template.inc
Normal file
|
@ -0,0 +1,279 @@
|
|||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_end)
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_part1):
|
||||
mov QWORD PTR [rsp+8], rcx
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 232
|
||||
|
||||
mov eax, [rcx+96]
|
||||
mov ebx, [rcx+100]
|
||||
mov esi, [rcx+104]
|
||||
mov edx, [rcx+108]
|
||||
mov [rsp+144], eax
|
||||
mov [rsp+148], ebx
|
||||
mov [rsp+152], esi
|
||||
mov [rsp+156], edx
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r10, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+40]
|
||||
xor r9, QWORD PTR [rcx+8]
|
||||
movq xmm4, rax
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r11, QWORD PTR [rcx+224]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r10+72]
|
||||
mov rax, QWORD PTR [r10+80]
|
||||
movq xmm0, rdx
|
||||
xor rax, QWORD PTR [r10+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+48], xmm8
|
||||
movaps XMMWORD PTR [rsp+64], xmm9
|
||||
movaps XMMWORD PTR [rsp+80], xmm10
|
||||
movaps XMMWORD PTR [rsp+96], xmm11
|
||||
movaps XMMWORD PTR [rsp+112], xmm12
|
||||
movaps XMMWORD PTR [rsp+128], xmm13
|
||||
|
||||
movq xmm5, rax
|
||||
|
||||
mov rax, r8
|
||||
punpcklqdq xmm4, xmm0
|
||||
and eax, 2097136
|
||||
movq xmm10, QWORD PTR [r10+96]
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+104]
|
||||
xorps xmm9, xmm9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
movq xmm12, r11
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
movq xmm13, rcx
|
||||
mov r12d, 524288
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_mainloop):
|
||||
movd xmm11, r12d
|
||||
mov r12, QWORD PTR [r10+272]
|
||||
lea r13, QWORD PTR [rax+r11]
|
||||
mov esi, DWORD PTR [r13]
|
||||
movq xmm0, r9
|
||||
mov r10d, DWORD PTR [r13+4]
|
||||
movq xmm7, r8
|
||||
mov ebp, DWORD PTR [r13+12]
|
||||
mov r14d, DWORD PTR [r13+8]
|
||||
mov rdx, QWORD PTR [rsp+328]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
movd xmm1, r11d
|
||||
add ebp, 256
|
||||
movq r11, xmm12
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
mov rcx, rdx
|
||||
xor eax, r15d
|
||||
punpckldq xmm2, xmm1
|
||||
xor rcx, 16
|
||||
movd xmm6, eax
|
||||
mov rax, rdx
|
||||
punpckldq xmm6, xmm0
|
||||
xor rax, 32
|
||||
punpckldq xmm6, xmm2
|
||||
xor rdx, 48
|
||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm2
|
||||
pxor xmm6, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||
pxor xmm6, xmm1
|
||||
pxor xmm6, xmm0
|
||||
paddq xmm0, xmm5
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movq rcx, xmm13
|
||||
paddq xmm1, xmm7
|
||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||
movq rdi, xmm6
|
||||
mov r10, rdi
|
||||
and r10d, 2097136
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [r13], xmm0
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov ebp, [rsp+152]
|
||||
add ebx, [rsp+148]
|
||||
add ebp, [rsp+156]
|
||||
shl rbp, 32
|
||||
or rbx, rbp
|
||||
|
||||
xor rbx, QWORD PTR [r10+r11]
|
||||
lea r14, QWORD PTR [r10+r11]
|
||||
mov rbp, QWORD PTR [r14+8]
|
||||
|
||||
mov [rsp+160], rbx
|
||||
mov [rsp+168], rdi
|
||||
mov [rsp+176], rbp
|
||||
mov [rsp+184], r10
|
||||
mov r10, rsp
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov esi, [rsp+148]
|
||||
mov edi, [rsp+152]
|
||||
mov ebp, [rsp+156]
|
||||
|
||||
movd esp, xmm7
|
||||
movaps xmm0, xmm7
|
||||
psrldq xmm0, 8
|
||||
movd r15d, xmm0
|
||||
movd eax, xmm4
|
||||
movd edx, xmm5
|
||||
movaps xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
movd r9d, xmm0
|
||||
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_part2):
|
||||
mov rsp, r10
|
||||
mov [rsp+144], ebx
|
||||
mov [rsp+148], esi
|
||||
mov [rsp+152], edi
|
||||
mov [rsp+156], ebp
|
||||
|
||||
mov edi, edi
|
||||
shl rbp, 32
|
||||
or rbp, rdi
|
||||
xor r8, rbp
|
||||
|
||||
mov ebx, ebx
|
||||
shl rsi, 32
|
||||
or rsi, rbx
|
||||
xor QWORD PTR [rsp+320], rsi
|
||||
|
||||
mov rbx, [rsp+160]
|
||||
mov rdi, [rsp+168]
|
||||
mov rbp, [rsp+176]
|
||||
mov r10, [rsp+184]
|
||||
|
||||
mov r9, r10
|
||||
xor r9, 16
|
||||
mov rcx, r10
|
||||
xor rcx, 32
|
||||
xor r10, 48
|
||||
mov rax, rbx
|
||||
mul rdi
|
||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm2
|
||||
pxor xmm6, xmm1
|
||||
paddq xmm1, xmm7
|
||||
add r8, rdx
|
||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm6, xmm0
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqa xmm5, xmm4
|
||||
mov r9, QWORD PTR [rsp+320]
|
||||
movdqa xmm4, xmm6
|
||||
add r9, rax
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
mov r10, QWORD PTR [rsp+304]
|
||||
movd r12d, xmm11
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rbx
|
||||
mov rax, r8
|
||||
mov QWORD PTR [r14+8], r9
|
||||
and eax, 2097136
|
||||
xor r9, rbp
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
sub r12d, 1
|
||||
jne FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_part3):
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||
|
||||
add rsp, 232
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_end):
|
1595
src/crypto/asm/CryptonightR_template.S
Normal file
1595
src/crypto/asm/CryptonightR_template.S
Normal file
File diff suppressed because it is too large
Load diff
1087
src/crypto/asm/CryptonightR_template.h
Normal file
1087
src/crypto/asm/CryptonightR_template.h
Normal file
File diff suppressed because it is too large
Load diff
529
src/crypto/asm/CryptonightR_template.inc
Normal file
529
src/crypto/asm/CryptonightR_template.inc
Normal file
|
@ -0,0 +1,529 @@
|
|||
PUBLIC FN_PREFIX(CryptonightR_template_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_end)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part4)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_end)
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_template_part1):
|
||||
mov QWORD PTR [rsp+16], rbx
|
||||
mov QWORD PTR [rsp+24], rbp
|
||||
mov QWORD PTR [rsp+32], rsi
|
||||
push r10
|
||||
push r11
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
push rdi
|
||||
sub rsp, 64
|
||||
mov r12, rcx
|
||||
mov r8, QWORD PTR [r12+32]
|
||||
mov rdx, r12
|
||||
xor r8, QWORD PTR [r12]
|
||||
mov r15, QWORD PTR [r12+40]
|
||||
mov r9, r8
|
||||
xor r15, QWORD PTR [r12+8]
|
||||
mov r11, QWORD PTR [r12+224]
|
||||
mov r12, QWORD PTR [r12+56]
|
||||
xor r12, QWORD PTR [rdx+24]
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
movaps XMMWORD PTR [rsp+48], xmm6
|
||||
movq xmm0, r12
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+16], xmm8
|
||||
movaps XMMWORD PTR [rsp], xmm9
|
||||
mov r12, QWORD PTR [rdx+88]
|
||||
xor r12, QWORD PTR [rdx+72]
|
||||
movq xmm6, rax
|
||||
mov rax, QWORD PTR [rdx+80]
|
||||
xor rax, QWORD PTR [rdx+64]
|
||||
punpcklqdq xmm6, xmm0
|
||||
and r9d, 2097136
|
||||
movq xmm0, r12
|
||||
movq xmm7, rax
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r10d, r9d
|
||||
movq xmm9, rsp
|
||||
mov rsp, r8
|
||||
mov r8d, 524288
|
||||
|
||||
mov ebx, [rdx+96]
|
||||
mov esi, [rdx+100]
|
||||
mov edi, [rdx+104]
|
||||
mov ebp, [rdx+108]
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_template_mainloop):
|
||||
movdqa xmm5, XMMWORD PTR [r9+r11]
|
||||
movq xmm0, r15
|
||||
movq xmm4, rsp
|
||||
punpcklqdq xmm4, xmm0
|
||||
lea rdx, QWORD PTR [r9+r11]
|
||||
|
||||
aesenc xmm5, xmm4
|
||||
|
||||
mov r12d, r9d
|
||||
mov eax, r9d
|
||||
xor r9d, 48
|
||||
xor r12d, 16
|
||||
xor eax, 32
|
||||
movdqu xmm0, XMMWORD PTR [r9+r11]
|
||||
movaps xmm3, xmm0
|
||||
movdqu xmm2, XMMWORD PTR [r12+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
pxor xmm0, xmm2
|
||||
pxor xmm5, xmm1
|
||||
pxor xmm5, xmm0
|
||||
paddq xmm3, xmm7
|
||||
paddq xmm2, xmm6
|
||||
paddq xmm1, xmm4
|
||||
movdqu XMMWORD PTR [r12+r11], xmm3
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movdqu XMMWORD PTR [r9+r11], xmm1
|
||||
|
||||
movq r12, xmm5
|
||||
movd r10d, xmm5
|
||||
and r10d, 2097136
|
||||
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm6
|
||||
movdqu XMMWORD PTR [rdx], xmm0
|
||||
|
||||
lea r13d, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or r13, rdx
|
||||
|
||||
xor r13, QWORD PTR [r10+r11]
|
||||
mov r14, QWORD PTR [r10+r11+8]
|
||||
|
||||
movd eax, xmm6
|
||||
movd edx, xmm7
|
||||
pextrd r9d, xmm7, 2
|
||||
|
||||
FN_PREFIX(CryptonightR_template_part2):
|
||||
mov eax, edi
|
||||
mov edx, ebp
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor rsp, rax
|
||||
|
||||
mov eax, ebx
|
||||
mov edx, esi
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r15, rax
|
||||
|
||||
mov rax, r13
|
||||
mul r12
|
||||
|
||||
mov r9d, r10d
|
||||
mov r12d, r10d
|
||||
xor r9d, 16
|
||||
xor r12d, 32
|
||||
xor r10d, 48
|
||||
movdqa xmm1, XMMWORD PTR [r12+r11]
|
||||
movaps xmm3, xmm1
|
||||
movdqa xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqa xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm1, xmm2
|
||||
pxor xmm5, xmm0
|
||||
pxor xmm5, xmm1
|
||||
paddq xmm3, xmm4
|
||||
paddq xmm2, xmm6
|
||||
paddq xmm0, xmm7
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqu XMMWORD PTR [r12+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm3
|
||||
|
||||
movdqa xmm7, xmm6
|
||||
add r15, rax
|
||||
add rsp, rdx
|
||||
xor r10, 48
|
||||
mov QWORD PTR [r10+r11], rsp
|
||||
xor rsp, r13
|
||||
mov r9d, esp
|
||||
mov QWORD PTR [r10+r11+8], r15
|
||||
and r9d, 2097136
|
||||
xor r15, r14
|
||||
movdqa xmm6, xmm5
|
||||
dec r8d
|
||||
jnz FN_PREFIX(CryptonightR_template_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightR_template_part3):
|
||||
movq rsp, xmm9
|
||||
|
||||
mov rbx, QWORD PTR [rsp+136]
|
||||
mov rbp, QWORD PTR [rsp+144]
|
||||
mov rsi, QWORD PTR [rsp+152]
|
||||
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+16]
|
||||
movaps xmm9, XMMWORD PTR [rsp]
|
||||
add rsp, 64
|
||||
pop rdi
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop r11
|
||||
pop r10
|
||||
ret 0
|
||||
FN_PREFIX(CryptonightR_template_end):
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_template_double_part1):
|
||||
mov QWORD PTR [rsp+24], rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 320
|
||||
mov r14, QWORD PTR [rcx+32]
|
||||
mov r8, rcx
|
||||
xor r14, QWORD PTR [rcx]
|
||||
mov r12, QWORD PTR [rcx+40]
|
||||
mov ebx, r14d
|
||||
mov rsi, QWORD PTR [rcx+224]
|
||||
and ebx, 2097136
|
||||
xor r12, QWORD PTR [rcx+8]
|
||||
mov rcx, QWORD PTR [rcx+56]
|
||||
xor rcx, QWORD PTR [r8+24]
|
||||
mov rax, QWORD PTR [r8+48]
|
||||
xor rax, QWORD PTR [r8+16]
|
||||
mov r15, QWORD PTR [rdx+32]
|
||||
xor r15, QWORD PTR [rdx]
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r8+88]
|
||||
xor rcx, QWORD PTR [r8+72]
|
||||
mov r13, QWORD PTR [rdx+40]
|
||||
mov rdi, QWORD PTR [rdx+224]
|
||||
xor r13, QWORD PTR [rdx+8]
|
||||
movaps XMMWORD PTR [rsp+160], xmm6
|
||||
movaps XMMWORD PTR [rsp+176], xmm7
|
||||
movaps XMMWORD PTR [rsp+192], xmm8
|
||||
movaps XMMWORD PTR [rsp+208], xmm9
|
||||
movaps XMMWORD PTR [rsp+224], xmm10
|
||||
movaps XMMWORD PTR [rsp+240], xmm11
|
||||
movaps XMMWORD PTR [rsp+256], xmm12
|
||||
movaps XMMWORD PTR [rsp+272], xmm13
|
||||
movaps XMMWORD PTR [rsp+288], xmm14
|
||||
movaps XMMWORD PTR [rsp+304], xmm15
|
||||
movq xmm7, rax
|
||||
mov rax, QWORD PTR [r8+80]
|
||||
xor rax, QWORD PTR [r8+64]
|
||||
|
||||
movaps xmm1, XMMWORD PTR [rdx+96]
|
||||
movaps xmm2, XMMWORD PTR [r8+96]
|
||||
movaps XMMWORD PTR [rsp], xmm1
|
||||
movaps XMMWORD PTR [rsp+16], xmm2
|
||||
|
||||
mov r8d, r15d
|
||||
punpcklqdq xmm7, xmm0
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdx+56]
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
movq xmm9, rax
|
||||
mov QWORD PTR [rsp+128], rsi
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
punpcklqdq xmm9, xmm0
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdx+88]
|
||||
xor rcx, QWORD PTR [rdx+72]
|
||||
movq xmm8, rax
|
||||
mov QWORD PTR [rsp+136], rdi
|
||||
mov rax, QWORD PTR [rdx+80]
|
||||
xor rax, QWORD PTR [rdx+64]
|
||||
punpcklqdq xmm8, xmm0
|
||||
and r8d, 2097136
|
||||
movq xmm0, rcx
|
||||
mov r11d, 524288
|
||||
movq xmm10, rax
|
||||
punpcklqdq xmm10, xmm0
|
||||
|
||||
movq xmm14, QWORD PTR [rsp+128]
|
||||
movq xmm15, QWORD PTR [rsp+136]
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_template_double_mainloop):
|
||||
movdqu xmm6, XMMWORD PTR [rbx+rsi]
|
||||
movq xmm0, r12
|
||||
mov ecx, ebx
|
||||
movq xmm3, r14
|
||||
punpcklqdq xmm3, xmm0
|
||||
xor ebx, 16
|
||||
aesenc xmm6, xmm3
|
||||
movq xmm4, r15
|
||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
||||
pxor xmm6, xmm0
|
||||
xor ebx, 48
|
||||
paddq xmm0, xmm7
|
||||
movdqu xmm1, XMMWORD PTR [rbx+rsi]
|
||||
pxor xmm6, xmm1
|
||||
movdqu XMMWORD PTR [rbx+rsi], xmm0
|
||||
paddq xmm1, xmm3
|
||||
xor ebx, 16
|
||||
mov eax, ebx
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
||||
pxor xmm6, xmm0
|
||||
movq rdx, xmm6
|
||||
movdqu XMMWORD PTR [rbx+rsi], xmm1
|
||||
paddq xmm0, xmm9
|
||||
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm7
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm0
|
||||
mov esi, edx
|
||||
movdqu xmm5, XMMWORD PTR [r8+rdi]
|
||||
and esi, 2097136
|
||||
mov ecx, r8d
|
||||
movq xmm0, r13
|
||||
punpcklqdq xmm4, xmm0
|
||||
xor r8d, 16
|
||||
aesenc xmm5, xmm4
|
||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
||||
pxor xmm5, xmm0
|
||||
xor r8d, 48
|
||||
paddq xmm0, xmm8
|
||||
movdqu xmm1, XMMWORD PTR [r8+rdi]
|
||||
pxor xmm5, xmm1
|
||||
movdqu XMMWORD PTR [r8+rdi], xmm0
|
||||
paddq xmm1, xmm4
|
||||
xor r8d, 16
|
||||
mov eax, r8d
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
||||
pxor xmm5, xmm0
|
||||
movdqu XMMWORD PTR [r8+rdi], xmm1
|
||||
paddq xmm0, xmm10
|
||||
movdqu XMMWORD PTR [rax+rdi], xmm0
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm8
|
||||
movdqu XMMWORD PTR [rcx+rdi], xmm0
|
||||
movq rdi, xmm5
|
||||
movq rcx, xmm14
|
||||
mov ebp, edi
|
||||
mov r8, QWORD PTR [rcx+rsi]
|
||||
mov r10, QWORD PTR [rcx+rsi+8]
|
||||
lea r9, QWORD PTR [rcx+rsi]
|
||||
xor esi, 16
|
||||
|
||||
movq xmm0, rsp
|
||||
movq xmm1, rsi
|
||||
movq xmm2, rdi
|
||||
movq xmm11, rbp
|
||||
movq xmm12, r15
|
||||
movq xmm13, rdx
|
||||
mov [rsp+104], rcx
|
||||
mov [rsp+112], r9
|
||||
|
||||
mov ebx, DWORD PTR [rsp+16]
|
||||
mov esi, DWORD PTR [rsp+20]
|
||||
mov edi, DWORD PTR [rsp+24]
|
||||
mov ebp, DWORD PTR [rsp+28]
|
||||
|
||||
lea eax, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r8, rax
|
||||
|
||||
movd esp, xmm3
|
||||
pextrd r15d, xmm3, 2
|
||||
movd eax, xmm7
|
||||
movd edx, xmm9
|
||||
pextrd r9d, xmm9, 2
|
||||
|
||||
FN_PREFIX(CryptonightR_template_double_part2):
|
||||
|
||||
mov eax, edi
|
||||
mov edx, ebp
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r14, rax
|
||||
|
||||
mov eax, ebx
|
||||
mov edx, esi
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r12, rax
|
||||
|
||||
movq rsp, xmm0
|
||||
mov DWORD PTR [rsp+16], ebx
|
||||
mov DWORD PTR [rsp+20], esi
|
||||
mov DWORD PTR [rsp+24], edi
|
||||
mov DWORD PTR [rsp+28], ebp
|
||||
|
||||
movq rsi, xmm1
|
||||
movq rdi, xmm2
|
||||
movq rbp, xmm11
|
||||
movq r15, xmm12
|
||||
movq rdx, xmm13
|
||||
mov rcx, [rsp+104]
|
||||
mov r9, [rsp+112]
|
||||
|
||||
mov rbx, r8
|
||||
mov rax, r8
|
||||
mul rdx
|
||||
and ebp, 2097136
|
||||
mov r8, rax
|
||||
movdqu xmm1, XMMWORD PTR [rcx+rsi]
|
||||
pxor xmm6, xmm1
|
||||
xor esi, 48
|
||||
paddq xmm1, xmm7
|
||||
movdqu xmm2, XMMWORD PTR [rsi+rcx]
|
||||
pxor xmm6, xmm2
|
||||
paddq xmm2, xmm3
|
||||
movdqu XMMWORD PTR [rsi+rcx], xmm1
|
||||
xor esi, 16
|
||||
mov eax, esi
|
||||
mov rsi, rcx
|
||||
movdqu xmm0, XMMWORD PTR [rax+rcx]
|
||||
pxor xmm6, xmm0
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm2
|
||||
paddq xmm0, xmm9
|
||||
add r12, r8
|
||||
xor rax, 32
|
||||
add r14, rdx
|
||||
movdqa xmm9, xmm7
|
||||
movdqa xmm7, xmm6
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
||||
mov QWORD PTR [r9+8], r12
|
||||
xor r12, r10
|
||||
mov QWORD PTR [r9], r14
|
||||
movq rcx, xmm15
|
||||
xor r14, rbx
|
||||
mov r10d, ebp
|
||||
mov ebx, r14d
|
||||
xor ebp, 16
|
||||
and ebx, 2097136
|
||||
mov r8, QWORD PTR [r10+rcx]
|
||||
mov r9, QWORD PTR [r10+rcx+8]
|
||||
|
||||
movq xmm0, rsp
|
||||
movq xmm1, rbx
|
||||
movq xmm2, rsi
|
||||
movq xmm11, rdi
|
||||
movq xmm12, rbp
|
||||
movq xmm13, r15
|
||||
mov [rsp+104], rcx
|
||||
mov [rsp+112], r9
|
||||
|
||||
mov ebx, DWORD PTR [rsp]
|
||||
mov esi, DWORD PTR [rsp+4]
|
||||
mov edi, DWORD PTR [rsp+8]
|
||||
mov ebp, DWORD PTR [rsp+12]
|
||||
|
||||
lea eax, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
|
||||
xor r8, rax
|
||||
movq xmm3, r8
|
||||
|
||||
movd esp, xmm4
|
||||
pextrd r15d, xmm4, 2
|
||||
movd eax, xmm8
|
||||
movd edx, xmm10
|
||||
pextrd r9d, xmm10, 2
|
||||
|
||||
FN_PREFIX(CryptonightR_template_double_part3):
|
||||
|
||||
movq r15, xmm13
|
||||
|
||||
mov eax, edi
|
||||
mov edx, ebp
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r15, rax
|
||||
|
||||
mov eax, ebx
|
||||
mov edx, esi
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r13, rax
|
||||
|
||||
movq rsp, xmm0
|
||||
mov DWORD PTR [rsp], ebx
|
||||
mov DWORD PTR [rsp+4], esi
|
||||
mov DWORD PTR [rsp+8], edi
|
||||
mov DWORD PTR [rsp+12], ebp
|
||||
|
||||
movq rbx, xmm1
|
||||
movq rsi, xmm2
|
||||
movq rdi, xmm11
|
||||
movq rbp, xmm12
|
||||
mov rcx, [rsp+104]
|
||||
mov r9, [rsp+112]
|
||||
|
||||
mov rax, r8
|
||||
mul rdi
|
||||
mov rdi, rcx
|
||||
mov r8, rax
|
||||
movdqu xmm1, XMMWORD PTR [rbp+rcx]
|
||||
pxor xmm5, xmm1
|
||||
xor ebp, 48
|
||||
paddq xmm1, xmm8
|
||||
add r13, r8
|
||||
movdqu xmm2, XMMWORD PTR [rbp+rcx]
|
||||
pxor xmm5, xmm2
|
||||
add r15, rdx
|
||||
movdqu XMMWORD PTR [rbp+rcx], xmm1
|
||||
paddq xmm2, xmm4
|
||||
xor ebp, 16
|
||||
mov eax, ebp
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [rbp+rcx]
|
||||
pxor xmm5, xmm0
|
||||
movdqu XMMWORD PTR [rbp+rcx], xmm2
|
||||
paddq xmm0, xmm10
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
||||
movq rax, xmm3
|
||||
movdqa xmm10, xmm8
|
||||
mov QWORD PTR [r10+rcx], r15
|
||||
movdqa xmm8, xmm5
|
||||
xor r15, rax
|
||||
mov QWORD PTR [r10+rcx+8], r13
|
||||
mov r8d, r15d
|
||||
xor r13, r9
|
||||
and r8d, 2097136
|
||||
dec r11d
|
||||
jnz FN_PREFIX(CryptonightR_template_double_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightR_template_double_part4):
|
||||
|
||||
mov rbx, QWORD PTR [rsp+400]
|
||||
movaps xmm6, XMMWORD PTR [rsp+160]
|
||||
movaps xmm7, XMMWORD PTR [rsp+176]
|
||||
movaps xmm8, XMMWORD PTR [rsp+192]
|
||||
movaps xmm9, XMMWORD PTR [rsp+208]
|
||||
movaps xmm10, XMMWORD PTR [rsp+224]
|
||||
movaps xmm11, XMMWORD PTR [rsp+240]
|
||||
movaps xmm12, XMMWORD PTR [rsp+256]
|
||||
movaps xmm13, XMMWORD PTR [rsp+272]
|
||||
movaps xmm14, XMMWORD PTR [rsp+288]
|
||||
movaps xmm15, XMMWORD PTR [rsp+304]
|
||||
add rsp, 320
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
ret 0
|
||||
FN_PREFIX(CryptonightR_template_double_end):
|
266
src/crypto/asm/CryptonightWOW_soft_aes_template.inc
Normal file
266
src/crypto/asm/CryptonightWOW_soft_aes_template.inc
Normal file
|
@ -0,0 +1,266 @@
|
|||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_end)
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_part1):
|
||||
mov QWORD PTR [rsp+8], rcx
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 232
|
||||
|
||||
mov eax, [rcx+96]
|
||||
mov ebx, [rcx+100]
|
||||
mov esi, [rcx+104]
|
||||
mov edx, [rcx+108]
|
||||
mov [rsp+144], eax
|
||||
mov [rsp+148], ebx
|
||||
mov [rsp+152], esi
|
||||
mov [rsp+156], edx
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r10, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+40]
|
||||
xor r9, QWORD PTR [rcx+8]
|
||||
movq xmm4, rax
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r11, QWORD PTR [rcx+224]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r10+72]
|
||||
mov rax, QWORD PTR [r10+80]
|
||||
movq xmm0, rdx
|
||||
xor rax, QWORD PTR [r10+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+48], xmm8
|
||||
movaps XMMWORD PTR [rsp+64], xmm9
|
||||
movaps XMMWORD PTR [rsp+80], xmm10
|
||||
movaps XMMWORD PTR [rsp+96], xmm11
|
||||
movaps XMMWORD PTR [rsp+112], xmm12
|
||||
movaps XMMWORD PTR [rsp+128], xmm13
|
||||
|
||||
movq xmm5, rax
|
||||
|
||||
mov rax, r8
|
||||
punpcklqdq xmm4, xmm0
|
||||
and eax, 2097136
|
||||
movq xmm10, QWORD PTR [r10+96]
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+104]
|
||||
xorps xmm9, xmm9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
movq xmm12, r11
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
movq xmm13, rcx
|
||||
mov r12d, 524288
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop):
|
||||
movd xmm11, r12d
|
||||
mov r12, QWORD PTR [r10+272]
|
||||
lea r13, QWORD PTR [rax+r11]
|
||||
mov esi, DWORD PTR [r13]
|
||||
movq xmm0, r9
|
||||
mov r10d, DWORD PTR [r13+4]
|
||||
movq xmm7, r8
|
||||
mov ebp, DWORD PTR [r13+12]
|
||||
mov r14d, DWORD PTR [r13+8]
|
||||
mov rdx, QWORD PTR [rsp+328]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
movd xmm1, r11d
|
||||
add ebp, 256
|
||||
movq r11, xmm12
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
mov rcx, rdx
|
||||
xor eax, r15d
|
||||
punpckldq xmm2, xmm1
|
||||
xor rcx, 16
|
||||
movd xmm6, eax
|
||||
mov rax, rdx
|
||||
punpckldq xmm6, xmm0
|
||||
xor rax, 32
|
||||
punpckldq xmm6, xmm2
|
||||
xor rdx, 48
|
||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||
paddq xmm0, xmm5
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movq rcx, xmm13
|
||||
paddq xmm1, xmm7
|
||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||
movq rdi, xmm6
|
||||
mov r10, rdi
|
||||
and r10d, 2097136
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [r13], xmm0
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov ebp, [rsp+152]
|
||||
add ebx, [rsp+148]
|
||||
add ebp, [rsp+156]
|
||||
shl rbp, 32
|
||||
or rbx, rbp
|
||||
|
||||
xor rbx, QWORD PTR [r10+r11]
|
||||
lea r14, QWORD PTR [r10+r11]
|
||||
mov rbp, QWORD PTR [r14+8]
|
||||
|
||||
mov [rsp+160], rbx
|
||||
mov [rsp+168], rdi
|
||||
mov [rsp+176], rbp
|
||||
mov [rsp+184], r10
|
||||
mov r10, rsp
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov esi, [rsp+148]
|
||||
mov edi, [rsp+152]
|
||||
mov ebp, [rsp+156]
|
||||
|
||||
movd esp, xmm7
|
||||
movaps xmm0, xmm7
|
||||
psrldq xmm0, 8
|
||||
movd r15d, xmm0
|
||||
movd eax, xmm4
|
||||
movd edx, xmm5
|
||||
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_part2):
|
||||
mov rsp, r10
|
||||
mov [rsp+144], ebx
|
||||
mov [rsp+148], esi
|
||||
mov [rsp+152], edi
|
||||
mov [rsp+156], ebp
|
||||
|
||||
mov rbx, [rsp+160]
|
||||
mov rdi, [rsp+168]
|
||||
mov rbp, [rsp+176]
|
||||
mov r10, [rsp+184]
|
||||
|
||||
mov r9, r10
|
||||
xor r9, 16
|
||||
mov rcx, r10
|
||||
xor rcx, 32
|
||||
xor r10, 48
|
||||
mov rax, rbx
|
||||
mul rdi
|
||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||
paddq xmm1, xmm7
|
||||
movq xmm0, rax
|
||||
movq xmm3, rdx
|
||||
xor rax, QWORD PTR [r11+rcx+8]
|
||||
xor rdx, QWORD PTR [rcx+r11]
|
||||
punpcklqdq xmm3, xmm0
|
||||
add r8, rdx
|
||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm2, xmm3
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqa xmm5, xmm4
|
||||
mov r9, QWORD PTR [rsp+320]
|
||||
movdqa xmm4, xmm6
|
||||
add r9, rax
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
mov r10, QWORD PTR [rsp+304]
|
||||
movd r12d, xmm11
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rbx
|
||||
mov rax, r8
|
||||
mov QWORD PTR [r14+8], r9
|
||||
and eax, 2097136
|
||||
xor r9, rbp
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
sub r12d, 1
|
||||
jne FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_part3):
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||
|
||||
add rsp, 232
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_end):
|
486
src/crypto/asm/CryptonightWOW_template.inc
Normal file
486
src/crypto/asm/CryptonightWOW_template.inc
Normal file
|
@ -0,0 +1,486 @@
|
|||
PUBLIC FN_PREFIX(CryptonightWOW_template_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_end)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_double_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_double_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_double_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_double_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_double_part4)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_double_end)
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightWOW_template_part1):
|
||||
mov QWORD PTR [rsp+16], rbx
|
||||
mov QWORD PTR [rsp+24], rbp
|
||||
mov QWORD PTR [rsp+32], rsi
|
||||
push r10
|
||||
push r11
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
push rdi
|
||||
sub rsp, 64
|
||||
mov r12, rcx
|
||||
mov r8, QWORD PTR [r12+32]
|
||||
mov rdx, r12
|
||||
xor r8, QWORD PTR [r12]
|
||||
mov r15, QWORD PTR [r12+40]
|
||||
mov r9, r8
|
||||
xor r15, QWORD PTR [r12+8]
|
||||
mov r11, QWORD PTR [r12+224]
|
||||
mov r12, QWORD PTR [r12+56]
|
||||
xor r12, QWORD PTR [rdx+24]
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
movaps XMMWORD PTR [rsp+48], xmm6
|
||||
movq xmm0, r12
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+16], xmm8
|
||||
movaps XMMWORD PTR [rsp], xmm9
|
||||
mov r12, QWORD PTR [rdx+88]
|
||||
xor r12, QWORD PTR [rdx+72]
|
||||
movq xmm6, rax
|
||||
mov rax, QWORD PTR [rdx+80]
|
||||
xor rax, QWORD PTR [rdx+64]
|
||||
punpcklqdq xmm6, xmm0
|
||||
and r9d, 2097136
|
||||
movq xmm0, r12
|
||||
movq xmm7, rax
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r10d, r9d
|
||||
movq xmm9, rsp
|
||||
mov rsp, r8
|
||||
mov r8d, 524288
|
||||
|
||||
mov ebx, [rdx+96]
|
||||
mov esi, [rdx+100]
|
||||
mov edi, [rdx+104]
|
||||
mov ebp, [rdx+108]
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightWOW_template_mainloop):
|
||||
movdqa xmm5, XMMWORD PTR [r9+r11]
|
||||
movq xmm0, r15
|
||||
movq xmm4, rsp
|
||||
punpcklqdq xmm4, xmm0
|
||||
lea rdx, QWORD PTR [r9+r11]
|
||||
|
||||
aesenc xmm5, xmm4
|
||||
movd r10d, xmm5
|
||||
and r10d, 2097136
|
||||
|
||||
mov r12d, r9d
|
||||
mov eax, r9d
|
||||
xor r9d, 48
|
||||
xor r12d, 16
|
||||
xor eax, 32
|
||||
movdqu xmm0, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm2, XMMWORD PTR [r12+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
paddq xmm0, xmm7
|
||||
paddq xmm2, xmm6
|
||||
paddq xmm1, xmm4
|
||||
movdqu XMMWORD PTR [r12+r11], xmm0
|
||||
movq r12, xmm5
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movdqu XMMWORD PTR [r9+r11], xmm1
|
||||
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm6
|
||||
movdqu XMMWORD PTR [rdx], xmm0
|
||||
|
||||
lea r13d, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or r13, rdx
|
||||
|
||||
xor r13, QWORD PTR [r10+r11]
|
||||
mov r14, QWORD PTR [r10+r11+8]
|
||||
|
||||
movd eax, xmm6
|
||||
movd edx, xmm7
|
||||
pextrd r9d, xmm7, 2
|
||||
|
||||
FN_PREFIX(CryptonightWOW_template_part2):
|
||||
mov rax, r13
|
||||
mul r12
|
||||
movq xmm0, rax
|
||||
movq xmm3, rdx
|
||||
punpcklqdq xmm3, xmm0
|
||||
|
||||
mov r9d, r10d
|
||||
mov r12d, r10d
|
||||
xor r9d, 16
|
||||
xor r12d, 32
|
||||
xor r10d, 48
|
||||
movdqa xmm1, XMMWORD PTR [r12+r11]
|
||||
xor rdx, QWORD PTR [r12+r11]
|
||||
xor rax, QWORD PTR [r11+r12+8]
|
||||
movdqa xmm2, XMMWORD PTR [r9+r11]
|
||||
pxor xmm3, xmm2
|
||||
paddq xmm7, XMMWORD PTR [r10+r11]
|
||||
paddq xmm1, xmm4
|
||||
paddq xmm3, xmm6
|
||||
movdqu XMMWORD PTR [r9+r11], xmm7
|
||||
movdqu XMMWORD PTR [r12+r11], xmm3
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
|
||||
movdqa xmm7, xmm6
|
||||
add r15, rax
|
||||
add rsp, rdx
|
||||
xor r10, 48
|
||||
mov QWORD PTR [r10+r11], rsp
|
||||
xor rsp, r13
|
||||
mov r9d, esp
|
||||
mov QWORD PTR [r10+r11+8], r15
|
||||
and r9d, 2097136
|
||||
xor r15, r14
|
||||
movdqa xmm6, xmm5
|
||||
dec r8d
|
||||
jnz FN_PREFIX(CryptonightWOW_template_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightWOW_template_part3):
|
||||
movq rsp, xmm9
|
||||
|
||||
mov rbx, QWORD PTR [rsp+136]
|
||||
mov rbp, QWORD PTR [rsp+144]
|
||||
mov rsi, QWORD PTR [rsp+152]
|
||||
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+16]
|
||||
movaps xmm9, XMMWORD PTR [rsp]
|
||||
add rsp, 64
|
||||
pop rdi
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop r11
|
||||
pop r10
|
||||
ret 0
|
||||
FN_PREFIX(CryptonightWOW_template_end):
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightWOW_template_double_part1):
|
||||
mov QWORD PTR [rsp+24], rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 320
|
||||
mov r14, QWORD PTR [rcx+32]
|
||||
mov r8, rcx
|
||||
xor r14, QWORD PTR [rcx]
|
||||
mov r12, QWORD PTR [rcx+40]
|
||||
mov ebx, r14d
|
||||
mov rsi, QWORD PTR [rcx+224]
|
||||
and ebx, 2097136
|
||||
xor r12, QWORD PTR [rcx+8]
|
||||
mov rcx, QWORD PTR [rcx+56]
|
||||
xor rcx, QWORD PTR [r8+24]
|
||||
mov rax, QWORD PTR [r8+48]
|
||||
xor rax, QWORD PTR [r8+16]
|
||||
mov r15, QWORD PTR [rdx+32]
|
||||
xor r15, QWORD PTR [rdx]
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r8+88]
|
||||
xor rcx, QWORD PTR [r8+72]
|
||||
mov r13, QWORD PTR [rdx+40]
|
||||
mov rdi, QWORD PTR [rdx+224]
|
||||
xor r13, QWORD PTR [rdx+8]
|
||||
movaps XMMWORD PTR [rsp+160], xmm6
|
||||
movaps XMMWORD PTR [rsp+176], xmm7
|
||||
movaps XMMWORD PTR [rsp+192], xmm8
|
||||
movaps XMMWORD PTR [rsp+208], xmm9
|
||||
movaps XMMWORD PTR [rsp+224], xmm10
|
||||
movaps XMMWORD PTR [rsp+240], xmm11
|
||||
movaps XMMWORD PTR [rsp+256], xmm12
|
||||
movaps XMMWORD PTR [rsp+272], xmm13
|
||||
movaps XMMWORD PTR [rsp+288], xmm14
|
||||
movaps XMMWORD PTR [rsp+304], xmm15
|
||||
movq xmm7, rax
|
||||
mov rax, QWORD PTR [r8+80]
|
||||
xor rax, QWORD PTR [r8+64]
|
||||
|
||||
movaps xmm1, XMMWORD PTR [rdx+96]
|
||||
movaps xmm2, XMMWORD PTR [r8+96]
|
||||
movaps XMMWORD PTR [rsp], xmm1
|
||||
movaps XMMWORD PTR [rsp+16], xmm2
|
||||
|
||||
mov r8d, r15d
|
||||
punpcklqdq xmm7, xmm0
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdx+56]
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
movq xmm9, rax
|
||||
mov QWORD PTR [rsp+128], rsi
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
punpcklqdq xmm9, xmm0
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdx+88]
|
||||
xor rcx, QWORD PTR [rdx+72]
|
||||
movq xmm8, rax
|
||||
mov QWORD PTR [rsp+136], rdi
|
||||
mov rax, QWORD PTR [rdx+80]
|
||||
xor rax, QWORD PTR [rdx+64]
|
||||
punpcklqdq xmm8, xmm0
|
||||
and r8d, 2097136
|
||||
movq xmm0, rcx
|
||||
mov r11d, 524288
|
||||
movq xmm10, rax
|
||||
punpcklqdq xmm10, xmm0
|
||||
|
||||
movq xmm14, QWORD PTR [rsp+128]
|
||||
movq xmm15, QWORD PTR [rsp+136]
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightWOW_template_double_mainloop):
|
||||
movdqu xmm6, XMMWORD PTR [rbx+rsi]
|
||||
movq xmm0, r12
|
||||
mov ecx, ebx
|
||||
movq xmm3, r14
|
||||
punpcklqdq xmm3, xmm0
|
||||
xor ebx, 16
|
||||
aesenc xmm6, xmm3
|
||||
movq rdx, xmm6
|
||||
movq xmm4, r15
|
||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
||||
xor ebx, 48
|
||||
paddq xmm0, xmm7
|
||||
movdqu xmm1, XMMWORD PTR [rbx+rsi]
|
||||
movdqu XMMWORD PTR [rbx+rsi], xmm0
|
||||
paddq xmm1, xmm3
|
||||
xor ebx, 16
|
||||
mov eax, ebx
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
||||
movdqu XMMWORD PTR [rbx+rsi], xmm1
|
||||
paddq xmm0, xmm9
|
||||
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm7
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm0
|
||||
mov esi, edx
|
||||
movdqu xmm5, XMMWORD PTR [r8+rdi]
|
||||
and esi, 2097136
|
||||
mov ecx, r8d
|
||||
movq xmm0, r13
|
||||
punpcklqdq xmm4, xmm0
|
||||
xor r8d, 16
|
||||
aesenc xmm5, xmm4
|
||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
||||
xor r8d, 48
|
||||
paddq xmm0, xmm8
|
||||
movdqu xmm1, XMMWORD PTR [r8+rdi]
|
||||
movdqu XMMWORD PTR [r8+rdi], xmm0
|
||||
paddq xmm1, xmm4
|
||||
xor r8d, 16
|
||||
mov eax, r8d
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
||||
movdqu XMMWORD PTR [r8+rdi], xmm1
|
||||
paddq xmm0, xmm10
|
||||
movdqu XMMWORD PTR [rax+rdi], xmm0
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm8
|
||||
movdqu XMMWORD PTR [rcx+rdi], xmm0
|
||||
movq rdi, xmm5
|
||||
movq rcx, xmm14
|
||||
mov ebp, edi
|
||||
mov r8, QWORD PTR [rcx+rsi]
|
||||
mov r10, QWORD PTR [rcx+rsi+8]
|
||||
lea r9, QWORD PTR [rcx+rsi]
|
||||
xor esi, 16
|
||||
|
||||
movq xmm0, rsp
|
||||
movq xmm1, rsi
|
||||
movq xmm2, rdi
|
||||
movq xmm11, rbp
|
||||
movq xmm12, r15
|
||||
movq xmm13, rdx
|
||||
mov [rsp+104], rcx
|
||||
mov [rsp+112], r9
|
||||
|
||||
mov ebx, DWORD PTR [rsp+16]
|
||||
mov esi, DWORD PTR [rsp+20]
|
||||
mov edi, DWORD PTR [rsp+24]
|
||||
mov ebp, DWORD PTR [rsp+28]
|
||||
|
||||
lea eax, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r8, rax
|
||||
|
||||
movd esp, xmm3
|
||||
pextrd r15d, xmm3, 2
|
||||
movd eax, xmm7
|
||||
movd edx, xmm9
|
||||
pextrd r9d, xmm9, 2
|
||||
|
||||
FN_PREFIX(CryptonightWOW_template_double_part2):
|
||||
|
||||
movq rsp, xmm0
|
||||
mov DWORD PTR [rsp+16], ebx
|
||||
mov DWORD PTR [rsp+20], esi
|
||||
mov DWORD PTR [rsp+24], edi
|
||||
mov DWORD PTR [rsp+28], ebp
|
||||
|
||||
movq rsi, xmm1
|
||||
movq rdi, xmm2
|
||||
movq rbp, xmm11
|
||||
movq r15, xmm12
|
||||
movq rdx, xmm13
|
||||
mov rcx, [rsp+104]
|
||||
mov r9, [rsp+112]
|
||||
|
||||
mov rbx, r8
|
||||
mov rax, r8
|
||||
mul rdx
|
||||
and ebp, 2097136
|
||||
mov r8, rax
|
||||
movq xmm1, rdx
|
||||
movq xmm0, r8
|
||||
punpcklqdq xmm1, xmm0
|
||||
pxor xmm1, XMMWORD PTR [rcx+rsi]
|
||||
xor esi, 48
|
||||
paddq xmm1, xmm7
|
||||
movdqu xmm2, XMMWORD PTR [rsi+rcx]
|
||||
xor rdx, QWORD PTR [rsi+rcx]
|
||||
paddq xmm2, xmm3
|
||||
xor r8, QWORD PTR [rsi+rcx+8]
|
||||
movdqu XMMWORD PTR [rsi+rcx], xmm1
|
||||
xor esi, 16
|
||||
mov eax, esi
|
||||
mov rsi, rcx
|
||||
movdqu xmm0, XMMWORD PTR [rax+rcx]
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm2
|
||||
paddq xmm0, xmm9
|
||||
add r12, r8
|
||||
xor rax, 32
|
||||
add r14, rdx
|
||||
movdqa xmm9, xmm7
|
||||
movdqa xmm7, xmm6
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
||||
mov QWORD PTR [r9+8], r12
|
||||
xor r12, r10
|
||||
mov QWORD PTR [r9], r14
|
||||
movq rcx, xmm15
|
||||
xor r14, rbx
|
||||
mov r10d, ebp
|
||||
mov ebx, r14d
|
||||
xor ebp, 16
|
||||
and ebx, 2097136
|
||||
mov r8, QWORD PTR [r10+rcx]
|
||||
mov r9, QWORD PTR [r10+rcx+8]
|
||||
|
||||
movq xmm0, rsp
|
||||
movq xmm1, rbx
|
||||
movq xmm2, rsi
|
||||
movq xmm11, rdi
|
||||
movq xmm12, rbp
|
||||
movq xmm13, r15
|
||||
mov [rsp+104], rcx
|
||||
mov [rsp+112], r9
|
||||
|
||||
mov ebx, DWORD PTR [rsp]
|
||||
mov esi, DWORD PTR [rsp+4]
|
||||
mov edi, DWORD PTR [rsp+8]
|
||||
mov ebp, DWORD PTR [rsp+12]
|
||||
|
||||
lea eax, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
|
||||
xor r8, rax
|
||||
movq xmm3, r8
|
||||
|
||||
movd esp, xmm4
|
||||
pextrd r15d, xmm4, 2
|
||||
movd eax, xmm8
|
||||
movd edx, xmm10
|
||||
pextrd r9d, xmm10, 2
|
||||
|
||||
FN_PREFIX(CryptonightWOW_template_double_part3):
|
||||
|
||||
movq rsp, xmm0
|
||||
mov DWORD PTR [rsp], ebx
|
||||
mov DWORD PTR [rsp+4], esi
|
||||
mov DWORD PTR [rsp+8], edi
|
||||
mov DWORD PTR [rsp+12], ebp
|
||||
|
||||
movq rbx, xmm1
|
||||
movq rsi, xmm2
|
||||
movq rdi, xmm11
|
||||
movq rbp, xmm12
|
||||
movq r15, xmm13
|
||||
mov rcx, [rsp+104]
|
||||
mov r9, [rsp+112]
|
||||
|
||||
mov rax, r8
|
||||
mul rdi
|
||||
movq xmm1, rdx
|
||||
movq xmm0, rax
|
||||
punpcklqdq xmm1, xmm0
|
||||
mov rdi, rcx
|
||||
mov r8, rax
|
||||
pxor xmm1, XMMWORD PTR [rbp+rcx]
|
||||
xor ebp, 48
|
||||
paddq xmm1, xmm8
|
||||
xor r8, QWORD PTR [rbp+rcx+8]
|
||||
xor rdx, QWORD PTR [rbp+rcx]
|
||||
add r13, r8
|
||||
movdqu xmm2, XMMWORD PTR [rbp+rcx]
|
||||
add r15, rdx
|
||||
movdqu XMMWORD PTR [rbp+rcx], xmm1
|
||||
paddq xmm2, xmm4
|
||||
xor ebp, 16
|
||||
mov eax, ebp
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [rbp+rcx]
|
||||
movdqu XMMWORD PTR [rbp+rcx], xmm2
|
||||
paddq xmm0, xmm10
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
||||
movq rax, xmm3
|
||||
movdqa xmm10, xmm8
|
||||
mov QWORD PTR [r10+rcx], r15
|
||||
movdqa xmm8, xmm5
|
||||
xor r15, rax
|
||||
mov QWORD PTR [r10+rcx+8], r13
|
||||
mov r8d, r15d
|
||||
xor r13, r9
|
||||
and r8d, 2097136
|
||||
dec r11d
|
||||
jnz FN_PREFIX(CryptonightWOW_template_double_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightWOW_template_double_part4):
|
||||
|
||||
mov rbx, QWORD PTR [rsp+400]
|
||||
movaps xmm6, XMMWORD PTR [rsp+160]
|
||||
movaps xmm7, XMMWORD PTR [rsp+176]
|
||||
movaps xmm8, XMMWORD PTR [rsp+192]
|
||||
movaps xmm9, XMMWORD PTR [rsp+208]
|
||||
movaps xmm10, XMMWORD PTR [rsp+224]
|
||||
movaps xmm11, XMMWORD PTR [rsp+240]
|
||||
movaps xmm12, XMMWORD PTR [rsp+256]
|
||||
movaps xmm13, XMMWORD PTR [rsp+272]
|
||||
movaps xmm14, XMMWORD PTR [rsp+288]
|
||||
movaps xmm15, XMMWORD PTR [rsp+304]
|
||||
add rsp, 320
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
ret 0
|
||||
FN_PREFIX(CryptonightWOW_template_double_end):
|
|
@ -28,6 +28,19 @@
|
|||
.global FN_PREFIX(cnv2_main_loop_ultralite_bulldozer_asm)
|
||||
.global FN_PREFIX(cnv2_double_main_loop_ultralite_sandybridge_asm)
|
||||
|
||||
.global FN_PREFIX(cnv2_main_loop_xcash_ivybridge_asm)
|
||||
.global FN_PREFIX(cnv2_main_loop_xcash_ryzen_asm)
|
||||
.global FN_PREFIX(cnv2_main_loop_xcash_bulldozer_asm)
|
||||
.global FN_PREFIX(cnv2_double_main_loop_xcash_sandybridge_asm)
|
||||
|
||||
.global FN_PREFIX(cnv2_main_loop_zelerius_ivybridge_asm)
|
||||
.global FN_PREFIX(cnv2_main_loop_zelerius_ryzen_asm)
|
||||
.global FN_PREFIX(cnv2_main_loop_zelerius_bulldozer_asm)
|
||||
.global FN_PREFIX(cnv2_double_main_loop_zelerius_sandybridge_asm)
|
||||
|
||||
.global FN_PREFIX(cnv2_main_loop_rwz_all_asm)
|
||||
.global FN_PREFIX(cnv2_double_main_loop_rwz_all_asm)
|
||||
|
||||
.global FN_PREFIX(cnv1_main_loop_soft_aes_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv1_main_loop_lite_soft_aes_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv1_main_loop_fast_soft_aes_sandybridge_asm)
|
||||
|
@ -37,6 +50,8 @@
|
|||
.global FN_PREFIX(cnv2_main_loop_soft_aes_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv2_main_loop_fastv2_soft_aes_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv2_main_loop_ultralite_soft_aes_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv2_main_loop_xcash_soft_aes_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv2_main_loop_zelerius_soft_aes_sandybridge_asm)
|
||||
|
||||
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
|
@ -245,6 +260,129 @@ FN_PREFIX(cnv2_double_main_loop_ultralite_sandybridge_asm):
|
|||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_main_loop_xcash_ivybridge_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cnv2_main_loop_xcash_ivybridge.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_main_loop_xcash_ryzen_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cnv2_main_loop_xcash_ryzen.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_main_loop_xcash_bulldozer_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cnv2_main_loop_xcash_bulldozer.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_double_main_loop_xcash_sandybridge_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
mov rdx, rsi
|
||||
#include "cnv2_double_main_loop_xcash_sandybridge.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_main_loop_zelerius_ivybridge_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cnv2_main_loop_zelerius_ivybridge.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_main_loop_zelerius_ryzen_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cnv2_main_loop_zelerius_ryzen.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_main_loop_zelerius_bulldozer_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cnv2_main_loop_zelerius_bulldozer.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_double_main_loop_zelerius_sandybridge_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
mov rdx, rsi
|
||||
#include "cnv2_double_main_loop_zelerius_sandybridge.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_main_loop_rwz_all_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cnv2_main_loop_rwz_all.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_double_main_loop_rwz_all_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
mov rdx, rsi
|
||||
#include "cnv2_double_main_loop_rwz_all.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
|
@ -340,4 +478,29 @@ FN_PREFIX(cnv2_main_loop_ultralite_soft_aes_sandybridge_asm):
|
|||
mov rcx, rdi
|
||||
#include "cnv2_main_loop_ultralite_soft_aes_sandybridge.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
|
||||
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_main_loop_xcash_soft_aes_sandybridge_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cnv2_main_loop_xcash_soft_aes_sandybridge.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
|
||||
#ifdef __APPLE__
|
||||
ALIGN 16
|
||||
#else
|
||||
ALIGN 64
|
||||
#endif
|
||||
FN_PREFIX(cnv2_main_loop_zelerius_soft_aes_sandybridge_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cnv2_main_loop_zelerius_soft_aes_sandybridge.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
410
src/crypto/asm/cnv2_double_main_loop_rwz_all.inc
Normal file
410
src/crypto/asm/cnv2_double_main_loop_rwz_all.inc
Normal file
|
@ -0,0 +1,410 @@
|
|||
mov rax, rsp
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 184
|
||||
|
||||
stmxcsr DWORD PTR [rsp+272]
|
||||
mov DWORD PTR [rsp+276], 24448
|
||||
ldmxcsr DWORD PTR [rsp+276]
|
||||
|
||||
mov r13, QWORD PTR [rcx+224]
|
||||
mov r9, rdx
|
||||
mov r10, QWORD PTR [rcx+32]
|
||||
mov r8, rcx
|
||||
xor r10, QWORD PTR [rcx]
|
||||
mov r14d, 393216
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
xor r11, QWORD PTR [rcx+8]
|
||||
mov rsi, QWORD PTR [rdx+224]
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov rdi, QWORD PTR [r9+32]
|
||||
xor rdi, QWORD PTR [r9]
|
||||
mov rbp, QWORD PTR [r9+40]
|
||||
xor rbp, QWORD PTR [r9+8]
|
||||
movq xmm0, rdx
|
||||
movaps XMMWORD PTR [rax-88], xmm6
|
||||
movaps XMMWORD PTR [rax-104], xmm7
|
||||
movaps XMMWORD PTR [rax-120], xmm8
|
||||
movaps XMMWORD PTR [rsp+112], xmm9
|
||||
movaps XMMWORD PTR [rsp+96], xmm10
|
||||
movaps XMMWORD PTR [rsp+80], xmm11
|
||||
movaps XMMWORD PTR [rsp+64], xmm12
|
||||
movaps XMMWORD PTR [rsp+48], xmm13
|
||||
movaps XMMWORD PTR [rsp+32], xmm14
|
||||
movaps XMMWORD PTR [rsp+16], xmm15
|
||||
mov rdx, r10
|
||||
movq xmm4, QWORD PTR [r8+96]
|
||||
and edx, 2097136
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
xorps xmm13, xmm13
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r8+72]
|
||||
movq xmm5, QWORD PTR [r8+104]
|
||||
movq xmm7, rax
|
||||
|
||||
mov eax, 1
|
||||
shl rax, 52
|
||||
movq xmm14, rax
|
||||
punpcklqdq xmm14, xmm14
|
||||
|
||||
mov eax, 1023
|
||||
shl rax, 52
|
||||
movq xmm12, rax
|
||||
punpcklqdq xmm12, xmm12
|
||||
|
||||
mov rax, QWORD PTR [r8+80]
|
||||
xor rax, QWORD PTR [r8+64]
|
||||
punpcklqdq xmm7, xmm0
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r9+56]
|
||||
xor rcx, QWORD PTR [r9+24]
|
||||
movq xmm3, rax
|
||||
mov rax, QWORD PTR [r9+48]
|
||||
xor rax, QWORD PTR [r9+16]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movq xmm0, rcx
|
||||
mov QWORD PTR [rsp], r13
|
||||
mov rcx, QWORD PTR [r9+88]
|
||||
xor rcx, QWORD PTR [r9+72]
|
||||
movq xmm6, rax
|
||||
mov rax, QWORD PTR [r9+80]
|
||||
xor rax, QWORD PTR [r9+64]
|
||||
punpcklqdq xmm6, xmm0
|
||||
movq xmm0, rcx
|
||||
mov QWORD PTR [rsp+256], r10
|
||||
mov rcx, rdi
|
||||
mov QWORD PTR [rsp+264], r11
|
||||
movq xmm8, rax
|
||||
and ecx, 2097136
|
||||
punpcklqdq xmm8, xmm0
|
||||
movq xmm0, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm4, xmm0
|
||||
movq xmm0, QWORD PTR [r9+104]
|
||||
lea r8, QWORD PTR [rcx+rsi]
|
||||
movdqu xmm11, XMMWORD PTR [r8]
|
||||
punpcklqdq xmm5, xmm0
|
||||
lea r9, QWORD PTR [rdx+r13]
|
||||
movdqu xmm15, XMMWORD PTR [r9]
|
||||
|
||||
ALIGN(64)
|
||||
rwz_main_loop_double:
|
||||
movdqu xmm9, xmm15
|
||||
mov eax, edx
|
||||
mov ebx, edx
|
||||
xor eax, 16
|
||||
xor ebx, 32
|
||||
xor edx, 48
|
||||
|
||||
movq xmm0, r11
|
||||
movq xmm2, r10
|
||||
punpcklqdq xmm2, xmm0
|
||||
aesenc xmm9, xmm2
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r13]
|
||||
movdqu xmm1, XMMWORD PTR [rbx+r13]
|
||||
paddq xmm0, xmm7
|
||||
paddq xmm1, xmm2
|
||||
movdqu XMMWORD PTR [rbx+r13], xmm0
|
||||
movdqu xmm0, XMMWORD PTR [rax+r13]
|
||||
movdqu XMMWORD PTR [rdx+r13], xmm1
|
||||
paddq xmm0, xmm3
|
||||
movdqu XMMWORD PTR [rax+r13], xmm0
|
||||
|
||||
movq r11, xmm9
|
||||
mov edx, r11d
|
||||
and edx, 2097136
|
||||
movdqa xmm0, xmm9
|
||||
pxor xmm0, xmm7
|
||||
movdqu XMMWORD PTR [r9], xmm0
|
||||
|
||||
lea rbx, QWORD PTR [rdx+r13]
|
||||
mov r10, QWORD PTR [rdx+r13]
|
||||
|
||||
movdqu xmm10, xmm11
|
||||
movq xmm0, rbp
|
||||
movq xmm11, rdi
|
||||
punpcklqdq xmm11, xmm0
|
||||
aesenc xmm10, xmm11
|
||||
|
||||
mov eax, ecx
|
||||
mov r12d, ecx
|
||||
xor eax, 16
|
||||
xor r12d, 32
|
||||
xor ecx, 48
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [rcx+rsi]
|
||||
paddq xmm0, xmm6
|
||||
movdqu xmm1, XMMWORD PTR [r12+rsi]
|
||||
movdqu XMMWORD PTR [r12+rsi], xmm0
|
||||
paddq xmm1, xmm11
|
||||
movdqu xmm0, XMMWORD PTR [rax+rsi]
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||
paddq xmm0, xmm8
|
||||
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||
|
||||
movq rcx, xmm10
|
||||
and ecx, 2097136
|
||||
|
||||
movdqa xmm0, xmm10
|
||||
pxor xmm0, xmm6
|
||||
movdqu XMMWORD PTR [r8], xmm0
|
||||
mov r12, QWORD PTR [rcx+rsi]
|
||||
|
||||
mov r9, QWORD PTR [rbx+8]
|
||||
|
||||
xor edx, 16
|
||||
mov r8d, edx
|
||||
mov r15d, edx
|
||||
|
||||
movq rdx, xmm5
|
||||
shl rdx, 32
|
||||
movq rax, xmm4
|
||||
xor rdx, rax
|
||||
xor r10, rdx
|
||||
mov rax, r10
|
||||
mul r11
|
||||
mov r11d, r8d
|
||||
xor r11d, 48
|
||||
movq xmm0, rdx
|
||||
xor rdx, [r11+r13]
|
||||
movq xmm1, rax
|
||||
xor rax, [r11+r13+8]
|
||||
punpcklqdq xmm0, xmm1
|
||||
|
||||
pxor xmm0, XMMWORD PTR [r8+r13]
|
||||
movdqu xmm1, XMMWORD PTR [r11+r13]
|
||||
paddq xmm0, xmm3
|
||||
paddq xmm1, xmm2
|
||||
movdqu XMMWORD PTR [r8+r13], xmm0
|
||||
xor r8d, 32
|
||||
movdqu xmm0, XMMWORD PTR [r8+r13]
|
||||
movdqu XMMWORD PTR [r8+r13], xmm1
|
||||
paddq xmm0, xmm7
|
||||
movdqu XMMWORD PTR [r11+r13], xmm0
|
||||
|
||||
mov r11, QWORD PTR [rsp+256]
|
||||
add r11, rdx
|
||||
mov rdx, QWORD PTR [rsp+264]
|
||||
add rdx, rax
|
||||
mov QWORD PTR [rbx], r11
|
||||
xor r11, r10
|
||||
mov QWORD PTR [rbx+8], rdx
|
||||
xor rdx, r9
|
||||
mov QWORD PTR [rsp+256], r11
|
||||
and r11d, 2097136
|
||||
mov QWORD PTR [rsp+264], rdx
|
||||
mov QWORD PTR [rsp+8], r11
|
||||
lea r15, QWORD PTR [r11+r13]
|
||||
movdqu xmm15, XMMWORD PTR [r11+r13]
|
||||
lea r13, QWORD PTR [rsi+rcx]
|
||||
movdqa xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
movaps xmm2, xmm13
|
||||
movq r10, xmm0
|
||||
psllq xmm5, 1
|
||||
shl r10, 32
|
||||
movdqa xmm0, xmm9
|
||||
psrldq xmm0, 8
|
||||
movdqa xmm1, xmm10
|
||||
movq r11, xmm0
|
||||
psrldq xmm1, 8
|
||||
movq r8, xmm1
|
||||
psrldq xmm4, 8
|
||||
movaps xmm0, xmm13
|
||||
movq rax, xmm4
|
||||
xor r10, rax
|
||||
movaps xmm1, xmm13
|
||||
xor r10, r12
|
||||
lea rax, QWORD PTR [r11+1]
|
||||
shr rax, 1
|
||||
movdqa xmm3, xmm9
|
||||
punpcklqdq xmm3, xmm10
|
||||
paddq xmm5, xmm3
|
||||
movq rdx, xmm5
|
||||
psrldq xmm5, 8
|
||||
cvtsi2sd xmm2, rax
|
||||
or edx, -2147483647
|
||||
lea rax, QWORD PTR [r8+1]
|
||||
shr rax, 1
|
||||
movq r9, xmm5
|
||||
cvtsi2sd xmm0, rax
|
||||
or r9d, -2147483647
|
||||
cvtsi2sd xmm1, rdx
|
||||
unpcklpd xmm2, xmm0
|
||||
movaps xmm0, xmm13
|
||||
cvtsi2sd xmm0, r9
|
||||
unpcklpd xmm1, xmm0
|
||||
divpd xmm2, xmm1
|
||||
paddq xmm2, xmm14
|
||||
cvttsd2si rax, xmm2
|
||||
psrldq xmm2, 8
|
||||
mov rbx, rax
|
||||
imul rax, rdx
|
||||
sub r11, rax
|
||||
js rwz_div_fix_1
|
||||
rwz_div_fix_1_ret:
|
||||
|
||||
cvttsd2si rdx, xmm2
|
||||
mov rax, rdx
|
||||
imul rax, r9
|
||||
movd xmm2, r11d
|
||||
movd xmm4, ebx
|
||||
sub r8, rax
|
||||
js rwz_div_fix_2
|
||||
rwz_div_fix_2_ret:
|
||||
|
||||
movd xmm1, r8d
|
||||
movd xmm0, edx
|
||||
punpckldq xmm2, xmm1
|
||||
punpckldq xmm4, xmm0
|
||||
punpckldq xmm4, xmm2
|
||||
paddq xmm3, xmm4
|
||||
movdqa xmm0, xmm3
|
||||
psrlq xmm0, 12
|
||||
paddq xmm0, xmm12
|
||||
sqrtpd xmm1, xmm0
|
||||
movq r9, xmm1
|
||||
movdqa xmm5, xmm1
|
||||
psrlq xmm5, 19
|
||||
test r9, 524287
|
||||
je rwz_sqrt_fix_1
|
||||
rwz_sqrt_fix_1_ret:
|
||||
|
||||
movq r9, xmm10
|
||||
psrldq xmm1, 8
|
||||
movq r8, xmm1
|
||||
test r8, 524287
|
||||
je rwz_sqrt_fix_2
|
||||
rwz_sqrt_fix_2_ret:
|
||||
|
||||
mov r12d, ecx
|
||||
mov r8d, ecx
|
||||
xor r12d, 16
|
||||
xor r8d, 32
|
||||
xor ecx, 48
|
||||
mov rax, r10
|
||||
mul r9
|
||||
movq xmm0, rax
|
||||
movq xmm3, rdx
|
||||
punpcklqdq xmm3, xmm0
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [r12+rsi]
|
||||
pxor xmm0, xmm3
|
||||
movdqu xmm1, XMMWORD PTR [r8+rsi]
|
||||
xor rdx, [r8+rsi]
|
||||
xor rax, [r8+rsi+8]
|
||||
movdqu xmm3, XMMWORD PTR [rcx+rsi]
|
||||
paddq xmm3, xmm6
|
||||
paddq xmm1, xmm11
|
||||
paddq xmm0, xmm8
|
||||
movdqu XMMWORD PTR [r8+rsi], xmm3
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||
movdqu XMMWORD PTR [r12+rsi], xmm0
|
||||
|
||||
add rdi, rdx
|
||||
mov QWORD PTR [r13], rdi
|
||||
xor rdi, r10
|
||||
mov ecx, edi
|
||||
and ecx, 2097136
|
||||
lea r8, QWORD PTR [rcx+rsi]
|
||||
|
||||
mov rdx, QWORD PTR [r13+8]
|
||||
add rbp, rax
|
||||
mov QWORD PTR [r13+8], rbp
|
||||
movdqu xmm11, XMMWORD PTR [rcx+rsi]
|
||||
xor rbp, rdx
|
||||
mov r13, QWORD PTR [rsp]
|
||||
movdqa xmm3, xmm7
|
||||
mov rdx, QWORD PTR [rsp+8]
|
||||
movdqa xmm8, xmm6
|
||||
mov r10, QWORD PTR [rsp+256]
|
||||
movdqa xmm7, xmm9
|
||||
mov r11, QWORD PTR [rsp+264]
|
||||
movdqa xmm6, xmm10
|
||||
mov r9, r15
|
||||
dec r14d
|
||||
jne rwz_main_loop_double
|
||||
|
||||
ldmxcsr DWORD PTR [rsp+272]
|
||||
movaps xmm13, XMMWORD PTR [rsp+48]
|
||||
lea r11, QWORD PTR [rsp+184]
|
||||
movaps xmm6, XMMWORD PTR [r11-24]
|
||||
movaps xmm7, XMMWORD PTR [r11-40]
|
||||
movaps xmm8, XMMWORD PTR [r11-56]
|
||||
movaps xmm9, XMMWORD PTR [r11-72]
|
||||
movaps xmm10, XMMWORD PTR [r11-88]
|
||||
movaps xmm11, XMMWORD PTR [r11-104]
|
||||
movaps xmm12, XMMWORD PTR [r11-120]
|
||||
movaps xmm14, XMMWORD PTR [rsp+32]
|
||||
movaps xmm15, XMMWORD PTR [rsp+16]
|
||||
mov rsp, r11
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
jmp rwz_cnv2_double_mainloop_asm_endp
|
||||
|
||||
rwz_div_fix_1:
|
||||
dec rbx
|
||||
add r11, rdx
|
||||
jmp rwz_div_fix_1_ret
|
||||
|
||||
rwz_div_fix_2:
|
||||
dec rdx
|
||||
add r8, r9
|
||||
jmp rwz_div_fix_2_ret
|
||||
|
||||
rwz_sqrt_fix_1:
|
||||
movq r8, xmm3
|
||||
movdqa xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
dec r9
|
||||
mov r11d, -1022
|
||||
shl r11, 32
|
||||
mov rax, r9
|
||||
shr r9, 19
|
||||
shr rax, 20
|
||||
mov rdx, r9
|
||||
sub rdx, rax
|
||||
lea rdx, [rdx+r11+1]
|
||||
add rax, r11
|
||||
imul rdx, rax
|
||||
sub rdx, r8
|
||||
adc r9, 0
|
||||
movq xmm5, r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
jmp rwz_sqrt_fix_1_ret
|
||||
|
||||
rwz_sqrt_fix_2:
|
||||
psrldq xmm3, 8
|
||||
movq r11, xmm3
|
||||
dec r8
|
||||
mov ebx, -1022
|
||||
shl rbx, 32
|
||||
mov rax, r8
|
||||
shr r8, 19
|
||||
shr rax, 20
|
||||
mov rdx, r8
|
||||
sub rdx, rax
|
||||
lea rdx, [rdx+rbx+1]
|
||||
add rax, rbx
|
||||
imul rdx, rax
|
||||
sub rdx, r11
|
||||
adc r8, 0
|
||||
movq xmm0, r8
|
||||
punpcklqdq xmm5, xmm0
|
||||
jmp rwz_sqrt_fix_2_ret
|
||||
|
||||
rwz_cnv2_double_mainloop_asm_endp:
|
186
src/crypto/asm/cnv2_main_loop_rwz_all.inc
Normal file
186
src/crypto/asm/cnv2_main_loop_rwz_all.inc
Normal file
|
@ -0,0 +1,186 @@
|
|||
mov QWORD PTR [rsp+24], rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 80
|
||||
|
||||
stmxcsr DWORD PTR [rsp]
|
||||
mov DWORD PTR [rsp+4], 24448
|
||||
ldmxcsr DWORD PTR [rsp+4]
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r9, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov esi, 393216
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
mov r13d, -2147483647
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
mov r10, r8
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
movq xmm4, rax
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
xor r11, QWORD PTR [rcx+8]
|
||||
mov rbx, QWORD PTR [rcx+224]
|
||||
mov rax, QWORD PTR [r9+80]
|
||||
xor rax, QWORD PTR [r9+64]
|
||||
movq xmm0, rdx
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r9+72]
|
||||
movq xmm3, QWORD PTR [r9+104]
|
||||
movaps XMMWORD PTR [rsp+64], xmm6
|
||||
movaps XMMWORD PTR [rsp+48], xmm7
|
||||
movaps XMMWORD PTR [rsp+32], xmm8
|
||||
and r10d, 2097136
|
||||
movq xmm5, rax
|
||||
|
||||
xor eax, eax
|
||||
mov QWORD PTR [rsp+16], rax
|
||||
|
||||
mov ax, 1023
|
||||
shl rax, 52
|
||||
movq xmm8, rax
|
||||
mov r15, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm4, xmm0
|
||||
movq xmm0, rcx
|
||||
punpcklqdq xmm5, xmm0
|
||||
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||
|
||||
ALIGN(64)
|
||||
rwz_main_loop:
|
||||
lea rdx, QWORD PTR [r10+rbx]
|
||||
mov ecx, r10d
|
||||
mov eax, r10d
|
||||
mov rdi, r15
|
||||
xor ecx, 16
|
||||
xor eax, 32
|
||||
xor r10d, 48
|
||||
movq xmm0, r11
|
||||
movq xmm7, r8
|
||||
punpcklqdq xmm7, xmm0
|
||||
aesenc xmm6, xmm7
|
||||
movq rbp, xmm6
|
||||
mov r9, rbp
|
||||
and r9d, 2097136
|
||||
movdqu xmm0, XMMWORD PTR [rcx+rbx]
|
||||
movdqu xmm1, XMMWORD PTR [rax+rbx]
|
||||
movdqu xmm2, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm1, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
||||
movdqu XMMWORD PTR [rax+rbx], xmm2
|
||||
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||
mov r10, r9
|
||||
xor r10d, 32
|
||||
movq rcx, xmm3
|
||||
mov rax, rcx
|
||||
shl rax, 32
|
||||
xor rdi, rax
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [rdx], xmm0
|
||||
xor rdi, QWORD PTR [r9+rbx]
|
||||
lea r14, QWORD PTR [r9+rbx]
|
||||
mov r12, QWORD PTR [r14+8]
|
||||
xor edx, edx
|
||||
lea r9d, DWORD PTR [ecx+ecx]
|
||||
add r9d, ebp
|
||||
movdqa xmm0, xmm6
|
||||
psrldq xmm0, 8
|
||||
or r9d, r13d
|
||||
movq rax, xmm0
|
||||
div r9
|
||||
xorps xmm3, xmm3
|
||||
mov eax, eax
|
||||
shl rdx, 32
|
||||
add rdx, rax
|
||||
lea r9, QWORD PTR [rdx+rbp]
|
||||
mov r15, rdx
|
||||
mov rax, r9
|
||||
shr rax, 12
|
||||
movq xmm0, rax
|
||||
paddq xmm0, xmm8
|
||||
sqrtsd xmm3, xmm0
|
||||
psubq xmm3, XMMWORD PTR [rsp+16]
|
||||
movq rdx, xmm3
|
||||
test edx, 524287
|
||||
je rwz_sqrt_fixup
|
||||
psrlq xmm3, 19
|
||||
rwz_sqrt_fixup_ret:
|
||||
|
||||
mov ecx, r10d
|
||||
mov rax, rdi
|
||||
mul rbp
|
||||
movq xmm2, rdx
|
||||
xor rdx, [rcx+rbx]
|
||||
add r8, rdx
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rdi
|
||||
mov edi, r8d
|
||||
and edi, 2097136
|
||||
movq xmm0, rax
|
||||
xor rax, [rcx+rbx+8]
|
||||
add r11, rax
|
||||
mov QWORD PTR [r14+8], r11
|
||||
punpcklqdq xmm2, xmm0
|
||||
|
||||
mov r9d, r10d
|
||||
xor r9d, 48
|
||||
xor r10d, 16
|
||||
pxor xmm2, XMMWORD PTR [r9+rbx]
|
||||
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm0, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rcx+rbx]
|
||||
paddq xmm2, xmm5
|
||||
paddq xmm1, xmm7
|
||||
movdqa xmm5, xmm4
|
||||
movdqu XMMWORD PTR [r9+rbx], xmm2
|
||||
movdqa xmm4, xmm6
|
||||
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
||||
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||
movdqu xmm6, [rdi+rbx]
|
||||
mov r10d, edi
|
||||
xor r11, r12
|
||||
dec rsi
|
||||
jne rwz_main_loop
|
||||
|
||||
ldmxcsr DWORD PTR [rsp]
|
||||
mov rbx, QWORD PTR [rsp+160]
|
||||
movaps xmm6, XMMWORD PTR [rsp+64]
|
||||
movaps xmm7, XMMWORD PTR [rsp+48]
|
||||
movaps xmm8, XMMWORD PTR [rsp+32]
|
||||
add rsp, 80
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
jmp cnv2_rwz_main_loop_endp
|
||||
|
||||
rwz_sqrt_fixup:
|
||||
dec rdx
|
||||
mov r13d, -1022
|
||||
shl r13, 32
|
||||
mov rax, rdx
|
||||
shr rdx, 19
|
||||
shr rax, 20
|
||||
mov rcx, rdx
|
||||
sub rcx, rax
|
||||
add rax, r13
|
||||
not r13
|
||||
sub rcx, r13
|
||||
mov r13d, -2147483647
|
||||
imul rcx, rax
|
||||
sub rcx, r9
|
||||
adc rdx, 0
|
||||
movq xmm3, rdx
|
||||
jmp rwz_sqrt_fixup_ret
|
||||
|
||||
cnv2_rwz_main_loop_endp:
|
279
src/crypto/asm/win/CryptonightR_soft_aes_template.inc
Normal file
279
src/crypto/asm/win/CryptonightR_soft_aes_template.inc
Normal file
|
@ -0,0 +1,279 @@
|
|||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_end)
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_part1):
|
||||
mov QWORD PTR [rsp+8], rcx
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 232
|
||||
|
||||
mov eax, [rcx+96]
|
||||
mov ebx, [rcx+100]
|
||||
mov esi, [rcx+104]
|
||||
mov edx, [rcx+108]
|
||||
mov [rsp+144], eax
|
||||
mov [rsp+148], ebx
|
||||
mov [rsp+152], esi
|
||||
mov [rsp+156], edx
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r10, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+40]
|
||||
xor r9, QWORD PTR [rcx+8]
|
||||
movd xmm4, rax
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r11, QWORD PTR [rcx+224]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r10+72]
|
||||
mov rax, QWORD PTR [r10+80]
|
||||
movd xmm0, rdx
|
||||
xor rax, QWORD PTR [r10+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+48], xmm8
|
||||
movaps XMMWORD PTR [rsp+64], xmm9
|
||||
movaps XMMWORD PTR [rsp+80], xmm10
|
||||
movaps XMMWORD PTR [rsp+96], xmm11
|
||||
movaps XMMWORD PTR [rsp+112], xmm12
|
||||
movaps XMMWORD PTR [rsp+128], xmm13
|
||||
|
||||
movd xmm5, rax
|
||||
|
||||
mov rax, r8
|
||||
punpcklqdq xmm4, xmm0
|
||||
and eax, 2097136
|
||||
movd xmm10, QWORD PTR [r10+96]
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+104]
|
||||
xorps xmm9, xmm9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
movd xmm12, r11
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
movd xmm13, rcx
|
||||
mov r12d, 524288
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_mainloop):
|
||||
movd xmm11, r12d
|
||||
mov r12, QWORD PTR [r10+272]
|
||||
lea r13, QWORD PTR [rax+r11]
|
||||
mov esi, DWORD PTR [r13]
|
||||
movd xmm0, r9
|
||||
mov r10d, DWORD PTR [r13+4]
|
||||
movd xmm7, r8
|
||||
mov ebp, DWORD PTR [r13+12]
|
||||
mov r14d, DWORD PTR [r13+8]
|
||||
mov rdx, QWORD PTR [rsp+328]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
movd xmm1, r11d
|
||||
add ebp, 256
|
||||
movd r11, xmm12
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
mov rcx, rdx
|
||||
xor eax, r15d
|
||||
punpckldq xmm2, xmm1
|
||||
xor rcx, 16
|
||||
movd xmm6, eax
|
||||
mov rax, rdx
|
||||
punpckldq xmm6, xmm0
|
||||
xor rax, 32
|
||||
punpckldq xmm6, xmm2
|
||||
xor rdx, 48
|
||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm2
|
||||
pxor xmm6, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||
pxor xmm6, xmm1
|
||||
pxor xmm6, xmm0
|
||||
paddq xmm0, xmm5
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movd rcx, xmm13
|
||||
paddq xmm1, xmm7
|
||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||
movd rdi, xmm6
|
||||
mov r10, rdi
|
||||
and r10d, 2097136
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [r13], xmm0
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov ebp, [rsp+152]
|
||||
add ebx, [rsp+148]
|
||||
add ebp, [rsp+156]
|
||||
shl rbp, 32
|
||||
or rbx, rbp
|
||||
|
||||
xor rbx, QWORD PTR [r10+r11]
|
||||
lea r14, QWORD PTR [r10+r11]
|
||||
mov rbp, QWORD PTR [r14+8]
|
||||
|
||||
mov [rsp+160], rbx
|
||||
mov [rsp+168], rdi
|
||||
mov [rsp+176], rbp
|
||||
mov [rsp+184], r10
|
||||
mov r10, rsp
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov esi, [rsp+148]
|
||||
mov edi, [rsp+152]
|
||||
mov ebp, [rsp+156]
|
||||
|
||||
movd esp, xmm7
|
||||
movaps xmm0, xmm7
|
||||
psrldq xmm0, 8
|
||||
movd r15d, xmm0
|
||||
movd eax, xmm4
|
||||
movd edx, xmm5
|
||||
movaps xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
movd r9d, xmm0
|
||||
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_part2):
|
||||
mov rsp, r10
|
||||
mov [rsp+144], ebx
|
||||
mov [rsp+148], esi
|
||||
mov [rsp+152], edi
|
||||
mov [rsp+156], ebp
|
||||
|
||||
mov edi, edi
|
||||
shl rbp, 32
|
||||
or rbp, rdi
|
||||
xor r8, rbp
|
||||
|
||||
mov ebx, ebx
|
||||
shl rsi, 32
|
||||
or rsi, rbx
|
||||
xor QWORD PTR [rsp+320], rsi
|
||||
|
||||
mov rbx, [rsp+160]
|
||||
mov rdi, [rsp+168]
|
||||
mov rbp, [rsp+176]
|
||||
mov r10, [rsp+184]
|
||||
|
||||
mov r9, r10
|
||||
xor r9, 16
|
||||
mov rcx, r10
|
||||
xor rcx, 32
|
||||
xor r10, 48
|
||||
mov rax, rbx
|
||||
mul rdi
|
||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm2
|
||||
pxor xmm6, xmm1
|
||||
paddq xmm1, xmm7
|
||||
add r8, rdx
|
||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm6, xmm0
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqa xmm5, xmm4
|
||||
mov r9, QWORD PTR [rsp+320]
|
||||
movdqa xmm4, xmm6
|
||||
add r9, rax
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
mov r10, QWORD PTR [rsp+304]
|
||||
movd r12d, xmm11
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rbx
|
||||
mov rax, r8
|
||||
mov QWORD PTR [r14+8], r9
|
||||
and eax, 2097136
|
||||
xor r9, rbp
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
sub r12d, 1
|
||||
jne FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_part3):
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||
|
||||
add rsp, 232
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_end):
|
279
src/crypto/asm/win/CryptonightR_soft_aes_template_win.inc
Normal file
279
src/crypto/asm/win/CryptonightR_soft_aes_template_win.inc
Normal file
|
@ -0,0 +1,279 @@
|
|||
PUBLIC CryptonightR_soft_aes_template_part1
|
||||
PUBLIC CryptonightR_soft_aes_template_mainloop
|
||||
PUBLIC CryptonightR_soft_aes_template_part2
|
||||
PUBLIC CryptonightR_soft_aes_template_part3
|
||||
PUBLIC CryptonightR_soft_aes_template_end
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightR_soft_aes_template_part1:
|
||||
mov QWORD PTR [rsp+8], rcx
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 232
|
||||
|
||||
mov eax, [rcx+96]
|
||||
mov ebx, [rcx+100]
|
||||
mov esi, [rcx+104]
|
||||
mov edx, [rcx+108]
|
||||
mov [rsp+144], eax
|
||||
mov [rsp+148], ebx
|
||||
mov [rsp+152], esi
|
||||
mov [rsp+156], edx
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r10, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+40]
|
||||
xor r9, QWORD PTR [rcx+8]
|
||||
movd xmm4, rax
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r11, QWORD PTR [rcx+224]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r10+72]
|
||||
mov rax, QWORD PTR [r10+80]
|
||||
movd xmm0, rdx
|
||||
xor rax, QWORD PTR [r10+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+48], xmm8
|
||||
movaps XMMWORD PTR [rsp+64], xmm9
|
||||
movaps XMMWORD PTR [rsp+80], xmm10
|
||||
movaps XMMWORD PTR [rsp+96], xmm11
|
||||
movaps XMMWORD PTR [rsp+112], xmm12
|
||||
movaps XMMWORD PTR [rsp+128], xmm13
|
||||
|
||||
movd xmm5, rax
|
||||
|
||||
mov rax, r8
|
||||
punpcklqdq xmm4, xmm0
|
||||
and eax, 2097136
|
||||
movd xmm10, QWORD PTR [r10+96]
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+104]
|
||||
xorps xmm9, xmm9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
movd xmm12, r11
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
movd xmm13, rcx
|
||||
mov r12d, 524288
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightR_soft_aes_template_mainloop:
|
||||
movd xmm11, r12d
|
||||
mov r12, QWORD PTR [r10+272]
|
||||
lea r13, QWORD PTR [rax+r11]
|
||||
mov esi, DWORD PTR [r13]
|
||||
movd xmm0, r9
|
||||
mov r10d, DWORD PTR [r13+4]
|
||||
movd xmm7, r8
|
||||
mov ebp, DWORD PTR [r13+12]
|
||||
mov r14d, DWORD PTR [r13+8]
|
||||
mov rdx, QWORD PTR [rsp+328]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
movd xmm1, r11d
|
||||
add ebp, 256
|
||||
movd r11, xmm12
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
mov rcx, rdx
|
||||
xor eax, r15d
|
||||
punpckldq xmm2, xmm1
|
||||
xor rcx, 16
|
||||
movd xmm6, eax
|
||||
mov rax, rdx
|
||||
punpckldq xmm6, xmm0
|
||||
xor rax, 32
|
||||
punpckldq xmm6, xmm2
|
||||
xor rdx, 48
|
||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm2
|
||||
pxor xmm6, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||
pxor xmm6, xmm1
|
||||
pxor xmm6, xmm0
|
||||
paddq xmm0, xmm5
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movd rcx, xmm13
|
||||
paddq xmm1, xmm7
|
||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||
movd rdi, xmm6
|
||||
mov r10, rdi
|
||||
and r10d, 2097136
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [r13], xmm0
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov ebp, [rsp+152]
|
||||
add ebx, [rsp+148]
|
||||
add ebp, [rsp+156]
|
||||
shl rbp, 32
|
||||
or rbx, rbp
|
||||
|
||||
xor rbx, QWORD PTR [r10+r11]
|
||||
lea r14, QWORD PTR [r10+r11]
|
||||
mov rbp, QWORD PTR [r14+8]
|
||||
|
||||
mov [rsp+160], rbx
|
||||
mov [rsp+168], rdi
|
||||
mov [rsp+176], rbp
|
||||
mov [rsp+184], r10
|
||||
mov r10, rsp
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov esi, [rsp+148]
|
||||
mov edi, [rsp+152]
|
||||
mov ebp, [rsp+156]
|
||||
|
||||
movd esp, xmm7
|
||||
movaps xmm0, xmm7
|
||||
psrldq xmm0, 8
|
||||
movd r15d, xmm0
|
||||
movd eax, xmm4
|
||||
movd edx, xmm5
|
||||
movaps xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
movd r9d, xmm0
|
||||
|
||||
CryptonightR_soft_aes_template_part2:
|
||||
mov rsp, r10
|
||||
mov [rsp+144], ebx
|
||||
mov [rsp+148], esi
|
||||
mov [rsp+152], edi
|
||||
mov [rsp+156], ebp
|
||||
|
||||
mov edi, edi
|
||||
shl rbp, 32
|
||||
or rbp, rdi
|
||||
xor r8, rbp
|
||||
|
||||
mov ebx, ebx
|
||||
shl rsi, 32
|
||||
or rsi, rbx
|
||||
xor QWORD PTR [rsp+320], rsi
|
||||
|
||||
mov rbx, [rsp+160]
|
||||
mov rdi, [rsp+168]
|
||||
mov rbp, [rsp+176]
|
||||
mov r10, [rsp+184]
|
||||
|
||||
mov r9, r10
|
||||
xor r9, 16
|
||||
mov rcx, r10
|
||||
xor rcx, 32
|
||||
xor r10, 48
|
||||
mov rax, rbx
|
||||
mul rdi
|
||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm2
|
||||
pxor xmm6, xmm1
|
||||
paddq xmm1, xmm7
|
||||
add r8, rdx
|
||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm6, xmm0
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqa xmm5, xmm4
|
||||
mov r9, QWORD PTR [rsp+320]
|
||||
movdqa xmm4, xmm6
|
||||
add r9, rax
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
mov r10, QWORD PTR [rsp+304]
|
||||
movd r12d, xmm11
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rbx
|
||||
mov rax, r8
|
||||
mov QWORD PTR [r14+8], r9
|
||||
and eax, 2097136
|
||||
xor r9, rbp
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
sub r12d, 1
|
||||
jne CryptonightR_soft_aes_template_mainloop
|
||||
|
||||
CryptonightR_soft_aes_template_part3:
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||
|
||||
add rsp, 232
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
CryptonightR_soft_aes_template_end:
|
1595
src/crypto/asm/win/CryptonightR_template.S
Normal file
1595
src/crypto/asm/win/CryptonightR_template.S
Normal file
File diff suppressed because it is too large
Load diff
1585
src/crypto/asm/win/CryptonightR_template.asm
Normal file
1585
src/crypto/asm/win/CryptonightR_template.asm
Normal file
File diff suppressed because it is too large
Load diff
529
src/crypto/asm/win/CryptonightR_template.inc
Normal file
529
src/crypto/asm/win/CryptonightR_template.inc
Normal file
|
@ -0,0 +1,529 @@
|
|||
PUBLIC FN_PREFIX(CryptonightR_template_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_end)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part4)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_end)
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_template_part1):
|
||||
mov QWORD PTR [rsp+16], rbx
|
||||
mov QWORD PTR [rsp+24], rbp
|
||||
mov QWORD PTR [rsp+32], rsi
|
||||
push r10
|
||||
push r11
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
push rdi
|
||||
sub rsp, 64
|
||||
mov r12, rcx
|
||||
mov r8, QWORD PTR [r12+32]
|
||||
mov rdx, r12
|
||||
xor r8, QWORD PTR [r12]
|
||||
mov r15, QWORD PTR [r12+40]
|
||||
mov r9, r8
|
||||
xor r15, QWORD PTR [r12+8]
|
||||
mov r11, QWORD PTR [r12+224]
|
||||
mov r12, QWORD PTR [r12+56]
|
||||
xor r12, QWORD PTR [rdx+24]
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
movaps XMMWORD PTR [rsp+48], xmm6
|
||||
movd xmm0, r12
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+16], xmm8
|
||||
movaps XMMWORD PTR [rsp], xmm9
|
||||
mov r12, QWORD PTR [rdx+88]
|
||||
xor r12, QWORD PTR [rdx+72]
|
||||
movd xmm6, rax
|
||||
mov rax, QWORD PTR [rdx+80]
|
||||
xor rax, QWORD PTR [rdx+64]
|
||||
punpcklqdq xmm6, xmm0
|
||||
and r9d, 2097136
|
||||
movd xmm0, r12
|
||||
movd xmm7, rax
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r10d, r9d
|
||||
movd xmm9, rsp
|
||||
mov rsp, r8
|
||||
mov r8d, 524288
|
||||
|
||||
mov ebx, [rdx+96]
|
||||
mov esi, [rdx+100]
|
||||
mov edi, [rdx+104]
|
||||
mov ebp, [rdx+108]
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_template_mainloop):
|
||||
movdqa xmm5, XMMWORD PTR [r9+r11]
|
||||
movd xmm0, r15
|
||||
movd xmm4, rsp
|
||||
punpcklqdq xmm4, xmm0
|
||||
lea rdx, QWORD PTR [r9+r11]
|
||||
|
||||
aesenc xmm5, xmm4
|
||||
|
||||
mov r12d, r9d
|
||||
mov eax, r9d
|
||||
xor r9d, 48
|
||||
xor r12d, 16
|
||||
xor eax, 32
|
||||
movdqu xmm0, XMMWORD PTR [r9+r11]
|
||||
movaps xmm3, xmm0
|
||||
movdqu xmm2, XMMWORD PTR [r12+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
pxor xmm0, xmm2
|
||||
pxor xmm5, xmm1
|
||||
pxor xmm5, xmm0
|
||||
paddq xmm3, xmm7
|
||||
paddq xmm2, xmm6
|
||||
paddq xmm1, xmm4
|
||||
movdqu XMMWORD PTR [r12+r11], xmm3
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movdqu XMMWORD PTR [r9+r11], xmm1
|
||||
|
||||
movd r12, xmm5
|
||||
movd r10d, xmm5
|
||||
and r10d, 2097136
|
||||
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm6
|
||||
movdqu XMMWORD PTR [rdx], xmm0
|
||||
|
||||
lea r13d, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or r13, rdx
|
||||
|
||||
xor r13, QWORD PTR [r10+r11]
|
||||
mov r14, QWORD PTR [r10+r11+8]
|
||||
|
||||
movd eax, xmm6
|
||||
movd edx, xmm7
|
||||
pextrd r9d, xmm7, 2
|
||||
|
||||
FN_PREFIX(CryptonightR_template_part2):
|
||||
mov eax, edi
|
||||
mov edx, ebp
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor rsp, rax
|
||||
|
||||
mov eax, ebx
|
||||
mov edx, esi
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r15, rax
|
||||
|
||||
mov rax, r13
|
||||
mul r12
|
||||
|
||||
mov r9d, r10d
|
||||
mov r12d, r10d
|
||||
xor r9d, 16
|
||||
xor r12d, 32
|
||||
xor r10d, 48
|
||||
movdqa xmm1, XMMWORD PTR [r12+r11]
|
||||
movaps xmm3, xmm1
|
||||
movdqa xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqa xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm1, xmm2
|
||||
pxor xmm5, xmm0
|
||||
pxor xmm5, xmm1
|
||||
paddq xmm3, xmm4
|
||||
paddq xmm2, xmm6
|
||||
paddq xmm0, xmm7
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqu XMMWORD PTR [r12+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm3
|
||||
|
||||
movdqa xmm7, xmm6
|
||||
add r15, rax
|
||||
add rsp, rdx
|
||||
xor r10, 48
|
||||
mov QWORD PTR [r10+r11], rsp
|
||||
xor rsp, r13
|
||||
mov r9d, esp
|
||||
mov QWORD PTR [r10+r11+8], r15
|
||||
and r9d, 2097136
|
||||
xor r15, r14
|
||||
movdqa xmm6, xmm5
|
||||
dec r8d
|
||||
jnz FN_PREFIX(CryptonightR_template_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightR_template_part3):
|
||||
movd rsp, xmm9
|
||||
|
||||
mov rbx, QWORD PTR [rsp+136]
|
||||
mov rbp, QWORD PTR [rsp+144]
|
||||
mov rsi, QWORD PTR [rsp+152]
|
||||
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+16]
|
||||
movaps xmm9, XMMWORD PTR [rsp]
|
||||
add rsp, 64
|
||||
pop rdi
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop r11
|
||||
pop r10
|
||||
ret 0
|
||||
FN_PREFIX(CryptonightR_template_end):
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_template_double_part1):
|
||||
mov QWORD PTR [rsp+24], rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 320
|
||||
mov r14, QWORD PTR [rcx+32]
|
||||
mov r8, rcx
|
||||
xor r14, QWORD PTR [rcx]
|
||||
mov r12, QWORD PTR [rcx+40]
|
||||
mov ebx, r14d
|
||||
mov rsi, QWORD PTR [rcx+224]
|
||||
and ebx, 2097136
|
||||
xor r12, QWORD PTR [rcx+8]
|
||||
mov rcx, QWORD PTR [rcx+56]
|
||||
xor rcx, QWORD PTR [r8+24]
|
||||
mov rax, QWORD PTR [r8+48]
|
||||
xor rax, QWORD PTR [r8+16]
|
||||
mov r15, QWORD PTR [rdx+32]
|
||||
xor r15, QWORD PTR [rdx]
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r8+88]
|
||||
xor rcx, QWORD PTR [r8+72]
|
||||
mov r13, QWORD PTR [rdx+40]
|
||||
mov rdi, QWORD PTR [rdx+224]
|
||||
xor r13, QWORD PTR [rdx+8]
|
||||
movaps XMMWORD PTR [rsp+160], xmm6
|
||||
movaps XMMWORD PTR [rsp+176], xmm7
|
||||
movaps XMMWORD PTR [rsp+192], xmm8
|
||||
movaps XMMWORD PTR [rsp+208], xmm9
|
||||
movaps XMMWORD PTR [rsp+224], xmm10
|
||||
movaps XMMWORD PTR [rsp+240], xmm11
|
||||
movaps XMMWORD PTR [rsp+256], xmm12
|
||||
movaps XMMWORD PTR [rsp+272], xmm13
|
||||
movaps XMMWORD PTR [rsp+288], xmm14
|
||||
movaps XMMWORD PTR [rsp+304], xmm15
|
||||
movd xmm7, rax
|
||||
mov rax, QWORD PTR [r8+80]
|
||||
xor rax, QWORD PTR [r8+64]
|
||||
|
||||
movaps xmm1, XMMWORD PTR [rdx+96]
|
||||
movaps xmm2, XMMWORD PTR [r8+96]
|
||||
movaps XMMWORD PTR [rsp], xmm1
|
||||
movaps XMMWORD PTR [rsp+16], xmm2
|
||||
|
||||
mov r8d, r15d
|
||||
punpcklqdq xmm7, xmm0
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdx+56]
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
movd xmm9, rax
|
||||
mov QWORD PTR [rsp+128], rsi
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
punpcklqdq xmm9, xmm0
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdx+88]
|
||||
xor rcx, QWORD PTR [rdx+72]
|
||||
movd xmm8, rax
|
||||
mov QWORD PTR [rsp+136], rdi
|
||||
mov rax, QWORD PTR [rdx+80]
|
||||
xor rax, QWORD PTR [rdx+64]
|
||||
punpcklqdq xmm8, xmm0
|
||||
and r8d, 2097136
|
||||
movd xmm0, rcx
|
||||
mov r11d, 524288
|
||||
movd xmm10, rax
|
||||
punpcklqdq xmm10, xmm0
|
||||
|
||||
movd xmm14, QWORD PTR [rsp+128]
|
||||
movd xmm15, QWORD PTR [rsp+136]
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_template_double_mainloop):
|
||||
movdqu xmm6, XMMWORD PTR [rbx+rsi]
|
||||
movd xmm0, r12
|
||||
mov ecx, ebx
|
||||
movd xmm3, r14
|
||||
punpcklqdq xmm3, xmm0
|
||||
xor ebx, 16
|
||||
aesenc xmm6, xmm3
|
||||
movd xmm4, r15
|
||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
||||
pxor xmm6, xmm0
|
||||
xor ebx, 48
|
||||
paddq xmm0, xmm7
|
||||
movdqu xmm1, XMMWORD PTR [rbx+rsi]
|
||||
pxor xmm6, xmm1
|
||||
movdqu XMMWORD PTR [rbx+rsi], xmm0
|
||||
paddq xmm1, xmm3
|
||||
xor ebx, 16
|
||||
mov eax, ebx
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
||||
pxor xmm6, xmm0
|
||||
movd rdx, xmm6
|
||||
movdqu XMMWORD PTR [rbx+rsi], xmm1
|
||||
paddq xmm0, xmm9
|
||||
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm7
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm0
|
||||
mov esi, edx
|
||||
movdqu xmm5, XMMWORD PTR [r8+rdi]
|
||||
and esi, 2097136
|
||||
mov ecx, r8d
|
||||
movd xmm0, r13
|
||||
punpcklqdq xmm4, xmm0
|
||||
xor r8d, 16
|
||||
aesenc xmm5, xmm4
|
||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
||||
pxor xmm5, xmm0
|
||||
xor r8d, 48
|
||||
paddq xmm0, xmm8
|
||||
movdqu xmm1, XMMWORD PTR [r8+rdi]
|
||||
pxor xmm5, xmm1
|
||||
movdqu XMMWORD PTR [r8+rdi], xmm0
|
||||
paddq xmm1, xmm4
|
||||
xor r8d, 16
|
||||
mov eax, r8d
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
||||
pxor xmm5, xmm0
|
||||
movdqu XMMWORD PTR [r8+rdi], xmm1
|
||||
paddq xmm0, xmm10
|
||||
movdqu XMMWORD PTR [rax+rdi], xmm0
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm8
|
||||
movdqu XMMWORD PTR [rcx+rdi], xmm0
|
||||
movd rdi, xmm5
|
||||
movd rcx, xmm14
|
||||
mov ebp, edi
|
||||
mov r8, QWORD PTR [rcx+rsi]
|
||||
mov r10, QWORD PTR [rcx+rsi+8]
|
||||
lea r9, QWORD PTR [rcx+rsi]
|
||||
xor esi, 16
|
||||
|
||||
movd xmm0, rsp
|
||||
movd xmm1, rsi
|
||||
movd xmm2, rdi
|
||||
movd xmm11, rbp
|
||||
movd xmm12, r15
|
||||
movd xmm13, rdx
|
||||
mov [rsp+104], rcx
|
||||
mov [rsp+112], r9
|
||||
|
||||
mov ebx, DWORD PTR [rsp+16]
|
||||
mov esi, DWORD PTR [rsp+20]
|
||||
mov edi, DWORD PTR [rsp+24]
|
||||
mov ebp, DWORD PTR [rsp+28]
|
||||
|
||||
lea eax, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r8, rax
|
||||
|
||||
movd esp, xmm3
|
||||
pextrd r15d, xmm3, 2
|
||||
movd eax, xmm7
|
||||
movd edx, xmm9
|
||||
pextrd r9d, xmm9, 2
|
||||
|
||||
FN_PREFIX(CryptonightR_template_double_part2):
|
||||
|
||||
mov eax, edi
|
||||
mov edx, ebp
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r14, rax
|
||||
|
||||
mov eax, ebx
|
||||
mov edx, esi
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r12, rax
|
||||
|
||||
movd rsp, xmm0
|
||||
mov DWORD PTR [rsp+16], ebx
|
||||
mov DWORD PTR [rsp+20], esi
|
||||
mov DWORD PTR [rsp+24], edi
|
||||
mov DWORD PTR [rsp+28], ebp
|
||||
|
||||
movd rsi, xmm1
|
||||
movd rdi, xmm2
|
||||
movd rbp, xmm11
|
||||
movd r15, xmm12
|
||||
movd rdx, xmm13
|
||||
mov rcx, [rsp+104]
|
||||
mov r9, [rsp+112]
|
||||
|
||||
mov rbx, r8
|
||||
mov rax, r8
|
||||
mul rdx
|
||||
and ebp, 2097136
|
||||
mov r8, rax
|
||||
movdqu xmm1, XMMWORD PTR [rcx+rsi]
|
||||
pxor xmm6, xmm1
|
||||
xor esi, 48
|
||||
paddq xmm1, xmm7
|
||||
movdqu xmm2, XMMWORD PTR [rsi+rcx]
|
||||
pxor xmm6, xmm2
|
||||
paddq xmm2, xmm3
|
||||
movdqu XMMWORD PTR [rsi+rcx], xmm1
|
||||
xor esi, 16
|
||||
mov eax, esi
|
||||
mov rsi, rcx
|
||||
movdqu xmm0, XMMWORD PTR [rax+rcx]
|
||||
pxor xmm6, xmm0
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm2
|
||||
paddq xmm0, xmm9
|
||||
add r12, r8
|
||||
xor rax, 32
|
||||
add r14, rdx
|
||||
movdqa xmm9, xmm7
|
||||
movdqa xmm7, xmm6
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
||||
mov QWORD PTR [r9+8], r12
|
||||
xor r12, r10
|
||||
mov QWORD PTR [r9], r14
|
||||
movd rcx, xmm15
|
||||
xor r14, rbx
|
||||
mov r10d, ebp
|
||||
mov ebx, r14d
|
||||
xor ebp, 16
|
||||
and ebx, 2097136
|
||||
mov r8, QWORD PTR [r10+rcx]
|
||||
mov r9, QWORD PTR [r10+rcx+8]
|
||||
|
||||
movd xmm0, rsp
|
||||
movd xmm1, rbx
|
||||
movd xmm2, rsi
|
||||
movd xmm11, rdi
|
||||
movd xmm12, rbp
|
||||
movd xmm13, r15
|
||||
mov [rsp+104], rcx
|
||||
mov [rsp+112], r9
|
||||
|
||||
mov ebx, DWORD PTR [rsp]
|
||||
mov esi, DWORD PTR [rsp+4]
|
||||
mov edi, DWORD PTR [rsp+8]
|
||||
mov ebp, DWORD PTR [rsp+12]
|
||||
|
||||
lea eax, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
|
||||
xor r8, rax
|
||||
movd xmm3, r8
|
||||
|
||||
movd esp, xmm4
|
||||
pextrd r15d, xmm4, 2
|
||||
movd eax, xmm8
|
||||
movd edx, xmm10
|
||||
pextrd r9d, xmm10, 2
|
||||
|
||||
FN_PREFIX(CryptonightR_template_double_part3):
|
||||
|
||||
movd r15, xmm13
|
||||
|
||||
mov eax, edi
|
||||
mov edx, ebp
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r15, rax
|
||||
|
||||
mov eax, ebx
|
||||
mov edx, esi
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r13, rax
|
||||
|
||||
movd rsp, xmm0
|
||||
mov DWORD PTR [rsp], ebx
|
||||
mov DWORD PTR [rsp+4], esi
|
||||
mov DWORD PTR [rsp+8], edi
|
||||
mov DWORD PTR [rsp+12], ebp
|
||||
|
||||
movd rbx, xmm1
|
||||
movd rsi, xmm2
|
||||
movd rdi, xmm11
|
||||
movd rbp, xmm12
|
||||
mov rcx, [rsp+104]
|
||||
mov r9, [rsp+112]
|
||||
|
||||
mov rax, r8
|
||||
mul rdi
|
||||
mov rdi, rcx
|
||||
mov r8, rax
|
||||
movdqu xmm1, XMMWORD PTR [rbp+rcx]
|
||||
pxor xmm5, xmm1
|
||||
xor ebp, 48
|
||||
paddq xmm1, xmm8
|
||||
add r13, r8
|
||||
movdqu xmm2, XMMWORD PTR [rbp+rcx]
|
||||
pxor xmm5, xmm2
|
||||
add r15, rdx
|
||||
movdqu XMMWORD PTR [rbp+rcx], xmm1
|
||||
paddq xmm2, xmm4
|
||||
xor ebp, 16
|
||||
mov eax, ebp
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [rbp+rcx]
|
||||
pxor xmm5, xmm0
|
||||
movdqu XMMWORD PTR [rbp+rcx], xmm2
|
||||
paddq xmm0, xmm10
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
||||
movd rax, xmm3
|
||||
movdqa xmm10, xmm8
|
||||
mov QWORD PTR [r10+rcx], r15
|
||||
movdqa xmm8, xmm5
|
||||
xor r15, rax
|
||||
mov QWORD PTR [r10+rcx+8], r13
|
||||
mov r8d, r15d
|
||||
xor r13, r9
|
||||
and r8d, 2097136
|
||||
dec r11d
|
||||
jnz FN_PREFIX(CryptonightR_template_double_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightR_template_double_part4):
|
||||
|
||||
mov rbx, QWORD PTR [rsp+400]
|
||||
movaps xmm6, XMMWORD PTR [rsp+160]
|
||||
movaps xmm7, XMMWORD PTR [rsp+176]
|
||||
movaps xmm8, XMMWORD PTR [rsp+192]
|
||||
movaps xmm9, XMMWORD PTR [rsp+208]
|
||||
movaps xmm10, XMMWORD PTR [rsp+224]
|
||||
movaps xmm11, XMMWORD PTR [rsp+240]
|
||||
movaps xmm12, XMMWORD PTR [rsp+256]
|
||||
movaps xmm13, XMMWORD PTR [rsp+272]
|
||||
movaps xmm14, XMMWORD PTR [rsp+288]
|
||||
movaps xmm15, XMMWORD PTR [rsp+304]
|
||||
add rsp, 320
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
ret 0
|
||||
FN_PREFIX(CryptonightR_template_double_end):
|
529
src/crypto/asm/win/CryptonightR_template_win.inc
Normal file
529
src/crypto/asm/win/CryptonightR_template_win.inc
Normal file
|
@ -0,0 +1,529 @@
|
|||
PUBLIC CryptonightR_template_part1
|
||||
PUBLIC CryptonightR_template_mainloop
|
||||
PUBLIC CryptonightR_template_part2
|
||||
PUBLIC CryptonightR_template_part3
|
||||
PUBLIC CryptonightR_template_end
|
||||
PUBLIC CryptonightR_template_double_part1
|
||||
PUBLIC CryptonightR_template_double_mainloop
|
||||
PUBLIC CryptonightR_template_double_part2
|
||||
PUBLIC CryptonightR_template_double_part3
|
||||
PUBLIC CryptonightR_template_double_part4
|
||||
PUBLIC CryptonightR_template_double_end
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightR_template_part1:
|
||||
mov QWORD PTR [rsp+16], rbx
|
||||
mov QWORD PTR [rsp+24], rbp
|
||||
mov QWORD PTR [rsp+32], rsi
|
||||
push r10
|
||||
push r11
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
push rdi
|
||||
sub rsp, 64
|
||||
mov r12, rcx
|
||||
mov r8, QWORD PTR [r12+32]
|
||||
mov rdx, r12
|
||||
xor r8, QWORD PTR [r12]
|
||||
mov r15, QWORD PTR [r12+40]
|
||||
mov r9, r8
|
||||
xor r15, QWORD PTR [r12+8]
|
||||
mov r11, QWORD PTR [r12+224]
|
||||
mov r12, QWORD PTR [r12+56]
|
||||
xor r12, QWORD PTR [rdx+24]
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
movaps XMMWORD PTR [rsp+48], xmm6
|
||||
movd xmm0, r12
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+16], xmm8
|
||||
movaps XMMWORD PTR [rsp], xmm9
|
||||
mov r12, QWORD PTR [rdx+88]
|
||||
xor r12, QWORD PTR [rdx+72]
|
||||
movd xmm6, rax
|
||||
mov rax, QWORD PTR [rdx+80]
|
||||
xor rax, QWORD PTR [rdx+64]
|
||||
punpcklqdq xmm6, xmm0
|
||||
and r9d, 2097136
|
||||
movd xmm0, r12
|
||||
movd xmm7, rax
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r10d, r9d
|
||||
movd xmm9, rsp
|
||||
mov rsp, r8
|
||||
mov r8d, 524288
|
||||
|
||||
mov ebx, [rdx+96]
|
||||
mov esi, [rdx+100]
|
||||
mov edi, [rdx+104]
|
||||
mov ebp, [rdx+108]
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightR_template_mainloop:
|
||||
movdqa xmm5, XMMWORD PTR [r9+r11]
|
||||
movd xmm0, r15
|
||||
movd xmm4, rsp
|
||||
punpcklqdq xmm4, xmm0
|
||||
lea rdx, QWORD PTR [r9+r11]
|
||||
|
||||
aesenc xmm5, xmm4
|
||||
|
||||
mov r12d, r9d
|
||||
mov eax, r9d
|
||||
xor r9d, 48
|
||||
xor r12d, 16
|
||||
xor eax, 32
|
||||
movdqu xmm0, XMMWORD PTR [r9+r11]
|
||||
movaps xmm3, xmm0
|
||||
movdqu xmm2, XMMWORD PTR [r12+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
pxor xmm0, xmm2
|
||||
pxor xmm5, xmm1
|
||||
pxor xmm5, xmm0
|
||||
paddq xmm3, xmm7
|
||||
paddq xmm2, xmm6
|
||||
paddq xmm1, xmm4
|
||||
movdqu XMMWORD PTR [r12+r11], xmm3
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movdqu XMMWORD PTR [r9+r11], xmm1
|
||||
|
||||
movd r12, xmm5
|
||||
movd r10d, xmm5
|
||||
and r10d, 2097136
|
||||
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm6
|
||||
movdqu XMMWORD PTR [rdx], xmm0
|
||||
|
||||
lea r13d, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or r13, rdx
|
||||
|
||||
xor r13, QWORD PTR [r10+r11]
|
||||
mov r14, QWORD PTR [r10+r11+8]
|
||||
|
||||
movd eax, xmm6
|
||||
movd edx, xmm7
|
||||
pextrd r9d, xmm7, 2
|
||||
|
||||
CryptonightR_template_part2:
|
||||
mov eax, edi
|
||||
mov edx, ebp
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor rsp, rax
|
||||
|
||||
mov eax, ebx
|
||||
mov edx, esi
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r15, rax
|
||||
|
||||
mov rax, r13
|
||||
mul r12
|
||||
|
||||
mov r9d, r10d
|
||||
mov r12d, r10d
|
||||
xor r9d, 16
|
||||
xor r12d, 32
|
||||
xor r10d, 48
|
||||
movdqa xmm1, XMMWORD PTR [r12+r11]
|
||||
movaps xmm3, xmm1
|
||||
movdqa xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqa xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm1, xmm2
|
||||
pxor xmm5, xmm0
|
||||
pxor xmm5, xmm1
|
||||
paddq xmm3, xmm4
|
||||
paddq xmm2, xmm6
|
||||
paddq xmm0, xmm7
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqu XMMWORD PTR [r12+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm3
|
||||
|
||||
movdqa xmm7, xmm6
|
||||
add r15, rax
|
||||
add rsp, rdx
|
||||
xor r10, 48
|
||||
mov QWORD PTR [r10+r11], rsp
|
||||
xor rsp, r13
|
||||
mov r9d, esp
|
||||
mov QWORD PTR [r10+r11+8], r15
|
||||
and r9d, 2097136
|
||||
xor r15, r14
|
||||
movdqa xmm6, xmm5
|
||||
dec r8d
|
||||
jnz CryptonightR_template_mainloop
|
||||
|
||||
CryptonightR_template_part3:
|
||||
movd rsp, xmm9
|
||||
|
||||
mov rbx, QWORD PTR [rsp+136]
|
||||
mov rbp, QWORD PTR [rsp+144]
|
||||
mov rsi, QWORD PTR [rsp+152]
|
||||
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+16]
|
||||
movaps xmm9, XMMWORD PTR [rsp]
|
||||
add rsp, 64
|
||||
pop rdi
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop r11
|
||||
pop r10
|
||||
ret 0
|
||||
CryptonightR_template_end:
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightR_template_double_part1:
|
||||
mov QWORD PTR [rsp+24], rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 320
|
||||
mov r14, QWORD PTR [rcx+32]
|
||||
mov r8, rcx
|
||||
xor r14, QWORD PTR [rcx]
|
||||
mov r12, QWORD PTR [rcx+40]
|
||||
mov ebx, r14d
|
||||
mov rsi, QWORD PTR [rcx+224]
|
||||
and ebx, 2097136
|
||||
xor r12, QWORD PTR [rcx+8]
|
||||
mov rcx, QWORD PTR [rcx+56]
|
||||
xor rcx, QWORD PTR [r8+24]
|
||||
mov rax, QWORD PTR [r8+48]
|
||||
xor rax, QWORD PTR [r8+16]
|
||||
mov r15, QWORD PTR [rdx+32]
|
||||
xor r15, QWORD PTR [rdx]
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r8+88]
|
||||
xor rcx, QWORD PTR [r8+72]
|
||||
mov r13, QWORD PTR [rdx+40]
|
||||
mov rdi, QWORD PTR [rdx+224]
|
||||
xor r13, QWORD PTR [rdx+8]
|
||||
movaps XMMWORD PTR [rsp+160], xmm6
|
||||
movaps XMMWORD PTR [rsp+176], xmm7
|
||||
movaps XMMWORD PTR [rsp+192], xmm8
|
||||
movaps XMMWORD PTR [rsp+208], xmm9
|
||||
movaps XMMWORD PTR [rsp+224], xmm10
|
||||
movaps XMMWORD PTR [rsp+240], xmm11
|
||||
movaps XMMWORD PTR [rsp+256], xmm12
|
||||
movaps XMMWORD PTR [rsp+272], xmm13
|
||||
movaps XMMWORD PTR [rsp+288], xmm14
|
||||
movaps XMMWORD PTR [rsp+304], xmm15
|
||||
movd xmm7, rax
|
||||
mov rax, QWORD PTR [r8+80]
|
||||
xor rax, QWORD PTR [r8+64]
|
||||
|
||||
movaps xmm1, XMMWORD PTR [rdx+96]
|
||||
movaps xmm2, XMMWORD PTR [r8+96]
|
||||
movaps XMMWORD PTR [rsp], xmm1
|
||||
movaps XMMWORD PTR [rsp+16], xmm2
|
||||
|
||||
mov r8d, r15d
|
||||
punpcklqdq xmm7, xmm0
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdx+56]
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
movd xmm9, rax
|
||||
mov QWORD PTR [rsp+128], rsi
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
punpcklqdq xmm9, xmm0
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdx+88]
|
||||
xor rcx, QWORD PTR [rdx+72]
|
||||
movd xmm8, rax
|
||||
mov QWORD PTR [rsp+136], rdi
|
||||
mov rax, QWORD PTR [rdx+80]
|
||||
xor rax, QWORD PTR [rdx+64]
|
||||
punpcklqdq xmm8, xmm0
|
||||
and r8d, 2097136
|
||||
movd xmm0, rcx
|
||||
mov r11d, 524288
|
||||
movd xmm10, rax
|
||||
punpcklqdq xmm10, xmm0
|
||||
|
||||
movd xmm14, QWORD PTR [rsp+128]
|
||||
movd xmm15, QWORD PTR [rsp+136]
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightR_template_double_mainloop:
|
||||
movdqu xmm6, XMMWORD PTR [rbx+rsi]
|
||||
movd xmm0, r12
|
||||
mov ecx, ebx
|
||||
movd xmm3, r14
|
||||
punpcklqdq xmm3, xmm0
|
||||
xor ebx, 16
|
||||
aesenc xmm6, xmm3
|
||||
movd xmm4, r15
|
||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
||||
pxor xmm6, xmm0
|
||||
xor ebx, 48
|
||||
paddq xmm0, xmm7
|
||||
movdqu xmm1, XMMWORD PTR [rbx+rsi]
|
||||
pxor xmm6, xmm1
|
||||
movdqu XMMWORD PTR [rbx+rsi], xmm0
|
||||
paddq xmm1, xmm3
|
||||
xor ebx, 16
|
||||
mov eax, ebx
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
||||
pxor xmm6, xmm0
|
||||
movd rdx, xmm6
|
||||
movdqu XMMWORD PTR [rbx+rsi], xmm1
|
||||
paddq xmm0, xmm9
|
||||
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm7
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm0
|
||||
mov esi, edx
|
||||
movdqu xmm5, XMMWORD PTR [r8+rdi]
|
||||
and esi, 2097136
|
||||
mov ecx, r8d
|
||||
movd xmm0, r13
|
||||
punpcklqdq xmm4, xmm0
|
||||
xor r8d, 16
|
||||
aesenc xmm5, xmm4
|
||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
||||
pxor xmm5, xmm0
|
||||
xor r8d, 48
|
||||
paddq xmm0, xmm8
|
||||
movdqu xmm1, XMMWORD PTR [r8+rdi]
|
||||
pxor xmm5, xmm1
|
||||
movdqu XMMWORD PTR [r8+rdi], xmm0
|
||||
paddq xmm1, xmm4
|
||||
xor r8d, 16
|
||||
mov eax, r8d
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
||||
pxor xmm5, xmm0
|
||||
movdqu XMMWORD PTR [r8+rdi], xmm1
|
||||
paddq xmm0, xmm10
|
||||
movdqu XMMWORD PTR [rax+rdi], xmm0
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm8
|
||||
movdqu XMMWORD PTR [rcx+rdi], xmm0
|
||||
movd rdi, xmm5
|
||||
movd rcx, xmm14
|
||||
mov ebp, edi
|
||||
mov r8, QWORD PTR [rcx+rsi]
|
||||
mov r10, QWORD PTR [rcx+rsi+8]
|
||||
lea r9, QWORD PTR [rcx+rsi]
|
||||
xor esi, 16
|
||||
|
||||
movd xmm0, rsp
|
||||
movd xmm1, rsi
|
||||
movd xmm2, rdi
|
||||
movd xmm11, rbp
|
||||
movd xmm12, r15
|
||||
movd xmm13, rdx
|
||||
mov [rsp+104], rcx
|
||||
mov [rsp+112], r9
|
||||
|
||||
mov ebx, DWORD PTR [rsp+16]
|
||||
mov esi, DWORD PTR [rsp+20]
|
||||
mov edi, DWORD PTR [rsp+24]
|
||||
mov ebp, DWORD PTR [rsp+28]
|
||||
|
||||
lea eax, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r8, rax
|
||||
|
||||
movd esp, xmm3
|
||||
pextrd r15d, xmm3, 2
|
||||
movd eax, xmm7
|
||||
movd edx, xmm9
|
||||
pextrd r9d, xmm9, 2
|
||||
|
||||
CryptonightR_template_double_part2:
|
||||
|
||||
mov eax, edi
|
||||
mov edx, ebp
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r14, rax
|
||||
|
||||
mov eax, ebx
|
||||
mov edx, esi
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r12, rax
|
||||
|
||||
movd rsp, xmm0
|
||||
mov DWORD PTR [rsp+16], ebx
|
||||
mov DWORD PTR [rsp+20], esi
|
||||
mov DWORD PTR [rsp+24], edi
|
||||
mov DWORD PTR [rsp+28], ebp
|
||||
|
||||
movd rsi, xmm1
|
||||
movd rdi, xmm2
|
||||
movd rbp, xmm11
|
||||
movd r15, xmm12
|
||||
movd rdx, xmm13
|
||||
mov rcx, [rsp+104]
|
||||
mov r9, [rsp+112]
|
||||
|
||||
mov rbx, r8
|
||||
mov rax, r8
|
||||
mul rdx
|
||||
and ebp, 2097136
|
||||
mov r8, rax
|
||||
movdqu xmm1, XMMWORD PTR [rcx+rsi]
|
||||
pxor xmm6, xmm1
|
||||
xor esi, 48
|
||||
paddq xmm1, xmm7
|
||||
movdqu xmm2, XMMWORD PTR [rsi+rcx]
|
||||
pxor xmm6, xmm2
|
||||
paddq xmm2, xmm3
|
||||
movdqu XMMWORD PTR [rsi+rcx], xmm1
|
||||
xor esi, 16
|
||||
mov eax, esi
|
||||
mov rsi, rcx
|
||||
movdqu xmm0, XMMWORD PTR [rax+rcx]
|
||||
pxor xmm6, xmm0
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm2
|
||||
paddq xmm0, xmm9
|
||||
add r12, r8
|
||||
xor rax, 32
|
||||
add r14, rdx
|
||||
movdqa xmm9, xmm7
|
||||
movdqa xmm7, xmm6
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
||||
mov QWORD PTR [r9+8], r12
|
||||
xor r12, r10
|
||||
mov QWORD PTR [r9], r14
|
||||
movd rcx, xmm15
|
||||
xor r14, rbx
|
||||
mov r10d, ebp
|
||||
mov ebx, r14d
|
||||
xor ebp, 16
|
||||
and ebx, 2097136
|
||||
mov r8, QWORD PTR [r10+rcx]
|
||||
mov r9, QWORD PTR [r10+rcx+8]
|
||||
|
||||
movd xmm0, rsp
|
||||
movd xmm1, rbx
|
||||
movd xmm2, rsi
|
||||
movd xmm11, rdi
|
||||
movd xmm12, rbp
|
||||
movd xmm13, r15
|
||||
mov [rsp+104], rcx
|
||||
mov [rsp+112], r9
|
||||
|
||||
mov ebx, DWORD PTR [rsp]
|
||||
mov esi, DWORD PTR [rsp+4]
|
||||
mov edi, DWORD PTR [rsp+8]
|
||||
mov ebp, DWORD PTR [rsp+12]
|
||||
|
||||
lea eax, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
|
||||
xor r8, rax
|
||||
movd xmm3, r8
|
||||
|
||||
movd esp, xmm4
|
||||
pextrd r15d, xmm4, 2
|
||||
movd eax, xmm8
|
||||
movd edx, xmm10
|
||||
pextrd r9d, xmm10, 2
|
||||
|
||||
CryptonightR_template_double_part3:
|
||||
|
||||
movd r15, xmm13
|
||||
|
||||
mov eax, edi
|
||||
mov edx, ebp
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r15, rax
|
||||
|
||||
mov eax, ebx
|
||||
mov edx, esi
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r13, rax
|
||||
|
||||
movd rsp, xmm0
|
||||
mov DWORD PTR [rsp], ebx
|
||||
mov DWORD PTR [rsp+4], esi
|
||||
mov DWORD PTR [rsp+8], edi
|
||||
mov DWORD PTR [rsp+12], ebp
|
||||
|
||||
movd rbx, xmm1
|
||||
movd rsi, xmm2
|
||||
movd rdi, xmm11
|
||||
movd rbp, xmm12
|
||||
mov rcx, [rsp+104]
|
||||
mov r9, [rsp+112]
|
||||
|
||||
mov rax, r8
|
||||
mul rdi
|
||||
mov rdi, rcx
|
||||
mov r8, rax
|
||||
movdqu xmm1, XMMWORD PTR [rbp+rcx]
|
||||
pxor xmm5, xmm1
|
||||
xor ebp, 48
|
||||
paddq xmm1, xmm8
|
||||
add r13, r8
|
||||
movdqu xmm2, XMMWORD PTR [rbp+rcx]
|
||||
pxor xmm5, xmm2
|
||||
add r15, rdx
|
||||
movdqu XMMWORD PTR [rbp+rcx], xmm1
|
||||
paddq xmm2, xmm4
|
||||
xor ebp, 16
|
||||
mov eax, ebp
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [rbp+rcx]
|
||||
pxor xmm5, xmm0
|
||||
movdqu XMMWORD PTR [rbp+rcx], xmm2
|
||||
paddq xmm0, xmm10
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
||||
movd rax, xmm3
|
||||
movdqa xmm10, xmm8
|
||||
mov QWORD PTR [r10+rcx], r15
|
||||
movdqa xmm8, xmm5
|
||||
xor r15, rax
|
||||
mov QWORD PTR [r10+rcx+8], r13
|
||||
mov r8d, r15d
|
||||
xor r13, r9
|
||||
and r8d, 2097136
|
||||
dec r11d
|
||||
jnz CryptonightR_template_double_mainloop
|
||||
|
||||
CryptonightR_template_double_part4:
|
||||
|
||||
mov rbx, QWORD PTR [rsp+400]
|
||||
movaps xmm6, XMMWORD PTR [rsp+160]
|
||||
movaps xmm7, XMMWORD PTR [rsp+176]
|
||||
movaps xmm8, XMMWORD PTR [rsp+192]
|
||||
movaps xmm9, XMMWORD PTR [rsp+208]
|
||||
movaps xmm10, XMMWORD PTR [rsp+224]
|
||||
movaps xmm11, XMMWORD PTR [rsp+240]
|
||||
movaps xmm12, XMMWORD PTR [rsp+256]
|
||||
movaps xmm13, XMMWORD PTR [rsp+272]
|
||||
movaps xmm14, XMMWORD PTR [rsp+288]
|
||||
movaps xmm15, XMMWORD PTR [rsp+304]
|
||||
add rsp, 320
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
ret 0
|
||||
CryptonightR_template_double_end:
|
266
src/crypto/asm/win/CryptonightWOW_soft_aes_template.inc
Normal file
266
src/crypto/asm/win/CryptonightWOW_soft_aes_template.inc
Normal file
|
@ -0,0 +1,266 @@
|
|||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_end)
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_part1):
|
||||
mov QWORD PTR [rsp+8], rcx
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 232
|
||||
|
||||
mov eax, [rcx+96]
|
||||
mov ebx, [rcx+100]
|
||||
mov esi, [rcx+104]
|
||||
mov edx, [rcx+108]
|
||||
mov [rsp+144], eax
|
||||
mov [rsp+148], ebx
|
||||
mov [rsp+152], esi
|
||||
mov [rsp+156], edx
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r10, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+40]
|
||||
xor r9, QWORD PTR [rcx+8]
|
||||
movd xmm4, rax
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r11, QWORD PTR [rcx+224]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r10+72]
|
||||
mov rax, QWORD PTR [r10+80]
|
||||
movd xmm0, rdx
|
||||
xor rax, QWORD PTR [r10+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+48], xmm8
|
||||
movaps XMMWORD PTR [rsp+64], xmm9
|
||||
movaps XMMWORD PTR [rsp+80], xmm10
|
||||
movaps XMMWORD PTR [rsp+96], xmm11
|
||||
movaps XMMWORD PTR [rsp+112], xmm12
|
||||
movaps XMMWORD PTR [rsp+128], xmm13
|
||||
|
||||
movd xmm5, rax
|
||||
|
||||
mov rax, r8
|
||||
punpcklqdq xmm4, xmm0
|
||||
and eax, 2097136
|
||||
movd xmm10, QWORD PTR [r10+96]
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+104]
|
||||
xorps xmm9, xmm9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
movd xmm12, r11
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
movd xmm13, rcx
|
||||
mov r12d, 524288
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop):
|
||||
movd xmm11, r12d
|
||||
mov r12, QWORD PTR [r10+272]
|
||||
lea r13, QWORD PTR [rax+r11]
|
||||
mov esi, DWORD PTR [r13]
|
||||
movd xmm0, r9
|
||||
mov r10d, DWORD PTR [r13+4]
|
||||
movd xmm7, r8
|
||||
mov ebp, DWORD PTR [r13+12]
|
||||
mov r14d, DWORD PTR [r13+8]
|
||||
mov rdx, QWORD PTR [rsp+328]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
movd xmm1, r11d
|
||||
add ebp, 256
|
||||
movd r11, xmm12
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
mov rcx, rdx
|
||||
xor eax, r15d
|
||||
punpckldq xmm2, xmm1
|
||||
xor rcx, 16
|
||||
movd xmm6, eax
|
||||
mov rax, rdx
|
||||
punpckldq xmm6, xmm0
|
||||
xor rax, 32
|
||||
punpckldq xmm6, xmm2
|
||||
xor rdx, 48
|
||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||
paddq xmm0, xmm5
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movd rcx, xmm13
|
||||
paddq xmm1, xmm7
|
||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||
movd rdi, xmm6
|
||||
mov r10, rdi
|
||||
and r10d, 2097136
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [r13], xmm0
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov ebp, [rsp+152]
|
||||
add ebx, [rsp+148]
|
||||
add ebp, [rsp+156]
|
||||
shl rbp, 32
|
||||
or rbx, rbp
|
||||
|
||||
xor rbx, QWORD PTR [r10+r11]
|
||||
lea r14, QWORD PTR [r10+r11]
|
||||
mov rbp, QWORD PTR [r14+8]
|
||||
|
||||
mov [rsp+160], rbx
|
||||
mov [rsp+168], rdi
|
||||
mov [rsp+176], rbp
|
||||
mov [rsp+184], r10
|
||||
mov r10, rsp
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov esi, [rsp+148]
|
||||
mov edi, [rsp+152]
|
||||
mov ebp, [rsp+156]
|
||||
|
||||
movd esp, xmm7
|
||||
movaps xmm0, xmm7
|
||||
psrldq xmm0, 8
|
||||
movd r15d, xmm0
|
||||
movd eax, xmm4
|
||||
movd edx, xmm5
|
||||
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_part2):
|
||||
mov rsp, r10
|
||||
mov [rsp+144], ebx
|
||||
mov [rsp+148], esi
|
||||
mov [rsp+152], edi
|
||||
mov [rsp+156], ebp
|
||||
|
||||
mov rbx, [rsp+160]
|
||||
mov rdi, [rsp+168]
|
||||
mov rbp, [rsp+176]
|
||||
mov r10, [rsp+184]
|
||||
|
||||
mov r9, r10
|
||||
xor r9, 16
|
||||
mov rcx, r10
|
||||
xor rcx, 32
|
||||
xor r10, 48
|
||||
mov rax, rbx
|
||||
mul rdi
|
||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||
paddq xmm1, xmm7
|
||||
movd xmm0, rax
|
||||
movd xmm3, rdx
|
||||
xor rax, QWORD PTR [r11+rcx+8]
|
||||
xor rdx, QWORD PTR [rcx+r11]
|
||||
punpcklqdq xmm3, xmm0
|
||||
add r8, rdx
|
||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm2, xmm3
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqa xmm5, xmm4
|
||||
mov r9, QWORD PTR [rsp+320]
|
||||
movdqa xmm4, xmm6
|
||||
add r9, rax
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
mov r10, QWORD PTR [rsp+304]
|
||||
movd r12d, xmm11
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rbx
|
||||
mov rax, r8
|
||||
mov QWORD PTR [r14+8], r9
|
||||
and eax, 2097136
|
||||
xor r9, rbp
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
sub r12d, 1
|
||||
jne FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_part3):
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||
|
||||
add rsp, 232
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
FN_PREFIX(CryptonightWOW_soft_aes_template_end):
|
266
src/crypto/asm/win/CryptonightWOW_soft_aes_template_win.inc
Normal file
266
src/crypto/asm/win/CryptonightWOW_soft_aes_template_win.inc
Normal file
|
@ -0,0 +1,266 @@
|
|||
PUBLIC CryptonightWOW_soft_aes_template_part1
|
||||
PUBLIC CryptonightWOW_soft_aes_template_mainloop
|
||||
PUBLIC CryptonightWOW_soft_aes_template_part2
|
||||
PUBLIC CryptonightWOW_soft_aes_template_part3
|
||||
PUBLIC CryptonightWOW_soft_aes_template_end
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightWOW_soft_aes_template_part1:
|
||||
mov QWORD PTR [rsp+8], rcx
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 232
|
||||
|
||||
mov eax, [rcx+96]
|
||||
mov ebx, [rcx+100]
|
||||
mov esi, [rcx+104]
|
||||
mov edx, [rcx+108]
|
||||
mov [rsp+144], eax
|
||||
mov [rsp+148], ebx
|
||||
mov [rsp+152], esi
|
||||
mov [rsp+156], edx
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r10, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+40]
|
||||
xor r9, QWORD PTR [rcx+8]
|
||||
movd xmm4, rax
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r11, QWORD PTR [rcx+224]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r10+72]
|
||||
mov rax, QWORD PTR [r10+80]
|
||||
movd xmm0, rdx
|
||||
xor rax, QWORD PTR [r10+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+48], xmm8
|
||||
movaps XMMWORD PTR [rsp+64], xmm9
|
||||
movaps XMMWORD PTR [rsp+80], xmm10
|
||||
movaps XMMWORD PTR [rsp+96], xmm11
|
||||
movaps XMMWORD PTR [rsp+112], xmm12
|
||||
movaps XMMWORD PTR [rsp+128], xmm13
|
||||
|
||||
movd xmm5, rax
|
||||
|
||||
mov rax, r8
|
||||
punpcklqdq xmm4, xmm0
|
||||
and eax, 2097136
|
||||
movd xmm10, QWORD PTR [r10+96]
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+104]
|
||||
xorps xmm9, xmm9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
movd xmm12, r11
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
movd xmm13, rcx
|
||||
mov r12d, 524288
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightWOW_soft_aes_template_mainloop:
|
||||
movd xmm11, r12d
|
||||
mov r12, QWORD PTR [r10+272]
|
||||
lea r13, QWORD PTR [rax+r11]
|
||||
mov esi, DWORD PTR [r13]
|
||||
movd xmm0, r9
|
||||
mov r10d, DWORD PTR [r13+4]
|
||||
movd xmm7, r8
|
||||
mov ebp, DWORD PTR [r13+12]
|
||||
mov r14d, DWORD PTR [r13+8]
|
||||
mov rdx, QWORD PTR [rsp+328]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
movd xmm1, r11d
|
||||
add ebp, 256
|
||||
movd r11, xmm12
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
mov rcx, rdx
|
||||
xor eax, r15d
|
||||
punpckldq xmm2, xmm1
|
||||
xor rcx, 16
|
||||
movd xmm6, eax
|
||||
mov rax, rdx
|
||||
punpckldq xmm6, xmm0
|
||||
xor rax, 32
|
||||
punpckldq xmm6, xmm2
|
||||
xor rdx, 48
|
||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||
paddq xmm0, xmm5
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movd rcx, xmm13
|
||||
paddq xmm1, xmm7
|
||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||
movd rdi, xmm6
|
||||
mov r10, rdi
|
||||
and r10d, 2097136
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [r13], xmm0
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov ebp, [rsp+152]
|
||||
add ebx, [rsp+148]
|
||||
add ebp, [rsp+156]
|
||||
shl rbp, 32
|
||||
or rbx, rbp
|
||||
|
||||
xor rbx, QWORD PTR [r10+r11]
|
||||
lea r14, QWORD PTR [r10+r11]
|
||||
mov rbp, QWORD PTR [r14+8]
|
||||
|
||||
mov [rsp+160], rbx
|
||||
mov [rsp+168], rdi
|
||||
mov [rsp+176], rbp
|
||||
mov [rsp+184], r10
|
||||
mov r10, rsp
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov esi, [rsp+148]
|
||||
mov edi, [rsp+152]
|
||||
mov ebp, [rsp+156]
|
||||
|
||||
movd esp, xmm7
|
||||
movaps xmm0, xmm7
|
||||
psrldq xmm0, 8
|
||||
movd r15d, xmm0
|
||||
movd eax, xmm4
|
||||
movd edx, xmm5
|
||||
|
||||
CryptonightWOW_soft_aes_template_part2:
|
||||
mov rsp, r10
|
||||
mov [rsp+144], ebx
|
||||
mov [rsp+148], esi
|
||||
mov [rsp+152], edi
|
||||
mov [rsp+156], ebp
|
||||
|
||||
mov rbx, [rsp+160]
|
||||
mov rdi, [rsp+168]
|
||||
mov rbp, [rsp+176]
|
||||
mov r10, [rsp+184]
|
||||
|
||||
mov r9, r10
|
||||
xor r9, 16
|
||||
mov rcx, r10
|
||||
xor rcx, 32
|
||||
xor r10, 48
|
||||
mov rax, rbx
|
||||
mul rdi
|
||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||
paddq xmm1, xmm7
|
||||
movd xmm0, rax
|
||||
movd xmm3, rdx
|
||||
xor rax, QWORD PTR [r11+rcx+8]
|
||||
xor rdx, QWORD PTR [rcx+r11]
|
||||
punpcklqdq xmm3, xmm0
|
||||
add r8, rdx
|
||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm2, xmm3
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqa xmm5, xmm4
|
||||
mov r9, QWORD PTR [rsp+320]
|
||||
movdqa xmm4, xmm6
|
||||
add r9, rax
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
mov r10, QWORD PTR [rsp+304]
|
||||
movd r12d, xmm11
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rbx
|
||||
mov rax, r8
|
||||
mov QWORD PTR [r14+8], r9
|
||||
and eax, 2097136
|
||||
xor r9, rbp
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
sub r12d, 1
|
||||
jne CryptonightWOW_soft_aes_template_mainloop
|
||||
|
||||
CryptonightWOW_soft_aes_template_part3:
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||
|
||||
add rsp, 232
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
CryptonightWOW_soft_aes_template_end:
|
486
src/crypto/asm/win/CryptonightWOW_template.inc
Normal file
486
src/crypto/asm/win/CryptonightWOW_template.inc
Normal file
|
@ -0,0 +1,486 @@
|
|||
PUBLIC FN_PREFIX(CryptonightWOW_template_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_end)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_double_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_double_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_double_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_double_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_double_part4)
|
||||
PUBLIC FN_PREFIX(CryptonightWOW_template_double_end)
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightWOW_template_part1):
|
||||
mov QWORD PTR [rsp+16], rbx
|
||||
mov QWORD PTR [rsp+24], rbp
|
||||
mov QWORD PTR [rsp+32], rsi
|
||||
push r10
|
||||
push r11
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
push rdi
|
||||
sub rsp, 64
|
||||
mov r12, rcx
|
||||
mov r8, QWORD PTR [r12+32]
|
||||
mov rdx, r12
|
||||
xor r8, QWORD PTR [r12]
|
||||
mov r15, QWORD PTR [r12+40]
|
||||
mov r9, r8
|
||||
xor r15, QWORD PTR [r12+8]
|
||||
mov r11, QWORD PTR [r12+224]
|
||||
mov r12, QWORD PTR [r12+56]
|
||||
xor r12, QWORD PTR [rdx+24]
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
movaps XMMWORD PTR [rsp+48], xmm6
|
||||
movd xmm0, r12
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+16], xmm8
|
||||
movaps XMMWORD PTR [rsp], xmm9
|
||||
mov r12, QWORD PTR [rdx+88]
|
||||
xor r12, QWORD PTR [rdx+72]
|
||||
movd xmm6, rax
|
||||
mov rax, QWORD PTR [rdx+80]
|
||||
xor rax, QWORD PTR [rdx+64]
|
||||
punpcklqdq xmm6, xmm0
|
||||
and r9d, 2097136
|
||||
movd xmm0, r12
|
||||
movd xmm7, rax
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r10d, r9d
|
||||
movd xmm9, rsp
|
||||
mov rsp, r8
|
||||
mov r8d, 524288
|
||||
|
||||
mov ebx, [rdx+96]
|
||||
mov esi, [rdx+100]
|
||||
mov edi, [rdx+104]
|
||||
mov ebp, [rdx+108]
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightWOW_template_mainloop):
|
||||
movdqa xmm5, XMMWORD PTR [r9+r11]
|
||||
movd xmm0, r15
|
||||
movd xmm4, rsp
|
||||
punpcklqdq xmm4, xmm0
|
||||
lea rdx, QWORD PTR [r9+r11]
|
||||
|
||||
aesenc xmm5, xmm4
|
||||
movd r10d, xmm5
|
||||
and r10d, 2097136
|
||||
|
||||
mov r12d, r9d
|
||||
mov eax, r9d
|
||||
xor r9d, 48
|
||||
xor r12d, 16
|
||||
xor eax, 32
|
||||
movdqu xmm0, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm2, XMMWORD PTR [r12+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
paddq xmm0, xmm7
|
||||
paddq xmm2, xmm6
|
||||
paddq xmm1, xmm4
|
||||
movdqu XMMWORD PTR [r12+r11], xmm0
|
||||
movd r12, xmm5
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movdqu XMMWORD PTR [r9+r11], xmm1
|
||||
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm6
|
||||
movdqu XMMWORD PTR [rdx], xmm0
|
||||
|
||||
lea r13d, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or r13, rdx
|
||||
|
||||
xor r13, QWORD PTR [r10+r11]
|
||||
mov r14, QWORD PTR [r10+r11+8]
|
||||
|
||||
movd eax, xmm6
|
||||
movd edx, xmm7
|
||||
pextrd r9d, xmm7, 2
|
||||
|
||||
FN_PREFIX(CryptonightWOW_template_part2):
|
||||
mov rax, r13
|
||||
mul r12
|
||||
movd xmm0, rax
|
||||
movd xmm3, rdx
|
||||
punpcklqdq xmm3, xmm0
|
||||
|
||||
mov r9d, r10d
|
||||
mov r12d, r10d
|
||||
xor r9d, 16
|
||||
xor r12d, 32
|
||||
xor r10d, 48
|
||||
movdqa xmm1, XMMWORD PTR [r12+r11]
|
||||
xor rdx, QWORD PTR [r12+r11]
|
||||
xor rax, QWORD PTR [r11+r12+8]
|
||||
movdqa xmm2, XMMWORD PTR [r9+r11]
|
||||
pxor xmm3, xmm2
|
||||
paddq xmm7, XMMWORD PTR [r10+r11]
|
||||
paddq xmm1, xmm4
|
||||
paddq xmm3, xmm6
|
||||
movdqu XMMWORD PTR [r9+r11], xmm7
|
||||
movdqu XMMWORD PTR [r12+r11], xmm3
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
|
||||
movdqa xmm7, xmm6
|
||||
add r15, rax
|
||||
add rsp, rdx
|
||||
xor r10, 48
|
||||
mov QWORD PTR [r10+r11], rsp
|
||||
xor rsp, r13
|
||||
mov r9d, esp
|
||||
mov QWORD PTR [r10+r11+8], r15
|
||||
and r9d, 2097136
|
||||
xor r15, r14
|
||||
movdqa xmm6, xmm5
|
||||
dec r8d
|
||||
jnz FN_PREFIX(CryptonightWOW_template_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightWOW_template_part3):
|
||||
movd rsp, xmm9
|
||||
|
||||
mov rbx, QWORD PTR [rsp+136]
|
||||
mov rbp, QWORD PTR [rsp+144]
|
||||
mov rsi, QWORD PTR [rsp+152]
|
||||
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+16]
|
||||
movaps xmm9, XMMWORD PTR [rsp]
|
||||
add rsp, 64
|
||||
pop rdi
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop r11
|
||||
pop r10
|
||||
ret 0
|
||||
FN_PREFIX(CryptonightWOW_template_end):
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightWOW_template_double_part1):
|
||||
mov QWORD PTR [rsp+24], rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 320
|
||||
mov r14, QWORD PTR [rcx+32]
|
||||
mov r8, rcx
|
||||
xor r14, QWORD PTR [rcx]
|
||||
mov r12, QWORD PTR [rcx+40]
|
||||
mov ebx, r14d
|
||||
mov rsi, QWORD PTR [rcx+224]
|
||||
and ebx, 2097136
|
||||
xor r12, QWORD PTR [rcx+8]
|
||||
mov rcx, QWORD PTR [rcx+56]
|
||||
xor rcx, QWORD PTR [r8+24]
|
||||
mov rax, QWORD PTR [r8+48]
|
||||
xor rax, QWORD PTR [r8+16]
|
||||
mov r15, QWORD PTR [rdx+32]
|
||||
xor r15, QWORD PTR [rdx]
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r8+88]
|
||||
xor rcx, QWORD PTR [r8+72]
|
||||
mov r13, QWORD PTR [rdx+40]
|
||||
mov rdi, QWORD PTR [rdx+224]
|
||||
xor r13, QWORD PTR [rdx+8]
|
||||
movaps XMMWORD PTR [rsp+160], xmm6
|
||||
movaps XMMWORD PTR [rsp+176], xmm7
|
||||
movaps XMMWORD PTR [rsp+192], xmm8
|
||||
movaps XMMWORD PTR [rsp+208], xmm9
|
||||
movaps XMMWORD PTR [rsp+224], xmm10
|
||||
movaps XMMWORD PTR [rsp+240], xmm11
|
||||
movaps XMMWORD PTR [rsp+256], xmm12
|
||||
movaps XMMWORD PTR [rsp+272], xmm13
|
||||
movaps XMMWORD PTR [rsp+288], xmm14
|
||||
movaps XMMWORD PTR [rsp+304], xmm15
|
||||
movd xmm7, rax
|
||||
mov rax, QWORD PTR [r8+80]
|
||||
xor rax, QWORD PTR [r8+64]
|
||||
|
||||
movaps xmm1, XMMWORD PTR [rdx+96]
|
||||
movaps xmm2, XMMWORD PTR [r8+96]
|
||||
movaps XMMWORD PTR [rsp], xmm1
|
||||
movaps XMMWORD PTR [rsp+16], xmm2
|
||||
|
||||
mov r8d, r15d
|
||||
punpcklqdq xmm7, xmm0
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdx+56]
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
movd xmm9, rax
|
||||
mov QWORD PTR [rsp+128], rsi
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
punpcklqdq xmm9, xmm0
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdx+88]
|
||||
xor rcx, QWORD PTR [rdx+72]
|
||||
movd xmm8, rax
|
||||
mov QWORD PTR [rsp+136], rdi
|
||||
mov rax, QWORD PTR [rdx+80]
|
||||
xor rax, QWORD PTR [rdx+64]
|
||||
punpcklqdq xmm8, xmm0
|
||||
and r8d, 2097136
|
||||
movd xmm0, rcx
|
||||
mov r11d, 524288
|
||||
movd xmm10, rax
|
||||
punpcklqdq xmm10, xmm0
|
||||
|
||||
movd xmm14, QWORD PTR [rsp+128]
|
||||
movd xmm15, QWORD PTR [rsp+136]
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightWOW_template_double_mainloop):
|
||||
movdqu xmm6, XMMWORD PTR [rbx+rsi]
|
||||
movd xmm0, r12
|
||||
mov ecx, ebx
|
||||
movd xmm3, r14
|
||||
punpcklqdq xmm3, xmm0
|
||||
xor ebx, 16
|
||||
aesenc xmm6, xmm3
|
||||
movd rdx, xmm6
|
||||
movd xmm4, r15
|
||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
||||
xor ebx, 48
|
||||
paddq xmm0, xmm7
|
||||
movdqu xmm1, XMMWORD PTR [rbx+rsi]
|
||||
movdqu XMMWORD PTR [rbx+rsi], xmm0
|
||||
paddq xmm1, xmm3
|
||||
xor ebx, 16
|
||||
mov eax, ebx
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
||||
movdqu XMMWORD PTR [rbx+rsi], xmm1
|
||||
paddq xmm0, xmm9
|
||||
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm7
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm0
|
||||
mov esi, edx
|
||||
movdqu xmm5, XMMWORD PTR [r8+rdi]
|
||||
and esi, 2097136
|
||||
mov ecx, r8d
|
||||
movd xmm0, r13
|
||||
punpcklqdq xmm4, xmm0
|
||||
xor r8d, 16
|
||||
aesenc xmm5, xmm4
|
||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
||||
xor r8d, 48
|
||||
paddq xmm0, xmm8
|
||||
movdqu xmm1, XMMWORD PTR [r8+rdi]
|
||||
movdqu XMMWORD PTR [r8+rdi], xmm0
|
||||
paddq xmm1, xmm4
|
||||
xor r8d, 16
|
||||
mov eax, r8d
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
||||
movdqu XMMWORD PTR [r8+rdi], xmm1
|
||||
paddq xmm0, xmm10
|
||||
movdqu XMMWORD PTR [rax+rdi], xmm0
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm8
|
||||
movdqu XMMWORD PTR [rcx+rdi], xmm0
|
||||
movd rdi, xmm5
|
||||
movd rcx, xmm14
|
||||
mov ebp, edi
|
||||
mov r8, QWORD PTR [rcx+rsi]
|
||||
mov r10, QWORD PTR [rcx+rsi+8]
|
||||
lea r9, QWORD PTR [rcx+rsi]
|
||||
xor esi, 16
|
||||
|
||||
movd xmm0, rsp
|
||||
movd xmm1, rsi
|
||||
movd xmm2, rdi
|
||||
movd xmm11, rbp
|
||||
movd xmm12, r15
|
||||
movd xmm13, rdx
|
||||
mov [rsp+104], rcx
|
||||
mov [rsp+112], r9
|
||||
|
||||
mov ebx, DWORD PTR [rsp+16]
|
||||
mov esi, DWORD PTR [rsp+20]
|
||||
mov edi, DWORD PTR [rsp+24]
|
||||
mov ebp, DWORD PTR [rsp+28]
|
||||
|
||||
lea eax, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r8, rax
|
||||
|
||||
movd esp, xmm3
|
||||
pextrd r15d, xmm3, 2
|
||||
movd eax, xmm7
|
||||
movd edx, xmm9
|
||||
pextrd r9d, xmm9, 2
|
||||
|
||||
FN_PREFIX(CryptonightWOW_template_double_part2):
|
||||
|
||||
movd rsp, xmm0
|
||||
mov DWORD PTR [rsp+16], ebx
|
||||
mov DWORD PTR [rsp+20], esi
|
||||
mov DWORD PTR [rsp+24], edi
|
||||
mov DWORD PTR [rsp+28], ebp
|
||||
|
||||
movd rsi, xmm1
|
||||
movd rdi, xmm2
|
||||
movd rbp, xmm11
|
||||
movd r15, xmm12
|
||||
movd rdx, xmm13
|
||||
mov rcx, [rsp+104]
|
||||
mov r9, [rsp+112]
|
||||
|
||||
mov rbx, r8
|
||||
mov rax, r8
|
||||
mul rdx
|
||||
and ebp, 2097136
|
||||
mov r8, rax
|
||||
movd xmm1, rdx
|
||||
movd xmm0, r8
|
||||
punpcklqdq xmm1, xmm0
|
||||
pxor xmm1, XMMWORD PTR [rcx+rsi]
|
||||
xor esi, 48
|
||||
paddq xmm1, xmm7
|
||||
movdqu xmm2, XMMWORD PTR [rsi+rcx]
|
||||
xor rdx, QWORD PTR [rsi+rcx]
|
||||
paddq xmm2, xmm3
|
||||
xor r8, QWORD PTR [rsi+rcx+8]
|
||||
movdqu XMMWORD PTR [rsi+rcx], xmm1
|
||||
xor esi, 16
|
||||
mov eax, esi
|
||||
mov rsi, rcx
|
||||
movdqu xmm0, XMMWORD PTR [rax+rcx]
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm2
|
||||
paddq xmm0, xmm9
|
||||
add r12, r8
|
||||
xor rax, 32
|
||||
add r14, rdx
|
||||
movdqa xmm9, xmm7
|
||||
movdqa xmm7, xmm6
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
||||
mov QWORD PTR [r9+8], r12
|
||||
xor r12, r10
|
||||
mov QWORD PTR [r9], r14
|
||||
movd rcx, xmm15
|
||||
xor r14, rbx
|
||||
mov r10d, ebp
|
||||
mov ebx, r14d
|
||||
xor ebp, 16
|
||||
and ebx, 2097136
|
||||
mov r8, QWORD PTR [r10+rcx]
|
||||
mov r9, QWORD PTR [r10+rcx+8]
|
||||
|
||||
movd xmm0, rsp
|
||||
movd xmm1, rbx
|
||||
movd xmm2, rsi
|
||||
movd xmm11, rdi
|
||||
movd xmm12, rbp
|
||||
movd xmm13, r15
|
||||
mov [rsp+104], rcx
|
||||
mov [rsp+112], r9
|
||||
|
||||
mov ebx, DWORD PTR [rsp]
|
||||
mov esi, DWORD PTR [rsp+4]
|
||||
mov edi, DWORD PTR [rsp+8]
|
||||
mov ebp, DWORD PTR [rsp+12]
|
||||
|
||||
lea eax, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
|
||||
xor r8, rax
|
||||
movd xmm3, r8
|
||||
|
||||
movd esp, xmm4
|
||||
pextrd r15d, xmm4, 2
|
||||
movd eax, xmm8
|
||||
movd edx, xmm10
|
||||
pextrd r9d, xmm10, 2
|
||||
|
||||
FN_PREFIX(CryptonightWOW_template_double_part3):
|
||||
|
||||
movd rsp, xmm0
|
||||
mov DWORD PTR [rsp], ebx
|
||||
mov DWORD PTR [rsp+4], esi
|
||||
mov DWORD PTR [rsp+8], edi
|
||||
mov DWORD PTR [rsp+12], ebp
|
||||
|
||||
movd rbx, xmm1
|
||||
movd rsi, xmm2
|
||||
movd rdi, xmm11
|
||||
movd rbp, xmm12
|
||||
movd r15, xmm13
|
||||
mov rcx, [rsp+104]
|
||||
mov r9, [rsp+112]
|
||||
|
||||
mov rax, r8
|
||||
mul rdi
|
||||
movd xmm1, rdx
|
||||
movd xmm0, rax
|
||||
punpcklqdq xmm1, xmm0
|
||||
mov rdi, rcx
|
||||
mov r8, rax
|
||||
pxor xmm1, XMMWORD PTR [rbp+rcx]
|
||||
xor ebp, 48
|
||||
paddq xmm1, xmm8
|
||||
xor r8, QWORD PTR [rbp+rcx+8]
|
||||
xor rdx, QWORD PTR [rbp+rcx]
|
||||
add r13, r8
|
||||
movdqu xmm2, XMMWORD PTR [rbp+rcx]
|
||||
add r15, rdx
|
||||
movdqu XMMWORD PTR [rbp+rcx], xmm1
|
||||
paddq xmm2, xmm4
|
||||
xor ebp, 16
|
||||
mov eax, ebp
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [rbp+rcx]
|
||||
movdqu XMMWORD PTR [rbp+rcx], xmm2
|
||||
paddq xmm0, xmm10
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
||||
movd rax, xmm3
|
||||
movdqa xmm10, xmm8
|
||||
mov QWORD PTR [r10+rcx], r15
|
||||
movdqa xmm8, xmm5
|
||||
xor r15, rax
|
||||
mov QWORD PTR [r10+rcx+8], r13
|
||||
mov r8d, r15d
|
||||
xor r13, r9
|
||||
and r8d, 2097136
|
||||
dec r11d
|
||||
jnz FN_PREFIX(CryptonightWOW_template_double_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightWOW_template_double_part4):
|
||||
|
||||
mov rbx, QWORD PTR [rsp+400]
|
||||
movaps xmm6, XMMWORD PTR [rsp+160]
|
||||
movaps xmm7, XMMWORD PTR [rsp+176]
|
||||
movaps xmm8, XMMWORD PTR [rsp+192]
|
||||
movaps xmm9, XMMWORD PTR [rsp+208]
|
||||
movaps xmm10, XMMWORD PTR [rsp+224]
|
||||
movaps xmm11, XMMWORD PTR [rsp+240]
|
||||
movaps xmm12, XMMWORD PTR [rsp+256]
|
||||
movaps xmm13, XMMWORD PTR [rsp+272]
|
||||
movaps xmm14, XMMWORD PTR [rsp+288]
|
||||
movaps xmm15, XMMWORD PTR [rsp+304]
|
||||
add rsp, 320
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
ret 0
|
||||
FN_PREFIX(CryptonightWOW_template_double_end):
|
486
src/crypto/asm/win/CryptonightWOW_template_win.inc
Normal file
486
src/crypto/asm/win/CryptonightWOW_template_win.inc
Normal file
|
@ -0,0 +1,486 @@
|
|||
PUBLIC CryptonightWOW_template_part1
|
||||
PUBLIC CryptonightWOW_template_mainloop
|
||||
PUBLIC CryptonightWOW_template_part2
|
||||
PUBLIC CryptonightWOW_template_part3
|
||||
PUBLIC CryptonightWOW_template_end
|
||||
PUBLIC CryptonightWOW_template_double_part1
|
||||
PUBLIC CryptonightWOW_template_double_mainloop
|
||||
PUBLIC CryptonightWOW_template_double_part2
|
||||
PUBLIC CryptonightWOW_template_double_part3
|
||||
PUBLIC CryptonightWOW_template_double_part4
|
||||
PUBLIC CryptonightWOW_template_double_end
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightWOW_template_part1:
|
||||
mov QWORD PTR [rsp+16], rbx
|
||||
mov QWORD PTR [rsp+24], rbp
|
||||
mov QWORD PTR [rsp+32], rsi
|
||||
push r10
|
||||
push r11
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
push rdi
|
||||
sub rsp, 64
|
||||
mov r12, rcx
|
||||
mov r8, QWORD PTR [r12+32]
|
||||
mov rdx, r12
|
||||
xor r8, QWORD PTR [r12]
|
||||
mov r15, QWORD PTR [r12+40]
|
||||
mov r9, r8
|
||||
xor r15, QWORD PTR [r12+8]
|
||||
mov r11, QWORD PTR [r12+224]
|
||||
mov r12, QWORD PTR [r12+56]
|
||||
xor r12, QWORD PTR [rdx+24]
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
movaps XMMWORD PTR [rsp+48], xmm6
|
||||
movd xmm0, r12
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+16], xmm8
|
||||
movaps XMMWORD PTR [rsp], xmm9
|
||||
mov r12, QWORD PTR [rdx+88]
|
||||
xor r12, QWORD PTR [rdx+72]
|
||||
movd xmm6, rax
|
||||
mov rax, QWORD PTR [rdx+80]
|
||||
xor rax, QWORD PTR [rdx+64]
|
||||
punpcklqdq xmm6, xmm0
|
||||
and r9d, 2097136
|
||||
movd xmm0, r12
|
||||
movd xmm7, rax
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r10d, r9d
|
||||
movd xmm9, rsp
|
||||
mov rsp, r8
|
||||
mov r8d, 524288
|
||||
|
||||
mov ebx, [rdx+96]
|
||||
mov esi, [rdx+100]
|
||||
mov edi, [rdx+104]
|
||||
mov ebp, [rdx+108]
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightWOW_template_mainloop:
|
||||
movdqa xmm5, XMMWORD PTR [r9+r11]
|
||||
movd xmm0, r15
|
||||
movd xmm4, rsp
|
||||
punpcklqdq xmm4, xmm0
|
||||
lea rdx, QWORD PTR [r9+r11]
|
||||
|
||||
aesenc xmm5, xmm4
|
||||
movd r10d, xmm5
|
||||
and r10d, 2097136
|
||||
|
||||
mov r12d, r9d
|
||||
mov eax, r9d
|
||||
xor r9d, 48
|
||||
xor r12d, 16
|
||||
xor eax, 32
|
||||
movdqu xmm0, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm2, XMMWORD PTR [r12+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
paddq xmm0, xmm7
|
||||
paddq xmm2, xmm6
|
||||
paddq xmm1, xmm4
|
||||
movdqu XMMWORD PTR [r12+r11], xmm0
|
||||
movd r12, xmm5
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movdqu XMMWORD PTR [r9+r11], xmm1
|
||||
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm6
|
||||
movdqu XMMWORD PTR [rdx], xmm0
|
||||
|
||||
lea r13d, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or r13, rdx
|
||||
|
||||
xor r13, QWORD PTR [r10+r11]
|
||||
mov r14, QWORD PTR [r10+r11+8]
|
||||
|
||||
movd eax, xmm6
|
||||
movd edx, xmm7
|
||||
pextrd r9d, xmm7, 2
|
||||
|
||||
CryptonightWOW_template_part2:
|
||||
mov rax, r13
|
||||
mul r12
|
||||
movd xmm0, rax
|
||||
movd xmm3, rdx
|
||||
punpcklqdq xmm3, xmm0
|
||||
|
||||
mov r9d, r10d
|
||||
mov r12d, r10d
|
||||
xor r9d, 16
|
||||
xor r12d, 32
|
||||
xor r10d, 48
|
||||
movdqa xmm1, XMMWORD PTR [r12+r11]
|
||||
xor rdx, QWORD PTR [r12+r11]
|
||||
xor rax, QWORD PTR [r11+r12+8]
|
||||
movdqa xmm2, XMMWORD PTR [r9+r11]
|
||||
pxor xmm3, xmm2
|
||||
paddq xmm7, XMMWORD PTR [r10+r11]
|
||||
paddq xmm1, xmm4
|
||||
paddq xmm3, xmm6
|
||||
movdqu XMMWORD PTR [r9+r11], xmm7
|
||||
movdqu XMMWORD PTR [r12+r11], xmm3
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
|
||||
movdqa xmm7, xmm6
|
||||
add r15, rax
|
||||
add rsp, rdx
|
||||
xor r10, 48
|
||||
mov QWORD PTR [r10+r11], rsp
|
||||
xor rsp, r13
|
||||
mov r9d, esp
|
||||
mov QWORD PTR [r10+r11+8], r15
|
||||
and r9d, 2097136
|
||||
xor r15, r14
|
||||
movdqa xmm6, xmm5
|
||||
dec r8d
|
||||
jnz CryptonightWOW_template_mainloop
|
||||
|
||||
CryptonightWOW_template_part3:
|
||||
movd rsp, xmm9
|
||||
|
||||
mov rbx, QWORD PTR [rsp+136]
|
||||
mov rbp, QWORD PTR [rsp+144]
|
||||
mov rsi, QWORD PTR [rsp+152]
|
||||
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+16]
|
||||
movaps xmm9, XMMWORD PTR [rsp]
|
||||
add rsp, 64
|
||||
pop rdi
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop r11
|
||||
pop r10
|
||||
ret 0
|
||||
CryptonightWOW_template_end:
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightWOW_template_double_part1:
|
||||
mov QWORD PTR [rsp+24], rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 320
|
||||
mov r14, QWORD PTR [rcx+32]
|
||||
mov r8, rcx
|
||||
xor r14, QWORD PTR [rcx]
|
||||
mov r12, QWORD PTR [rcx+40]
|
||||
mov ebx, r14d
|
||||
mov rsi, QWORD PTR [rcx+224]
|
||||
and ebx, 2097136
|
||||
xor r12, QWORD PTR [rcx+8]
|
||||
mov rcx, QWORD PTR [rcx+56]
|
||||
xor rcx, QWORD PTR [r8+24]
|
||||
mov rax, QWORD PTR [r8+48]
|
||||
xor rax, QWORD PTR [r8+16]
|
||||
mov r15, QWORD PTR [rdx+32]
|
||||
xor r15, QWORD PTR [rdx]
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r8+88]
|
||||
xor rcx, QWORD PTR [r8+72]
|
||||
mov r13, QWORD PTR [rdx+40]
|
||||
mov rdi, QWORD PTR [rdx+224]
|
||||
xor r13, QWORD PTR [rdx+8]
|
||||
movaps XMMWORD PTR [rsp+160], xmm6
|
||||
movaps XMMWORD PTR [rsp+176], xmm7
|
||||
movaps XMMWORD PTR [rsp+192], xmm8
|
||||
movaps XMMWORD PTR [rsp+208], xmm9
|
||||
movaps XMMWORD PTR [rsp+224], xmm10
|
||||
movaps XMMWORD PTR [rsp+240], xmm11
|
||||
movaps XMMWORD PTR [rsp+256], xmm12
|
||||
movaps XMMWORD PTR [rsp+272], xmm13
|
||||
movaps XMMWORD PTR [rsp+288], xmm14
|
||||
movaps XMMWORD PTR [rsp+304], xmm15
|
||||
movd xmm7, rax
|
||||
mov rax, QWORD PTR [r8+80]
|
||||
xor rax, QWORD PTR [r8+64]
|
||||
|
||||
movaps xmm1, XMMWORD PTR [rdx+96]
|
||||
movaps xmm2, XMMWORD PTR [r8+96]
|
||||
movaps XMMWORD PTR [rsp], xmm1
|
||||
movaps XMMWORD PTR [rsp+16], xmm2
|
||||
|
||||
mov r8d, r15d
|
||||
punpcklqdq xmm7, xmm0
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdx+56]
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
movd xmm9, rax
|
||||
mov QWORD PTR [rsp+128], rsi
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
punpcklqdq xmm9, xmm0
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdx+88]
|
||||
xor rcx, QWORD PTR [rdx+72]
|
||||
movd xmm8, rax
|
||||
mov QWORD PTR [rsp+136], rdi
|
||||
mov rax, QWORD PTR [rdx+80]
|
||||
xor rax, QWORD PTR [rdx+64]
|
||||
punpcklqdq xmm8, xmm0
|
||||
and r8d, 2097136
|
||||
movd xmm0, rcx
|
||||
mov r11d, 524288
|
||||
movd xmm10, rax
|
||||
punpcklqdq xmm10, xmm0
|
||||
|
||||
movd xmm14, QWORD PTR [rsp+128]
|
||||
movd xmm15, QWORD PTR [rsp+136]
|
||||
|
||||
ALIGN(64)
|
||||
CryptonightWOW_template_double_mainloop:
|
||||
movdqu xmm6, XMMWORD PTR [rbx+rsi]
|
||||
movd xmm0, r12
|
||||
mov ecx, ebx
|
||||
movd xmm3, r14
|
||||
punpcklqdq xmm3, xmm0
|
||||
xor ebx, 16
|
||||
aesenc xmm6, xmm3
|
||||
movd rdx, xmm6
|
||||
movd xmm4, r15
|
||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
||||
xor ebx, 48
|
||||
paddq xmm0, xmm7
|
||||
movdqu xmm1, XMMWORD PTR [rbx+rsi]
|
||||
movdqu XMMWORD PTR [rbx+rsi], xmm0
|
||||
paddq xmm1, xmm3
|
||||
xor ebx, 16
|
||||
mov eax, ebx
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
||||
movdqu XMMWORD PTR [rbx+rsi], xmm1
|
||||
paddq xmm0, xmm9
|
||||
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm7
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm0
|
||||
mov esi, edx
|
||||
movdqu xmm5, XMMWORD PTR [r8+rdi]
|
||||
and esi, 2097136
|
||||
mov ecx, r8d
|
||||
movd xmm0, r13
|
||||
punpcklqdq xmm4, xmm0
|
||||
xor r8d, 16
|
||||
aesenc xmm5, xmm4
|
||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
||||
xor r8d, 48
|
||||
paddq xmm0, xmm8
|
||||
movdqu xmm1, XMMWORD PTR [r8+rdi]
|
||||
movdqu XMMWORD PTR [r8+rdi], xmm0
|
||||
paddq xmm1, xmm4
|
||||
xor r8d, 16
|
||||
mov eax, r8d
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
||||
movdqu XMMWORD PTR [r8+rdi], xmm1
|
||||
paddq xmm0, xmm10
|
||||
movdqu XMMWORD PTR [rax+rdi], xmm0
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm8
|
||||
movdqu XMMWORD PTR [rcx+rdi], xmm0
|
||||
movd rdi, xmm5
|
||||
movd rcx, xmm14
|
||||
mov ebp, edi
|
||||
mov r8, QWORD PTR [rcx+rsi]
|
||||
mov r10, QWORD PTR [rcx+rsi+8]
|
||||
lea r9, QWORD PTR [rcx+rsi]
|
||||
xor esi, 16
|
||||
|
||||
movd xmm0, rsp
|
||||
movd xmm1, rsi
|
||||
movd xmm2, rdi
|
||||
movd xmm11, rbp
|
||||
movd xmm12, r15
|
||||
movd xmm13, rdx
|
||||
mov [rsp+104], rcx
|
||||
mov [rsp+112], r9
|
||||
|
||||
mov ebx, DWORD PTR [rsp+16]
|
||||
mov esi, DWORD PTR [rsp+20]
|
||||
mov edi, DWORD PTR [rsp+24]
|
||||
mov ebp, DWORD PTR [rsp+28]
|
||||
|
||||
lea eax, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r8, rax
|
||||
|
||||
movd esp, xmm3
|
||||
pextrd r15d, xmm3, 2
|
||||
movd eax, xmm7
|
||||
movd edx, xmm9
|
||||
pextrd r9d, xmm9, 2
|
||||
|
||||
CryptonightWOW_template_double_part2:
|
||||
|
||||
movd rsp, xmm0
|
||||
mov DWORD PTR [rsp+16], ebx
|
||||
mov DWORD PTR [rsp+20], esi
|
||||
mov DWORD PTR [rsp+24], edi
|
||||
mov DWORD PTR [rsp+28], ebp
|
||||
|
||||
movd rsi, xmm1
|
||||
movd rdi, xmm2
|
||||
movd rbp, xmm11
|
||||
movd r15, xmm12
|
||||
movd rdx, xmm13
|
||||
mov rcx, [rsp+104]
|
||||
mov r9, [rsp+112]
|
||||
|
||||
mov rbx, r8
|
||||
mov rax, r8
|
||||
mul rdx
|
||||
and ebp, 2097136
|
||||
mov r8, rax
|
||||
movd xmm1, rdx
|
||||
movd xmm0, r8
|
||||
punpcklqdq xmm1, xmm0
|
||||
pxor xmm1, XMMWORD PTR [rcx+rsi]
|
||||
xor esi, 48
|
||||
paddq xmm1, xmm7
|
||||
movdqu xmm2, XMMWORD PTR [rsi+rcx]
|
||||
xor rdx, QWORD PTR [rsi+rcx]
|
||||
paddq xmm2, xmm3
|
||||
xor r8, QWORD PTR [rsi+rcx+8]
|
||||
movdqu XMMWORD PTR [rsi+rcx], xmm1
|
||||
xor esi, 16
|
||||
mov eax, esi
|
||||
mov rsi, rcx
|
||||
movdqu xmm0, XMMWORD PTR [rax+rcx]
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm2
|
||||
paddq xmm0, xmm9
|
||||
add r12, r8
|
||||
xor rax, 32
|
||||
add r14, rdx
|
||||
movdqa xmm9, xmm7
|
||||
movdqa xmm7, xmm6
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
||||
mov QWORD PTR [r9+8], r12
|
||||
xor r12, r10
|
||||
mov QWORD PTR [r9], r14
|
||||
movd rcx, xmm15
|
||||
xor r14, rbx
|
||||
mov r10d, ebp
|
||||
mov ebx, r14d
|
||||
xor ebp, 16
|
||||
and ebx, 2097136
|
||||
mov r8, QWORD PTR [r10+rcx]
|
||||
mov r9, QWORD PTR [r10+rcx+8]
|
||||
|
||||
movd xmm0, rsp
|
||||
movd xmm1, rbx
|
||||
movd xmm2, rsi
|
||||
movd xmm11, rdi
|
||||
movd xmm12, rbp
|
||||
movd xmm13, r15
|
||||
mov [rsp+104], rcx
|
||||
mov [rsp+112], r9
|
||||
|
||||
mov ebx, DWORD PTR [rsp]
|
||||
mov esi, DWORD PTR [rsp+4]
|
||||
mov edi, DWORD PTR [rsp+8]
|
||||
mov ebp, DWORD PTR [rsp+12]
|
||||
|
||||
lea eax, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
|
||||
xor r8, rax
|
||||
movd xmm3, r8
|
||||
|
||||
movd esp, xmm4
|
||||
pextrd r15d, xmm4, 2
|
||||
movd eax, xmm8
|
||||
movd edx, xmm10
|
||||
pextrd r9d, xmm10, 2
|
||||
|
||||
CryptonightWOW_template_double_part3:
|
||||
|
||||
movd rsp, xmm0
|
||||
mov DWORD PTR [rsp], ebx
|
||||
mov DWORD PTR [rsp+4], esi
|
||||
mov DWORD PTR [rsp+8], edi
|
||||
mov DWORD PTR [rsp+12], ebp
|
||||
|
||||
movd rbx, xmm1
|
||||
movd rsi, xmm2
|
||||
movd rdi, xmm11
|
||||
movd rbp, xmm12
|
||||
movd r15, xmm13
|
||||
mov rcx, [rsp+104]
|
||||
mov r9, [rsp+112]
|
||||
|
||||
mov rax, r8
|
||||
mul rdi
|
||||
movd xmm1, rdx
|
||||
movd xmm0, rax
|
||||
punpcklqdq xmm1, xmm0
|
||||
mov rdi, rcx
|
||||
mov r8, rax
|
||||
pxor xmm1, XMMWORD PTR [rbp+rcx]
|
||||
xor ebp, 48
|
||||
paddq xmm1, xmm8
|
||||
xor r8, QWORD PTR [rbp+rcx+8]
|
||||
xor rdx, QWORD PTR [rbp+rcx]
|
||||
add r13, r8
|
||||
movdqu xmm2, XMMWORD PTR [rbp+rcx]
|
||||
add r15, rdx
|
||||
movdqu XMMWORD PTR [rbp+rcx], xmm1
|
||||
paddq xmm2, xmm4
|
||||
xor ebp, 16
|
||||
mov eax, ebp
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [rbp+rcx]
|
||||
movdqu XMMWORD PTR [rbp+rcx], xmm2
|
||||
paddq xmm0, xmm10
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
||||
movd rax, xmm3
|
||||
movdqa xmm10, xmm8
|
||||
mov QWORD PTR [r10+rcx], r15
|
||||
movdqa xmm8, xmm5
|
||||
xor r15, rax
|
||||
mov QWORD PTR [r10+rcx+8], r13
|
||||
mov r8d, r15d
|
||||
xor r13, r9
|
||||
and r8d, 2097136
|
||||
dec r11d
|
||||
jnz CryptonightWOW_template_double_mainloop
|
||||
|
||||
CryptonightWOW_template_double_part4:
|
||||
|
||||
mov rbx, QWORD PTR [rsp+400]
|
||||
movaps xmm6, XMMWORD PTR [rsp+160]
|
||||
movaps xmm7, XMMWORD PTR [rsp+176]
|
||||
movaps xmm8, XMMWORD PTR [rsp+192]
|
||||
movaps xmm9, XMMWORD PTR [rsp+208]
|
||||
movaps xmm10, XMMWORD PTR [rsp+224]
|
||||
movaps xmm11, XMMWORD PTR [rsp+240]
|
||||
movaps xmm12, XMMWORD PTR [rsp+256]
|
||||
movaps xmm13, XMMWORD PTR [rsp+272]
|
||||
movaps xmm14, XMMWORD PTR [rsp+288]
|
||||
movaps xmm15, XMMWORD PTR [rsp+304]
|
||||
add rsp, 320
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
ret 0
|
||||
CryptonightWOW_template_double_end:
|
|
@ -21,6 +21,19 @@ PUBLIC cnv2_main_loop_ultralite_ryzen_asm
|
|||
PUBLIC cnv2_main_loop_ultralite_bulldozer_asm
|
||||
PUBLIC cnv2_double_main_loop_ultralite_sandybridge_asm
|
||||
|
||||
PUBLIC cnv2_main_loop_xcash_ivybridge_asm
|
||||
PUBLIC cnv2_main_loop_xcash_ryzen_asm
|
||||
PUBLIC cnv2_main_loop_xcash_bulldozer_asm
|
||||
PUBLIC cnv2_double_main_loop_xcash_sandybridge_asm
|
||||
|
||||
PUBLIC cnv2_main_loop_zelerius_ivybridge_asm
|
||||
PUBLIC cnv2_main_loop_zelerius_ryzen_asm
|
||||
PUBLIC cnv2_main_loop_zelerius_bulldozer_asm
|
||||
PUBLIC cnv2_double_main_loop_zelerius_sandybridge_asm
|
||||
|
||||
PUBLIC cnv2_main_loop_rwz_all_asm
|
||||
PUBLIC cnv2_double_main_loop_rwz_all_asm
|
||||
|
||||
PUBLIC cnv1_main_loop_soft_aes_sandybridge_asm
|
||||
PUBLIC cnv1_main_loop_lite_soft_aes_sandybridge_asm
|
||||
PUBLIC cnv1_main_loop_fast_soft_aes_sandybridge_asm
|
||||
|
@ -30,6 +43,8 @@ PUBLIC cnv1_main_loop_rto_soft_aes_sandybridge_asm
|
|||
PUBLIC cnv2_main_loop_soft_aes_sandybridge_asm
|
||||
PUBLIC cnv2_main_loop_fastv2_soft_aes_sandybridge_asm
|
||||
PUBLIC cnv2_main_loop_ultralite_soft_aes_sandybridge_asm
|
||||
PUBLIC cnv2_main_loop_xcash_soft_aes_sandybridge_asm
|
||||
PUBLIC cnv2_main_loop_zelerius_soft_aes_sandybridge_asm
|
||||
|
||||
ALIGN 64
|
||||
cnv1_main_loop_sandybridge_asm PROC
|
||||
|
@ -133,6 +148,66 @@ cnv2_double_main_loop_ultralite_sandybridge_asm PROC
|
|||
ret 0
|
||||
cnv2_double_main_loop_ultralite_sandybridge_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv2_main_loop_xcash_ivybridge_asm PROC
|
||||
INCLUDE cnv2_main_loop_xcash_ivybridge.inc
|
||||
ret 0
|
||||
cnv2_main_loop_xcash_ivybridge_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv2_main_loop_xcash_ryzen_asm PROC
|
||||
INCLUDE cnv2_main_loop_xcash_ryzen.inc
|
||||
ret 0
|
||||
cnv2_main_loop_xcash_ryzen_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv2_main_loop_xcash_bulldozer_asm PROC
|
||||
INCLUDE cnv2_main_loop_xcash_bulldozer.inc
|
||||
ret 0
|
||||
cnv2_main_loop_xcash_bulldozer_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv2_double_main_loop_xcash_sandybridge_asm PROC
|
||||
INCLUDE cnv2_double_main_loop_xcash_sandybridge.inc
|
||||
ret 0
|
||||
cnv2_double_main_loop_xcash_sandybridge_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv2_main_loop_zelerius_ivybridge_asm PROC
|
||||
INCLUDE cnv2_main_loop_zelerius_ivybridge.inc
|
||||
ret 0
|
||||
cnv2_main_loop_zelerius_ivybridge_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv2_main_loop_zelerius_ryzen_asm PROC
|
||||
INCLUDE cnv2_main_loop_zelerius_ryzen.inc
|
||||
ret 0
|
||||
cnv2_main_loop_zelerius_ryzen_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv2_main_loop_zelerius_bulldozer_asm PROC
|
||||
INCLUDE cnv2_main_loop_zelerius_bulldozer.inc
|
||||
ret 0
|
||||
cnv2_main_loop_zelerius_bulldozer_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv2_double_main_loop_zelerius_sandybridge_asm PROC
|
||||
INCLUDE cnv2_double_main_loop_zelerius_sandybridge.inc
|
||||
ret 0
|
||||
cnv2_double_main_loop_zelerius_sandybridge_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv2_main_loop_rwz_all_asm PROC
|
||||
INCLUDE cnv2_main_loop_rwz_all.inc
|
||||
ret 0
|
||||
cnv2_main_loop_rwz_all_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv2_double_main_loop_rwz_all_asm PROC
|
||||
INCLUDE cnv2_double_main_loop_rwz_all.inc
|
||||
ret 0
|
||||
cnv2_double_main_loop_rwz_all_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv1_main_loop_soft_aes_sandybridge_asm PROC
|
||||
INCLUDE cnv1_main_loop_soft_aes_sandybridge.inc
|
||||
|
@ -181,5 +256,17 @@ cnv2_main_loop_ultralite_soft_aes_sandybridge_asm PROC
|
|||
ret 0
|
||||
cnv2_main_loop_ultralite_soft_aes_sandybridge_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv2_main_loop_xcash_soft_aes_sandybridge_asm PROC
|
||||
INCLUDE cnv2_main_loop_xcash_soft_aes_sandybridge.inc
|
||||
ret 0
|
||||
cnv2_main_loop_xcash_soft_aes_sandybridge_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
cnv2_main_loop_zelerius_soft_aes_sandybridge_asm PROC
|
||||
INCLUDE cnv2_main_loop_zelerius_soft_aes_sandybridge.inc
|
||||
ret 0
|
||||
cnv2_main_loop_zelerius_soft_aes_sandybridge_asm ENDP
|
||||
|
||||
_TEXT_CN_MAINLOOP ENDS
|
||||
END
|
|
@ -24,6 +24,19 @@
|
|||
.global FN_PREFIX(cnv2_main_loop_ultralite_bulldozer_asm)
|
||||
.global FN_PREFIX(cnv2_double_main_loop_ultralite_sandybridge_asm)
|
||||
|
||||
.global FN_PREFIX(cnv2_main_loop_xcash_ivybridge_asm)
|
||||
.global FN_PREFIX(cnv2_main_loop_xcash_ryzen_asm)
|
||||
.global FN_PREFIX(cnv2_main_loop_xcash_bulldozer_asm)
|
||||
.global FN_PREFIX(cnv2_double_main_loop_xcash_sandybridge_asm)
|
||||
|
||||
.global FN_PREFIX(cnv2_main_loop_zelerius_ivybridge_asm)
|
||||
.global FN_PREFIX(cnv2_main_loop_zelerius_ryzen_asm)
|
||||
.global FN_PREFIX(cnv2_main_loop_zelerius_bulldozer_asm)
|
||||
.global FN_PREFIX(cnv2_double_main_loop_zelerius_sandybridge_asm)
|
||||
|
||||
.global FN_PREFIX(cnv2_main_loop_rwz_allr_asm)
|
||||
.global FN_PREFIX(cnv2_double_main_loop_rwz_all_asm)
|
||||
|
||||
.global FN_PREFIX(cnv1_main_loop_soft_aes_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv1_main_loop_lite_soft_aes_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv1_main_loop_fast_soft_aes_sandybridge_asm)
|
||||
|
@ -33,6 +46,8 @@
|
|||
.global FN_PREFIX(cnv2_main_loop_soft_aes_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv2_main_loop_fastv2_soft_aes_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv2_main_loop_ultralite_soft_aes_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv2_main_loop_xcash_soft_aes_sandybridge_asm)
|
||||
.global FN_PREFIX(cnv2_main_loop_zelerius_soft_aes_sandybridge_asm)
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv1_main_loop_sandybridge_asm):
|
||||
|
@ -119,6 +134,56 @@ FN_PREFIX(cnv2_double_main_loop_ultralite_sandybridge_asm):
|
|||
#include "../cnv2_double_main_loop_ultralite_sandybridge.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_main_loop_xcash_ivybridge_asm):
|
||||
#include "../cnv2_main_loop_xcash_ivybridge.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_main_loop_xcash_ryzen_asm):
|
||||
#include "../cnv2_main_loop_xcash_ryzen.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_main_loop_xcash_bulldozer_asm):
|
||||
#include "../cnv2_main_loop_xcash_bulldozer.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_double_main_loop_xcash_sandybridge_asm):
|
||||
#include "../cnv2_double_main_loop_xcash_sandybridge.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_main_loop_zelerius_ivybridge_asm):
|
||||
#include "../cnv2_main_loop_zelerius_ivybridge.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_main_loop_zelerius_ryzen_asm):
|
||||
#include "../cnv2_main_loop_zelerius_ryzen.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_main_loop_zelerius_bulldozer_asm):
|
||||
#include "../cnv2_main_loop_zelerius_bulldozer.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_double_main_loop_zelerius_sandybridge_asm):
|
||||
#include "../cnv2_double_main_loop_zelerius_sandybridge.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_main_loop_rwz_allr_asm):
|
||||
#include "../cnv2_main_loop_rwz_allr.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_double_main_loop_rwz_all_asm):
|
||||
#include "../cnv2_double_main_loop_rwz_all.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv1_main_loop_soft_aes_sandybridge_asm):
|
||||
#include "../cnv1_main_loop_soft_aes_sandybridge.inc"
|
||||
|
@ -157,4 +222,14 @@ FN_PREFIX(cnv2_main_loop_fastv2_soft_aes_sandybridge_asm):
|
|||
ALIGN 64
|
||||
FN_PREFIX(cnv2_main_loop_ultralite_soft_aes_sandybridge_asm):
|
||||
#include "../cnv2_main_loop_ultralite_soft_aes_sandybridge.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_main_loop_xcash_soft_aes_sandybridge_asm):
|
||||
#include "../cnv2_main_loop_xcash_soft_aes_sandybridge.inc"
|
||||
ret 0
|
||||
|
||||
ALIGN 64
|
||||
FN_PREFIX(cnv2_main_loop_zelerius_soft_aes_sandybridge_asm):
|
||||
#include "../cnv2_main_loop_zelerius_soft_aes_sandybridge.inc"
|
||||
ret 0
|
410
src/crypto/asm/win/cnv2_double_main_loop_rwz_all.inc
Normal file
410
src/crypto/asm/win/cnv2_double_main_loop_rwz_all.inc
Normal file
|
@ -0,0 +1,410 @@
|
|||
mov rax, rsp
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 184
|
||||
|
||||
stmxcsr DWORD PTR [rsp+272]
|
||||
mov DWORD PTR [rsp+276], 24448
|
||||
ldmxcsr DWORD PTR [rsp+276]
|
||||
|
||||
mov r13, QWORD PTR [rcx+224]
|
||||
mov r9, rdx
|
||||
mov r10, QWORD PTR [rcx+32]
|
||||
mov r8, rcx
|
||||
xor r10, QWORD PTR [rcx]
|
||||
mov r14d, 393216
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
xor r11, QWORD PTR [rcx+8]
|
||||
mov rsi, QWORD PTR [rdx+224]
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov rdi, QWORD PTR [r9+32]
|
||||
xor rdi, QWORD PTR [r9]
|
||||
mov rbp, QWORD PTR [r9+40]
|
||||
xor rbp, QWORD PTR [r9+8]
|
||||
movd xmm0, rdx
|
||||
movaps XMMWORD PTR [rax-88], xmm6
|
||||
movaps XMMWORD PTR [rax-104], xmm7
|
||||
movaps XMMWORD PTR [rax-120], xmm8
|
||||
movaps XMMWORD PTR [rsp+112], xmm9
|
||||
movaps XMMWORD PTR [rsp+96], xmm10
|
||||
movaps XMMWORD PTR [rsp+80], xmm11
|
||||
movaps XMMWORD PTR [rsp+64], xmm12
|
||||
movaps XMMWORD PTR [rsp+48], xmm13
|
||||
movaps XMMWORD PTR [rsp+32], xmm14
|
||||
movaps XMMWORD PTR [rsp+16], xmm15
|
||||
mov rdx, r10
|
||||
movd xmm4, QWORD PTR [r8+96]
|
||||
and edx, 2097136
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
xorps xmm13, xmm13
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r8+72]
|
||||
movd xmm5, QWORD PTR [r8+104]
|
||||
movd xmm7, rax
|
||||
|
||||
mov eax, 1
|
||||
shl rax, 52
|
||||
movd xmm14, rax
|
||||
punpcklqdq xmm14, xmm14
|
||||
|
||||
mov eax, 1023
|
||||
shl rax, 52
|
||||
movd xmm12, rax
|
||||
punpcklqdq xmm12, xmm12
|
||||
|
||||
mov rax, QWORD PTR [r8+80]
|
||||
xor rax, QWORD PTR [r8+64]
|
||||
punpcklqdq xmm7, xmm0
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r9+56]
|
||||
xor rcx, QWORD PTR [r9+24]
|
||||
movd xmm3, rax
|
||||
mov rax, QWORD PTR [r9+48]
|
||||
xor rax, QWORD PTR [r9+16]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movd xmm0, rcx
|
||||
mov QWORD PTR [rsp], r13
|
||||
mov rcx, QWORD PTR [r9+88]
|
||||
xor rcx, QWORD PTR [r9+72]
|
||||
movd xmm6, rax
|
||||
mov rax, QWORD PTR [r9+80]
|
||||
xor rax, QWORD PTR [r9+64]
|
||||
punpcklqdq xmm6, xmm0
|
||||
movd xmm0, rcx
|
||||
mov QWORD PTR [rsp+256], r10
|
||||
mov rcx, rdi
|
||||
mov QWORD PTR [rsp+264], r11
|
||||
movd xmm8, rax
|
||||
and ecx, 2097136
|
||||
punpcklqdq xmm8, xmm0
|
||||
movd xmm0, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm4, xmm0
|
||||
movd xmm0, QWORD PTR [r9+104]
|
||||
lea r8, QWORD PTR [rcx+rsi]
|
||||
movdqu xmm11, XMMWORD PTR [r8]
|
||||
punpcklqdq xmm5, xmm0
|
||||
lea r9, QWORD PTR [rdx+r13]
|
||||
movdqu xmm15, XMMWORD PTR [r9]
|
||||
|
||||
ALIGN(64)
|
||||
rwz_main_loop_double:
|
||||
movdqu xmm9, xmm15
|
||||
mov eax, edx
|
||||
mov ebx, edx
|
||||
xor eax, 16
|
||||
xor ebx, 32
|
||||
xor edx, 48
|
||||
|
||||
movd xmm0, r11
|
||||
movd xmm2, r10
|
||||
punpcklqdq xmm2, xmm0
|
||||
aesenc xmm9, xmm2
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r13]
|
||||
movdqu xmm1, XMMWORD PTR [rbx+r13]
|
||||
paddq xmm0, xmm7
|
||||
paddq xmm1, xmm2
|
||||
movdqu XMMWORD PTR [rbx+r13], xmm0
|
||||
movdqu xmm0, XMMWORD PTR [rax+r13]
|
||||
movdqu XMMWORD PTR [rdx+r13], xmm1
|
||||
paddq xmm0, xmm3
|
||||
movdqu XMMWORD PTR [rax+r13], xmm0
|
||||
|
||||
movd r11, xmm9
|
||||
mov edx, r11d
|
||||
and edx, 2097136
|
||||
movdqa xmm0, xmm9
|
||||
pxor xmm0, xmm7
|
||||
movdqu XMMWORD PTR [r9], xmm0
|
||||
|
||||
lea rbx, QWORD PTR [rdx+r13]
|
||||
mov r10, QWORD PTR [rdx+r13]
|
||||
|
||||
movdqu xmm10, xmm11
|
||||
movd xmm0, rbp
|
||||
movd xmm11, rdi
|
||||
punpcklqdq xmm11, xmm0
|
||||
aesenc xmm10, xmm11
|
||||
|
||||
mov eax, ecx
|
||||
mov r12d, ecx
|
||||
xor eax, 16
|
||||
xor r12d, 32
|
||||
xor ecx, 48
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [rcx+rsi]
|
||||
paddq xmm0, xmm6
|
||||
movdqu xmm1, XMMWORD PTR [r12+rsi]
|
||||
movdqu XMMWORD PTR [r12+rsi], xmm0
|
||||
paddq xmm1, xmm11
|
||||
movdqu xmm0, XMMWORD PTR [rax+rsi]
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||
paddq xmm0, xmm8
|
||||
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||
|
||||
movd rcx, xmm10
|
||||
and ecx, 2097136
|
||||
|
||||
movdqa xmm0, xmm10
|
||||
pxor xmm0, xmm6
|
||||
movdqu XMMWORD PTR [r8], xmm0
|
||||
mov r12, QWORD PTR [rcx+rsi]
|
||||
|
||||
mov r9, QWORD PTR [rbx+8]
|
||||
|
||||
xor edx, 16
|
||||
mov r8d, edx
|
||||
mov r15d, edx
|
||||
|
||||
movd rdx, xmm5
|
||||
shl rdx, 32
|
||||
movd rax, xmm4
|
||||
xor rdx, rax
|
||||
xor r10, rdx
|
||||
mov rax, r10
|
||||
mul r11
|
||||
mov r11d, r8d
|
||||
xor r11d, 48
|
||||
movd xmm0, rdx
|
||||
xor rdx, [r11+r13]
|
||||
movd xmm1, rax
|
||||
xor rax, [r11+r13+8]
|
||||
punpcklqdq xmm0, xmm1
|
||||
|
||||
pxor xmm0, XMMWORD PTR [r8+r13]
|
||||
movdqu xmm1, XMMWORD PTR [r11+r13]
|
||||
paddq xmm0, xmm3
|
||||
paddq xmm1, xmm2
|
||||
movdqu XMMWORD PTR [r8+r13], xmm0
|
||||
xor r8d, 32
|
||||
movdqu xmm0, XMMWORD PTR [r8+r13]
|
||||
movdqu XMMWORD PTR [r8+r13], xmm1
|
||||
paddq xmm0, xmm7
|
||||
movdqu XMMWORD PTR [r11+r13], xmm0
|
||||
|
||||
mov r11, QWORD PTR [rsp+256]
|
||||
add r11, rdx
|
||||
mov rdx, QWORD PTR [rsp+264]
|
||||
add rdx, rax
|
||||
mov QWORD PTR [rbx], r11
|
||||
xor r11, r10
|
||||
mov QWORD PTR [rbx+8], rdx
|
||||
xor rdx, r9
|
||||
mov QWORD PTR [rsp+256], r11
|
||||
and r11d, 2097136
|
||||
mov QWORD PTR [rsp+264], rdx
|
||||
mov QWORD PTR [rsp+8], r11
|
||||
lea r15, QWORD PTR [r11+r13]
|
||||
movdqu xmm15, XMMWORD PTR [r11+r13]
|
||||
lea r13, QWORD PTR [rsi+rcx]
|
||||
movdqa xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
movaps xmm2, xmm13
|
||||
movd r10, xmm0
|
||||
psllq xmm5, 1
|
||||
shl r10, 32
|
||||
movdqa xmm0, xmm9
|
||||
psrldq xmm0, 8
|
||||
movdqa xmm1, xmm10
|
||||
movd r11, xmm0
|
||||
psrldq xmm1, 8
|
||||
movd r8, xmm1
|
||||
psrldq xmm4, 8
|
||||
movaps xmm0, xmm13
|
||||
movd rax, xmm4
|
||||
xor r10, rax
|
||||
movaps xmm1, xmm13
|
||||
xor r10, r12
|
||||
lea rax, QWORD PTR [r11+1]
|
||||
shr rax, 1
|
||||
movdqa xmm3, xmm9
|
||||
punpcklqdq xmm3, xmm10
|
||||
paddq xmm5, xmm3
|
||||
movd rdx, xmm5
|
||||
psrldq xmm5, 8
|
||||
cvtsi2sd xmm2, rax
|
||||
or edx, -2147483647
|
||||
lea rax, QWORD PTR [r8+1]
|
||||
shr rax, 1
|
||||
movd r9, xmm5
|
||||
cvtsi2sd xmm0, rax
|
||||
or r9d, -2147483647
|
||||
cvtsi2sd xmm1, rdx
|
||||
unpcklpd xmm2, xmm0
|
||||
movaps xmm0, xmm13
|
||||
cvtsi2sd xmm0, r9
|
||||
unpcklpd xmm1, xmm0
|
||||
divpd xmm2, xmm1
|
||||
paddq xmm2, xmm14
|
||||
cvttsd2si rax, xmm2
|
||||
psrldq xmm2, 8
|
||||
mov rbx, rax
|
||||
imul rax, rdx
|
||||
sub r11, rax
|
||||
js rwz_div_fix_1
|
||||
rwz_div_fix_1_ret:
|
||||
|
||||
cvttsd2si rdx, xmm2
|
||||
mov rax, rdx
|
||||
imul rax, r9
|
||||
movd xmm2, r11d
|
||||
movd xmm4, ebx
|
||||
sub r8, rax
|
||||
js rwz_div_fix_2
|
||||
rwz_div_fix_2_ret:
|
||||
|
||||
movd xmm1, r8d
|
||||
movd xmm0, edx
|
||||
punpckldq xmm2, xmm1
|
||||
punpckldq xmm4, xmm0
|
||||
punpckldq xmm4, xmm2
|
||||
paddq xmm3, xmm4
|
||||
movdqa xmm0, xmm3
|
||||
psrlq xmm0, 12
|
||||
paddq xmm0, xmm12
|
||||
sqrtpd xmm1, xmm0
|
||||
movd r9, xmm1
|
||||
movdqa xmm5, xmm1
|
||||
psrlq xmm5, 19
|
||||
test r9, 524287
|
||||
je rwz_sqrt_fix_1
|
||||
rwz_sqrt_fix_1_ret:
|
||||
|
||||
movd r9, xmm10
|
||||
psrldq xmm1, 8
|
||||
movd r8, xmm1
|
||||
test r8, 524287
|
||||
je rwz_sqrt_fix_2
|
||||
rwz_sqrt_fix_2_ret:
|
||||
|
||||
mov r12d, ecx
|
||||
mov r8d, ecx
|
||||
xor r12d, 16
|
||||
xor r8d, 32
|
||||
xor ecx, 48
|
||||
mov rax, r10
|
||||
mul r9
|
||||
movd xmm0, rax
|
||||
movd xmm3, rdx
|
||||
punpcklqdq xmm3, xmm0
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [r12+rsi]
|
||||
pxor xmm0, xmm3
|
||||
movdqu xmm1, XMMWORD PTR [r8+rsi]
|
||||
xor rdx, [r8+rsi]
|
||||
xor rax, [r8+rsi+8]
|
||||
movdqu xmm3, XMMWORD PTR [rcx+rsi]
|
||||
paddq xmm3, xmm6
|
||||
paddq xmm1, xmm11
|
||||
paddq xmm0, xmm8
|
||||
movdqu XMMWORD PTR [r8+rsi], xmm3
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||
movdqu XMMWORD PTR [r12+rsi], xmm0
|
||||
|
||||
add rdi, rdx
|
||||
mov QWORD PTR [r13], rdi
|
||||
xor rdi, r10
|
||||
mov ecx, edi
|
||||
and ecx, 2097136
|
||||
lea r8, QWORD PTR [rcx+rsi]
|
||||
|
||||
mov rdx, QWORD PTR [r13+8]
|
||||
add rbp, rax
|
||||
mov QWORD PTR [r13+8], rbp
|
||||
movdqu xmm11, XMMWORD PTR [rcx+rsi]
|
||||
xor rbp, rdx
|
||||
mov r13, QWORD PTR [rsp]
|
||||
movdqa xmm3, xmm7
|
||||
mov rdx, QWORD PTR [rsp+8]
|
||||
movdqa xmm8, xmm6
|
||||
mov r10, QWORD PTR [rsp+256]
|
||||
movdqa xmm7, xmm9
|
||||
mov r11, QWORD PTR [rsp+264]
|
||||
movdqa xmm6, xmm10
|
||||
mov r9, r15
|
||||
dec r14d
|
||||
jne rwz_main_loop_double
|
||||
|
||||
ldmxcsr DWORD PTR [rsp+272]
|
||||
movaps xmm13, XMMWORD PTR [rsp+48]
|
||||
lea r11, QWORD PTR [rsp+184]
|
||||
movaps xmm6, XMMWORD PTR [r11-24]
|
||||
movaps xmm7, XMMWORD PTR [r11-40]
|
||||
movaps xmm8, XMMWORD PTR [r11-56]
|
||||
movaps xmm9, XMMWORD PTR [r11-72]
|
||||
movaps xmm10, XMMWORD PTR [r11-88]
|
||||
movaps xmm11, XMMWORD PTR [r11-104]
|
||||
movaps xmm12, XMMWORD PTR [r11-120]
|
||||
movaps xmm14, XMMWORD PTR [rsp+32]
|
||||
movaps xmm15, XMMWORD PTR [rsp+16]
|
||||
mov rsp, r11
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
jmp rwz_cnv2_double_mainloop_asm_endp
|
||||
|
||||
rwz_div_fix_1:
|
||||
dec rbx
|
||||
add r11, rdx
|
||||
jmp rwz_div_fix_1_ret
|
||||
|
||||
rwz_div_fix_2:
|
||||
dec rdx
|
||||
add r8, r9
|
||||
jmp rwz_div_fix_2_ret
|
||||
|
||||
rwz_sqrt_fix_1:
|
||||
movd r8, xmm3
|
||||
movdqa xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
dec r9
|
||||
mov r11d, -1022
|
||||
shl r11, 32
|
||||
mov rax, r9
|
||||
shr r9, 19
|
||||
shr rax, 20
|
||||
mov rdx, r9
|
||||
sub rdx, rax
|
||||
lea rdx, [rdx+r11+1]
|
||||
add rax, r11
|
||||
imul rdx, rax
|
||||
sub rdx, r8
|
||||
adc r9, 0
|
||||
movd xmm5, r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
jmp rwz_sqrt_fix_1_ret
|
||||
|
||||
rwz_sqrt_fix_2:
|
||||
psrldq xmm3, 8
|
||||
movd r11, xmm3
|
||||
dec r8
|
||||
mov ebx, -1022
|
||||
shl rbx, 32
|
||||
mov rax, r8
|
||||
shr r8, 19
|
||||
shr rax, 20
|
||||
mov rdx, r8
|
||||
sub rdx, rax
|
||||
lea rdx, [rdx+rbx+1]
|
||||
add rax, rbx
|
||||
imul rdx, rax
|
||||
sub rdx, r11
|
||||
adc r8, 0
|
||||
movd xmm0, r8
|
||||
punpcklqdq xmm5, xmm0
|
||||
jmp rwz_sqrt_fix_2_ret
|
||||
|
||||
rwz_cnv2_double_mainloop_asm_endp:
|
186
src/crypto/asm/win/cnv2_main_loop_rwz_all.inc
Normal file
186
src/crypto/asm/win/cnv2_main_loop_rwz_all.inc
Normal file
|
@ -0,0 +1,186 @@
|
|||
mov QWORD PTR [rsp+24], rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 80
|
||||
|
||||
stmxcsr DWORD PTR [rsp]
|
||||
mov DWORD PTR [rsp+4], 24448
|
||||
ldmxcsr DWORD PTR [rsp+4]
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r9, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov esi, 393216
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
mov r13d, -2147483647
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
mov r10, r8
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
movd xmm4, rax
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
xor r11, QWORD PTR [rcx+8]
|
||||
mov rbx, QWORD PTR [rcx+224]
|
||||
mov rax, QWORD PTR [r9+80]
|
||||
xor rax, QWORD PTR [r9+64]
|
||||
movd xmm0, rdx
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r9+72]
|
||||
movd xmm3, QWORD PTR [r9+104]
|
||||
movaps XMMWORD PTR [rsp+64], xmm6
|
||||
movaps XMMWORD PTR [rsp+48], xmm7
|
||||
movaps XMMWORD PTR [rsp+32], xmm8
|
||||
and r10d, 2097136
|
||||
movd xmm5, rax
|
||||
|
||||
xor eax, eax
|
||||
mov QWORD PTR [rsp+16], rax
|
||||
|
||||
mov ax, 1023
|
||||
shl rax, 52
|
||||
movd xmm8, rax
|
||||
mov r15, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm4, xmm0
|
||||
movd xmm0, rcx
|
||||
punpcklqdq xmm5, xmm0
|
||||
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||
|
||||
ALIGN(64)
|
||||
rwz_main_loop:
|
||||
lea rdx, QWORD PTR [r10+rbx]
|
||||
mov ecx, r10d
|
||||
mov eax, r10d
|
||||
mov rdi, r15
|
||||
xor ecx, 16
|
||||
xor eax, 32
|
||||
xor r10d, 48
|
||||
movd xmm0, r11
|
||||
movd xmm7, r8
|
||||
punpcklqdq xmm7, xmm0
|
||||
aesenc xmm6, xmm7
|
||||
movd rbp, xmm6
|
||||
mov r9, rbp
|
||||
and r9d, 2097136
|
||||
movdqu xmm0, XMMWORD PTR [rcx+rbx]
|
||||
movdqu xmm1, XMMWORD PTR [rax+rbx]
|
||||
movdqu xmm2, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm1, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
||||
movdqu XMMWORD PTR [rax+rbx], xmm2
|
||||
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||
mov r10, r9
|
||||
xor r10d, 32
|
||||
movd rcx, xmm3
|
||||
mov rax, rcx
|
||||
shl rax, 32
|
||||
xor rdi, rax
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [rdx], xmm0
|
||||
xor rdi, QWORD PTR [r9+rbx]
|
||||
lea r14, QWORD PTR [r9+rbx]
|
||||
mov r12, QWORD PTR [r14+8]
|
||||
xor edx, edx
|
||||
lea r9d, DWORD PTR [ecx+ecx]
|
||||
add r9d, ebp
|
||||
movdqa xmm0, xmm6
|
||||
psrldq xmm0, 8
|
||||
or r9d, r13d
|
||||
movd rax, xmm0
|
||||
div r9
|
||||
xorps xmm3, xmm3
|
||||
mov eax, eax
|
||||
shl rdx, 32
|
||||
add rdx, rax
|
||||
lea r9, QWORD PTR [rdx+rbp]
|
||||
mov r15, rdx
|
||||
mov rax, r9
|
||||
shr rax, 12
|
||||
movd xmm0, rax
|
||||
paddq xmm0, xmm8
|
||||
sqrtsd xmm3, xmm0
|
||||
psubq xmm3, XMMWORD PTR [rsp+16]
|
||||
movd rdx, xmm3
|
||||
test edx, 524287
|
||||
je rwz_sqrt_fixup
|
||||
psrlq xmm3, 19
|
||||
rwz_sqrt_fixup_ret:
|
||||
|
||||
mov ecx, r10d
|
||||
mov rax, rdi
|
||||
mul rbp
|
||||
movd xmm2, rdx
|
||||
xor rdx, [rcx+rbx]
|
||||
add r8, rdx
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rdi
|
||||
mov edi, r8d
|
||||
and edi, 2097136
|
||||
movd xmm0, rax
|
||||
xor rax, [rcx+rbx+8]
|
||||
add r11, rax
|
||||
mov QWORD PTR [r14+8], r11
|
||||
punpcklqdq xmm2, xmm0
|
||||
|
||||
mov r9d, r10d
|
||||
xor r9d, 48
|
||||
xor r10d, 16
|
||||
pxor xmm2, XMMWORD PTR [r9+rbx]
|
||||
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm0, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rcx+rbx]
|
||||
paddq xmm2, xmm5
|
||||
paddq xmm1, xmm7
|
||||
movdqa xmm5, xmm4
|
||||
movdqu XMMWORD PTR [r9+rbx], xmm2
|
||||
movdqa xmm4, xmm6
|
||||
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
||||
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||
movdqu xmm6, [rdi+rbx]
|
||||
mov r10d, edi
|
||||
xor r11, r12
|
||||
dec rsi
|
||||
jne rwz_main_loop
|
||||
|
||||
ldmxcsr DWORD PTR [rsp]
|
||||
mov rbx, QWORD PTR [rsp+160]
|
||||
movaps xmm6, XMMWORD PTR [rsp+64]
|
||||
movaps xmm7, XMMWORD PTR [rsp+48]
|
||||
movaps xmm8, XMMWORD PTR [rsp+32]
|
||||
add rsp, 80
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
jmp cnv2_rwz_main_loop_endp
|
||||
|
||||
rwz_sqrt_fixup:
|
||||
dec rdx
|
||||
mov r13d, -1022
|
||||
shl r13, 32
|
||||
mov rax, rdx
|
||||
shr rdx, 19
|
||||
shr rax, 20
|
||||
mov rcx, rdx
|
||||
sub rcx, rax
|
||||
add rax, r13
|
||||
not r13
|
||||
sub rcx, r13
|
||||
mov r13d, -2147483647
|
||||
imul rcx, rax
|
||||
sub rcx, r9
|
||||
adc rdx, 0
|
||||
movd xmm3, rdx
|
||||
jmp rwz_sqrt_fixup_ret
|
||||
|
||||
cnv2_rwz_main_loop_endp:
|
447
src/crypto/variant4_random_math.h
Normal file
447
src/crypto/variant4_random_math.h
Normal file
|
@ -0,0 +1,447 @@
|
|||
#ifndef VARIANT4_RANDOM_MATH_H
|
||||
#define VARIANT4_RANDOM_MATH_H
|
||||
|
||||
extern "C"
|
||||
{
|
||||
#include "c_blake256.h"
|
||||
}
|
||||
|
||||
enum V4_Settings
|
||||
{
|
||||
// Generate code with minimal theoretical latency = 45 cycles, which is equivalent to 15 multiplications
|
||||
TOTAL_LATENCY = 15 * 3,
|
||||
|
||||
// Always generate at least 60 instructions
|
||||
NUM_INSTRUCTIONS_MIN = 60,
|
||||
|
||||
// Never generate more than 70 instructions (final RET instruction doesn't count here)
|
||||
NUM_INSTRUCTIONS_MAX = 70,
|
||||
|
||||
// Available ALUs for MUL
|
||||
// Modern CPUs typically have only 1 ALU which can do multiplications
|
||||
ALU_COUNT_MUL = 1,
|
||||
|
||||
// Total available ALUs
|
||||
// Modern CPUs have 4 ALUs, but we use only 3 because random math executes together with other main loop code
|
||||
ALU_COUNT = 3,
|
||||
};
|
||||
|
||||
enum V4_InstructionList
|
||||
{
|
||||
MUL, // a*b
|
||||
ADD, // a+b + C, C is an unsigned 32-bit constant
|
||||
SUB, // a-b
|
||||
ROR, // rotate right "a" by "b & 31" bits
|
||||
ROL, // rotate left "a" by "b & 31" bits
|
||||
XOR, // a^b
|
||||
RET, // finish execution
|
||||
V4_INSTRUCTION_COUNT = RET,
|
||||
};
|
||||
|
||||
// V4_InstructionDefinition is used to generate code from random data
|
||||
// Every random sequence of bytes is a valid code
|
||||
//
|
||||
// There are 9 registers in total:
|
||||
// - 4 variable registers
|
||||
// - 5 constant registers initialized from loop variables
|
||||
// This is why dst_index is 2 bits
|
||||
enum V4_InstructionDefinition
|
||||
{
|
||||
V4_OPCODE_BITS = 3,
|
||||
V4_DST_INDEX_BITS = 2,
|
||||
V4_SRC_INDEX_BITS = 3,
|
||||
};
|
||||
|
||||
struct V4_Instruction
|
||||
{
|
||||
uint8_t opcode;
|
||||
uint8_t dst_index;
|
||||
uint8_t src_index;
|
||||
uint32_t C;
|
||||
};
|
||||
|
||||
#ifndef FORCEINLINE
|
||||
#ifdef __GNUC__
|
||||
#define FORCEINLINE __attribute__((always_inline)) inline
|
||||
#elif _MSC_VER
|
||||
#define FORCEINLINE __forceinline
|
||||
#else
|
||||
#define FORCEINLINE inline
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef UNREACHABLE_CODE
|
||||
#ifdef __GNUC__
|
||||
#define UNREACHABLE_CODE __builtin_unreachable()
|
||||
#elif _MSC_VER
|
||||
#define UNREACHABLE_CODE __assume(false)
|
||||
#else
|
||||
#define UNREACHABLE_CODE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Random math interpreter's loop is fully unrolled and inlined to achieve 100% branch prediction on CPU:
|
||||
// every switch-case will point to the same destination on every iteration of Cryptonight main loop
|
||||
//
|
||||
// This is about as fast as it can get without using low-level machine code generation
|
||||
template<typename v4_reg>
|
||||
static void v4_random_math(const struct V4_Instruction* code, v4_reg* r)
|
||||
{
|
||||
enum
|
||||
{
|
||||
REG_BITS = sizeof(v4_reg) * 8,
|
||||
};
|
||||
|
||||
#define V4_EXEC(i) \
|
||||
{ \
|
||||
const struct V4_Instruction* op = code + i; \
|
||||
const v4_reg src = r[op->src_index]; \
|
||||
v4_reg* dst = r + op->dst_index; \
|
||||
switch (op->opcode) \
|
||||
{ \
|
||||
case MUL: \
|
||||
*dst *= src; \
|
||||
break; \
|
||||
case ADD: \
|
||||
*dst += src + op->C; \
|
||||
break; \
|
||||
case SUB: \
|
||||
*dst -= src; \
|
||||
break; \
|
||||
case ROR: \
|
||||
{ \
|
||||
const uint32_t shift = src % REG_BITS; \
|
||||
*dst = (*dst >> shift) | (*dst << ((REG_BITS - shift) % REG_BITS)); \
|
||||
} \
|
||||
break; \
|
||||
case ROL: \
|
||||
{ \
|
||||
const uint32_t shift = src % REG_BITS; \
|
||||
*dst = (*dst << shift) | (*dst >> ((REG_BITS - shift) % REG_BITS)); \
|
||||
} \
|
||||
break; \
|
||||
case XOR: \
|
||||
*dst ^= src; \
|
||||
break; \
|
||||
case RET: \
|
||||
return; \
|
||||
default: \
|
||||
UNREACHABLE_CODE; \
|
||||
break; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define V4_EXEC_10(j) \
|
||||
V4_EXEC(j + 0) \
|
||||
V4_EXEC(j + 1) \
|
||||
V4_EXEC(j + 2) \
|
||||
V4_EXEC(j + 3) \
|
||||
V4_EXEC(j + 4) \
|
||||
V4_EXEC(j + 5) \
|
||||
V4_EXEC(j + 6) \
|
||||
V4_EXEC(j + 7) \
|
||||
V4_EXEC(j + 8) \
|
||||
V4_EXEC(j + 9)
|
||||
|
||||
// Generated program can have 60 + a few more (usually 2-3) instructions to achieve required latency
|
||||
// I've checked all block heights < 10,000,000 and here is the distribution of program sizes:
|
||||
//
|
||||
// 60 27960
|
||||
// 61 105054
|
||||
// 62 2452759
|
||||
// 63 5115997
|
||||
// 64 1022269
|
||||
// 65 1109635
|
||||
// 66 153145
|
||||
// 67 8550
|
||||
// 68 4529
|
||||
// 69 102
|
||||
|
||||
// Unroll 70 instructions here
|
||||
V4_EXEC_10(0); // instructions 0-9
|
||||
V4_EXEC_10(10); // instructions 10-19
|
||||
V4_EXEC_10(20); // instructions 20-29
|
||||
V4_EXEC_10(30); // instructions 30-39
|
||||
V4_EXEC_10(40); // instructions 40-49
|
||||
V4_EXEC_10(50); // instructions 50-59
|
||||
V4_EXEC_10(60); // instructions 60-69
|
||||
|
||||
#undef V4_EXEC_10
|
||||
#undef V4_EXEC
|
||||
}
|
||||
|
||||
// If we don't have enough data available, generate more
|
||||
static FORCEINLINE void check_data(size_t* data_index, const size_t bytes_needed, int8_t* data, const size_t data_size)
|
||||
{
|
||||
if (*data_index + bytes_needed > data_size)
|
||||
{
|
||||
hash_extra_blake(data, data_size, (char*) data);
|
||||
*data_index = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Generates as many random math operations as possible with given latency and ALU restrictions
|
||||
// "code" array must have space for NUM_INSTRUCTIONS_MAX+1 instructions
|
||||
static int v4_random_math_init(struct V4_Instruction* code, PowVariant variant, const uint64_t height)
|
||||
{
|
||||
// MUL is 3 cycles, 3-way addition and rotations are 2 cycles, SUB/XOR are 1 cycle
|
||||
// These latencies match real-life instruction latencies for Intel CPUs starting from Sandy Bridge and up to Skylake/Coffee lake
|
||||
//
|
||||
// AMD Ryzen has the same latencies except 1-cycle ROR/ROL, so it'll be a bit faster than Intel Sandy Bridge and newer processors
|
||||
// Surprisingly, Intel Nehalem also has 1-cycle ROR/ROL, so it'll also be faster than Intel Sandy Bridge and newer processors
|
||||
// AMD Bulldozer has 4 cycles latency for MUL (slower than Intel) and 1 cycle for ROR/ROL (faster than Intel), so average performance will be the same
|
||||
// Source: https://www.agner.org/optimize/instruction_tables.pdf
|
||||
const int op_latency[V4_INSTRUCTION_COUNT] = { 3, 2, 1, 2, 2, 1 };
|
||||
|
||||
// Instruction latencies for theoretical ASIC implementation
|
||||
const int asic_op_latency[V4_INSTRUCTION_COUNT] = { 3, 1, 1, 1, 1, 1 };
|
||||
|
||||
// Available ALUs for each instruction
|
||||
const int op_ALUs[V4_INSTRUCTION_COUNT] = { ALU_COUNT_MUL, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT };
|
||||
|
||||
int8_t data[32];
|
||||
memset(data, 0, sizeof(data));
|
||||
uint64_t tmp = SWAP64LE(height);
|
||||
memcpy(data, &tmp, sizeof(uint64_t));
|
||||
if (variant == POW_V4)
|
||||
{
|
||||
data[20] = -38;
|
||||
}
|
||||
|
||||
// Set data_index past the last byte in data
|
||||
// to trigger full data update with blake hash
|
||||
// before we start using it
|
||||
size_t data_index = sizeof(data);
|
||||
|
||||
int code_size;
|
||||
|
||||
// There is a small chance (1.8%) that register R8 won't be used in the generated program
|
||||
// So we keep track of it and try again if it's not used
|
||||
bool r8_used;
|
||||
do {
|
||||
int latency[9];
|
||||
int asic_latency[9];
|
||||
|
||||
// Tracks previous instruction and value of the source operand for registers R0-R3 throughout code execution
|
||||
// byte 0: current value of the destination register
|
||||
// byte 1: instruction opcode
|
||||
// byte 2: current value of the source register
|
||||
//
|
||||
// Registers R4-R8 are constant and are treated as having the same value because when we do
|
||||
// the same operation twice with two constant source registers, it can be optimized into a single operation
|
||||
uint32_t inst_data[9] = { 0, 1, 2, 3, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF };
|
||||
|
||||
bool alu_busy[TOTAL_LATENCY + 1][ALU_COUNT];
|
||||
bool is_rotation[V4_INSTRUCTION_COUNT];
|
||||
bool rotated[4];
|
||||
int rotate_count = 0;
|
||||
|
||||
memset(latency, 0, sizeof(latency));
|
||||
memset(asic_latency, 0, sizeof(asic_latency));
|
||||
memset(alu_busy, 0, sizeof(alu_busy));
|
||||
memset(is_rotation, 0, sizeof(is_rotation));
|
||||
memset(rotated, 0, sizeof(rotated));
|
||||
is_rotation[ROR] = true;
|
||||
is_rotation[ROL] = true;
|
||||
|
||||
int num_retries = 0;
|
||||
code_size = 0;
|
||||
|
||||
int total_iterations = 0;
|
||||
r8_used = (variant == POW_WOW);
|
||||
|
||||
// Generate random code to achieve minimal required latency for our abstract CPU
|
||||
// Try to get this latency for all 4 registers
|
||||
while (((latency[0] < TOTAL_LATENCY) || (latency[1] < TOTAL_LATENCY) || (latency[2] < TOTAL_LATENCY) || (latency[3] < TOTAL_LATENCY)) && (num_retries < 64))
|
||||
{
|
||||
// Fail-safe to guarantee loop termination
|
||||
++total_iterations;
|
||||
if (total_iterations > 256)
|
||||
break;
|
||||
|
||||
check_data(&data_index, 1, data, sizeof(data));
|
||||
|
||||
const uint8_t c = ((uint8_t*)data)[data_index++];
|
||||
|
||||
// MUL = opcodes 0-2
|
||||
// ADD = opcode 3
|
||||
// SUB = opcode 4
|
||||
// ROR/ROL = opcode 5, shift direction is selected randomly
|
||||
// XOR = opcodes 6-7
|
||||
uint8_t opcode = c & ((1 << V4_OPCODE_BITS) - 1);
|
||||
if (opcode == 5)
|
||||
{
|
||||
check_data(&data_index, 1, data, sizeof(data));
|
||||
opcode = (data[data_index++] >= 0) ? ROR : ROL;
|
||||
}
|
||||
else if (opcode >= 6)
|
||||
{
|
||||
opcode = XOR;
|
||||
}
|
||||
else
|
||||
{
|
||||
opcode = (opcode <= 2) ? MUL : (opcode - 2);
|
||||
}
|
||||
|
||||
uint8_t dst_index = (c >> V4_OPCODE_BITS) & ((1 << V4_DST_INDEX_BITS) - 1);
|
||||
uint8_t src_index = (c >> (V4_OPCODE_BITS + V4_DST_INDEX_BITS)) & ((1 << V4_SRC_INDEX_BITS) - 1);
|
||||
|
||||
const int a = dst_index;
|
||||
int b = src_index;
|
||||
|
||||
// Don't do ADD/SUB/XOR with the same register
|
||||
if (((opcode == ADD) || (opcode == SUB) || (opcode == XOR)) && (a == b))
|
||||
{
|
||||
// a is always < 4, so we don't need to check bounds here
|
||||
b = (variant == POW_WOW) ? (a + 4) : 8;
|
||||
src_index = b;
|
||||
}
|
||||
|
||||
// Don't do rotation with the same destination twice because it's equal to a single rotation
|
||||
if (is_rotation[opcode] && rotated[a])
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Don't do the same instruction (except MUL) with the same source value twice because all other cases can be optimized:
|
||||
// 2xADD(a, b, C) = ADD(a, b*2, C1+C2), same for SUB and rotations
|
||||
// 2xXOR(a, b) = NOP
|
||||
if ((opcode != MUL) && ((inst_data[a] & 0xFFFF00) == (opcode << 8) + ((inst_data[b] & 255) << 16)))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Find which ALU is available (and when) for this instruction
|
||||
int next_latency = (latency[a] > latency[b]) ? latency[a] : latency[b];
|
||||
int alu_index = -1;
|
||||
while (next_latency < TOTAL_LATENCY)
|
||||
{
|
||||
for (int i = op_ALUs[opcode] - 1; i >= 0; --i)
|
||||
{
|
||||
if (!alu_busy[next_latency][i])
|
||||
{
|
||||
// ADD is implemented as two 1-cycle instructions on a real CPU, so do an additional availability check
|
||||
if ((opcode == ADD) && alu_busy[next_latency + 1][i])
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rotation can only start when previous rotation is finished, so do an additional availability check
|
||||
if (is_rotation[opcode] && (next_latency < rotate_count * op_latency[opcode]))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
alu_index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (alu_index >= 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
++next_latency;
|
||||
}
|
||||
|
||||
// Don't generate instructions that leave some register unchanged for more than 7 cycles
|
||||
if (next_latency > latency[a] + 7)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
next_latency += op_latency[opcode];
|
||||
|
||||
if (next_latency <= TOTAL_LATENCY)
|
||||
{
|
||||
if (is_rotation[opcode])
|
||||
{
|
||||
++rotate_count;
|
||||
}
|
||||
|
||||
// Mark ALU as busy only for the first cycle when it starts executing the instruction because ALUs are fully pipelined
|
||||
alu_busy[next_latency - op_latency[opcode]][alu_index] = true;
|
||||
latency[a] = next_latency;
|
||||
|
||||
// ASIC is supposed to have enough ALUs to run as many independent instructions per cycle as possible, so latency calculation for ASIC is simple
|
||||
asic_latency[a] = ((asic_latency[a] > asic_latency[b]) ? asic_latency[a] : asic_latency[b]) + asic_op_latency[opcode];
|
||||
|
||||
rotated[a] = is_rotation[opcode];
|
||||
|
||||
inst_data[a] = code_size + (opcode << 8) + ((inst_data[b] & 255) << 16);
|
||||
|
||||
code[code_size].opcode = opcode;
|
||||
code[code_size].dst_index = dst_index;
|
||||
code[code_size].src_index = src_index;
|
||||
code[code_size].C = 0;
|
||||
|
||||
if (src_index == 8)
|
||||
{
|
||||
r8_used = true;
|
||||
}
|
||||
|
||||
if (opcode == ADD)
|
||||
{
|
||||
// ADD instruction is implemented as two 1-cycle instructions on a real CPU, so mark ALU as busy for the next cycle too
|
||||
alu_busy[next_latency - op_latency[opcode] + 1][alu_index] = true;
|
||||
|
||||
// ADD instruction requires 4 more random bytes for 32-bit constant "C" in "a = a + b + C"
|
||||
check_data(&data_index, sizeof(uint32_t), data, sizeof(data));
|
||||
uint32_t t;
|
||||
memcpy(&t, data + data_index, sizeof(uint32_t));
|
||||
code[code_size].C = SWAP32LE(t);
|
||||
data_index += sizeof(uint32_t);
|
||||
}
|
||||
|
||||
++code_size;
|
||||
if (code_size >= NUM_INSTRUCTIONS_MIN)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
++num_retries;
|
||||
}
|
||||
}
|
||||
|
||||
// ASIC has more execution resources and can extract as much parallelism from the code as possible
|
||||
// We need to add a few more MUL and ROR instructions to achieve minimal required latency for ASIC
|
||||
// Get this latency for at least 1 of the 4 registers
|
||||
const int prev_code_size = code_size;
|
||||
while ((code_size < NUM_INSTRUCTIONS_MAX) && (asic_latency[0] < TOTAL_LATENCY) && (asic_latency[1] < TOTAL_LATENCY) && (asic_latency[2] < TOTAL_LATENCY) && (asic_latency[3] < TOTAL_LATENCY))
|
||||
{
|
||||
int min_idx = 0;
|
||||
int max_idx = 0;
|
||||
for (int i = 1; i < 4; ++i)
|
||||
{
|
||||
if (asic_latency[i] < asic_latency[min_idx]) min_idx = i;
|
||||
if (asic_latency[i] > asic_latency[max_idx]) max_idx = i;
|
||||
}
|
||||
|
||||
const uint8_t pattern[3] = { ROR, MUL, MUL };
|
||||
const uint8_t opcode = pattern[(code_size - prev_code_size) % 3];
|
||||
latency[min_idx] = latency[max_idx] + op_latency[opcode];
|
||||
asic_latency[min_idx] = asic_latency[max_idx] + asic_op_latency[opcode];
|
||||
|
||||
code[code_size].opcode = opcode;
|
||||
code[code_size].dst_index = min_idx;
|
||||
code[code_size].src_index = max_idx;
|
||||
code[code_size].C = 0;
|
||||
++code_size;
|
||||
}
|
||||
|
||||
// There is ~98.15% chance that loop condition is false, so this loop will execute only 1 iteration most of the time
|
||||
// It never does more than 4 iterations for all block heights < 10,000,000
|
||||
} while (!r8_used || (code_size < NUM_INSTRUCTIONS_MIN) || (code_size > NUM_INSTRUCTIONS_MAX));
|
||||
|
||||
// It's guaranteed that NUM_INSTRUCTIONS_MIN <= code_size <= NUM_INSTRUCTIONS_MAX here
|
||||
// Add final instruction to stop the interpreter
|
||||
code[code_size].opcode = RET;
|
||||
code[code_size].dst_index = 0;
|
||||
code[code_size].src_index = 0;
|
||||
code[code_size].C = 0;
|
||||
|
||||
return code_size;
|
||||
}
|
||||
|
||||
#endif
|
|
@ -260,6 +260,14 @@ bool Client::parseJob(const rapidjson::Value ¶ms, int *code)
|
|||
|
||||
job.setPowVariant(powVariant);
|
||||
|
||||
if (params.HasMember("height")) {
|
||||
const rapidjson::Value &variant = params["height"];
|
||||
|
||||
if (variant.IsUint64()) {
|
||||
job.setHeight(variant.GetUint64());
|
||||
}
|
||||
}
|
||||
|
||||
if (m_job != job) {
|
||||
m_jobs++;
|
||||
m_job = std::move(job);
|
||||
|
|
|
@ -62,6 +62,7 @@ Job::Job(int poolId, bool nicehash) :
|
|||
m_size(0),
|
||||
m_diff(0),
|
||||
m_target(0),
|
||||
m_height(0),
|
||||
m_powVariant(PowVariant::POW_AUTODETECT)
|
||||
{
|
||||
}
|
||||
|
|
|
@ -54,9 +54,11 @@ public:
|
|||
inline uint32_t *nonce() { return reinterpret_cast<uint32_t*>(m_blob + 39); }
|
||||
inline uint32_t diff() const { return (uint32_t) m_diff; }
|
||||
inline uint64_t target() const { return m_target; }
|
||||
inline uint64_t height() const { return m_height; }
|
||||
inline void setNicehash(bool nicehash) { m_nicehash = nicehash; }
|
||||
inline void setThreadId(int threadId) { m_threadId = threadId; }
|
||||
inline void setPowVariant(PowVariant powVariant) { m_powVariant = powVariant; }
|
||||
inline void setHeight(uint64_t height) { m_height = height; }
|
||||
|
||||
static bool fromHex(const char* in, unsigned int len, unsigned char* out);
|
||||
static inline uint32_t *nonce(uint8_t *blob) { return reinterpret_cast<uint32_t*>(blob + 39); }
|
||||
|
@ -76,6 +78,7 @@ private:
|
|||
size_t m_size;
|
||||
uint64_t m_diff;
|
||||
uint64_t m_target;
|
||||
uint64_t m_height;
|
||||
PowVariant m_powVariant;
|
||||
};
|
||||
|
||||
|
|
|
@ -170,11 +170,11 @@ void Network::onResultAccepted(Client *client, const SubmitResult &result, const
|
|||
void Network::setJob(Client *client, const Job &job)
|
||||
{
|
||||
if (m_options->colors()) {
|
||||
LOG_INFO("\x1B[01;35mnew job\x1B[0m from \x1B[01;37m%s:%d\x1B[0m with diff \x1B[01;37m%d\x1B[0m and PoW \x1B[01;37m%s",
|
||||
LOG_INFO("\x1B[01;35mnew job\x1B[0m from \x1B[01;37m%s:%d\x1B[0m with diff \x1B[01;37m%d\x1B[0m variant \x1B[01;37m%s",
|
||||
client->host(), client->port(), job.diff(), getPowVariantName(job.powVariant()).c_str());
|
||||
}
|
||||
else {
|
||||
LOG_INFO("new job from %s:%d with diff %d and PoW %s", client->host(), client->port(), job.diff(), getPowVariantName(job.powVariant()).c_str());
|
||||
LOG_INFO("new job from %s:%d with diff %d variant %s", client->host(), client->port(), job.diff(), getPowVariantName(job.powVariant()).c_str());
|
||||
}
|
||||
|
||||
m_state.powVariant = job.powVariant();
|
||||
|
|
|
@ -36,14 +36,14 @@
|
|||
#define APP_DESC "XMRigCC CPU miner"
|
||||
#define APP_COPYRIGHT "Copyright (C) 2017- BenDr0id"
|
||||
#endif
|
||||
#define APP_VERSION "1.8.13 (based on XMRig)"
|
||||
#define APP_VERSION "1.9.0 (based on XMRig)"
|
||||
#define APP_DOMAIN ""
|
||||
#define APP_SITE "https://github.com/Bendr0id/xmrigCC"
|
||||
#define APP_KIND "cpu"
|
||||
|
||||
#define APP_VER_MAJOR 1
|
||||
#define APP_VER_MINOR 8
|
||||
#define APP_VER_BUILD 13
|
||||
#define APP_VER_MINOR 9
|
||||
#define APP_VER_BUILD 0
|
||||
#define APP_VER_REV 0
|
||||
|
||||
#ifndef NDEBUG
|
||||
|
|
|
@ -141,7 +141,7 @@ void MultiWorker::start()
|
|||
*Job::nonce(m_state->blob + i * m_state->job.size()) = ++m_state->nonces[i];
|
||||
}
|
||||
|
||||
CryptoNight::hash(m_hashFactor, Options::i()->asmOptimization(), m_state->job.powVariant(), m_state->blob, m_state->job.size(), m_hash, scratchPads);
|
||||
CryptoNight::hash(m_hashFactor, Options::i()->asmOptimization(), m_state->job.height(), m_state->job.powVariant(), m_state->blob, m_state->job.size(), m_hash, scratchPads);
|
||||
|
||||
for (size_t i=0; i < m_hashFactor; ++i) {
|
||||
if (*reinterpret_cast<uint64_t *>(m_hash + 24 + i * 32) < m_state->job.target()) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue