diff --git a/CHANGELOG.md b/CHANGELOG.md
index 18acfca9..7f0966ab 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,11 @@
+# 1.9.0
+- Integrated Monero CN-R variant, also known as CNv4, CNv5 or CryptoNight-R #233 (algo: "cryptonight", variant: "r")
+- Integrated Wownero CN-R variant #233 (algo: "cryptonight", variant: "wow")
+- Integrated Graft variant (algo: "cryptonight", variant: "rwz" OR variant: "graft")
+- Integrated X-Cash variant #234 (algo: "cryptonight", variant: "double" OR variant: "heavyx" OR variant: "xcash")
+- Integrated Zelerius variant (algo: "cryptonight", variant: "zls" OR variant: "zelerius")
+- Added miner version column to the Dashboard (version turns red when it's outdated)
+- Fixed crash when remote logging is disabled
# 1.8.13
- Integrated HOSP variant (algo: "cryptonight", variant: "hosp")
- Added ASM code/optimization for HOSP and RTO on Intel CPUs
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e96f979b..757b26b8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -54,6 +54,7 @@ set(SOURCES_CRYPTO
src/crypto/c_jh.c
src/crypto/c_skein.c
src/crypto/CryptoNight.cpp
+ src/crypto/CryptoNightR_gen.cpp
)
set(SOURCES_COMMON
@@ -131,7 +132,7 @@ if (WIN32)
add_definitions(-DBOOST_ALL_NO_LIB)
endif(WIN32)
-find_package(Boost 1.63.0 COMPONENTS system REQUIRED)
+find_package(Boost 1.62.0 COMPONENTS system REQUIRED)
include(cmake/flags.cmake)
diff --git a/cmake/asm.cmake b/cmake/asm.cmake
index abd4030c..b5067939 100644
--- a/cmake/asm.cmake
+++ b/cmake/asm.cmake
@@ -56,6 +56,40 @@ configure_file("src/crypto/asm/win/cnv2_main_loop_ryzen.inc.in" "src/crypto/asm/
configure_file("src/crypto/asm/win/cnv2_double_main_loop_sandybridge.inc.in" "src/crypto/asm/win/cnv2_double_main_loop_fastv2_sandybridge.inc")
configure_file("src/crypto/asm/win/cnv2_main_loop_soft_aes_sandybridge.inc.in" "src/crypto/asm/win/cnv2_main_loop_fastv2_soft_aes_sandybridge.inc")
+# CN XCASH
+set(ALGO "xcash")
+set(ITERATIONS "1048576") #0x100000
+set(MASK "2097136") #0x1FFFF0
+
+configure_file("src/crypto/asm/cnv2_main_loop_ivybridge.inc.in" "src/crypto/asm/cnv2_main_loop_xcash_ivybridge.inc")
+configure_file("src/crypto/asm/cnv2_main_loop_bulldozer.inc.in" "src/crypto/asm/cnv2_main_loop_xcash_bulldozer.inc")
+configure_file("src/crypto/asm/cnv2_main_loop_ryzen.inc.in" "src/crypto/asm/cnv2_main_loop_xcash_ryzen.inc")
+configure_file("src/crypto/asm/cnv2_double_main_loop_sandybridge.inc.in" "src/crypto/asm/cnv2_double_main_loop_xcash_sandybridge.inc")
+configure_file("src/crypto/asm/cnv2_main_loop_soft_aes_sandybridge.inc.in" "src/crypto/asm/cnv2_main_loop_xcash_soft_aes_sandybridge.inc")
+
+configure_file("src/crypto/asm/win/cnv2_main_loop_ivybridge.inc.in" "src/crypto/asm/win/cnv2_main_loop_xcash_ivybridge.inc")
+configure_file("src/crypto/asm/win/cnv2_main_loop_bulldozer.inc.in" "src/crypto/asm/win/cnv2_main_loop_xcash_bulldozer.inc")
+configure_file("src/crypto/asm/win/cnv2_main_loop_ryzen.inc.in" "src/crypto/asm/win/cnv2_main_loop_xcash_ryzen.inc")
+configure_file("src/crypto/asm/win/cnv2_double_main_loop_sandybridge.inc.in" "src/crypto/asm/win/cnv2_double_main_loop_xcash_sandybridge.inc")
+configure_file("src/crypto/asm/win/cnv2_main_loop_soft_aes_sandybridge.inc.in" "src/crypto/asm/win/cnv2_main_loop_xcash_soft_aes_sandybridge.inc")
+
+# CN ZELERIUS
+set(ALGO "zelerius")
+set(ITERATIONS "393216") #0x60000
+set(MASK "2097136") #0x1FFFF0
+
+configure_file("src/crypto/asm/cnv2_main_loop_ivybridge.inc.in" "src/crypto/asm/cnv2_main_loop_zelerius_ivybridge.inc")
+configure_file("src/crypto/asm/cnv2_main_loop_bulldozer.inc.in" "src/crypto/asm/cnv2_main_loop_zelerius_bulldozer.inc")
+configure_file("src/crypto/asm/cnv2_main_loop_ryzen.inc.in" "src/crypto/asm/cnv2_main_loop_zelerius_ryzen.inc")
+configure_file("src/crypto/asm/cnv2_double_main_loop_sandybridge.inc.in" "src/crypto/asm/cnv2_double_main_loop_zelerius_sandybridge.inc")
+configure_file("src/crypto/asm/cnv2_main_loop_soft_aes_sandybridge.inc.in" "src/crypto/asm/cnv2_main_loop_zelerius_soft_aes_sandybridge.inc")
+
+configure_file("src/crypto/asm/win/cnv2_main_loop_ivybridge.inc.in" "src/crypto/asm/win/cnv2_main_loop_zelerius_ivybridge.inc")
+configure_file("src/crypto/asm/win/cnv2_main_loop_bulldozer.inc.in" "src/crypto/asm/win/cnv2_main_loop_zelerius_bulldozer.inc")
+configure_file("src/crypto/asm/win/cnv2_main_loop_ryzen.inc.in" "src/crypto/asm/win/cnv2_main_loop_zelerius_ryzen.inc")
+configure_file("src/crypto/asm/win/cnv2_double_main_loop_sandybridge.inc.in" "src/crypto/asm/win/cnv2_double_main_loop_zelerius_sandybridge.inc")
+configure_file("src/crypto/asm/win/cnv2_main_loop_soft_aes_sandybridge.inc.in" "src/crypto/asm/win/cnv2_main_loop_zelerius_soft_aes_sandybridge.inc")
+
# CN LITE
set(ALGO "lite")
@@ -99,16 +133,19 @@ configure_file("src/crypto/asm/win/cnv2_main_loop_soft_aes_sandybridge.inc.in" "
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
enable_language(ASM_MASM)
- set(XMRIG_ASM_FILE "src/crypto/asm/win/cn_main_loop.asm")
+ set(XMRIG_ASM_FILE "src/crypto/asm/win/cn_main_loop.asm"
+ "src/crypto/asm/win/CryptonightR_template.asm")
set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY ASM_MASM)
include_directories(${CMAKE_BINARY_DIR}/src/crypto/asm/win)
else()
enable_language(ASM)
if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
- set(XMRIG_ASM_FILE "src/crypto/asm/win/cn_main_loop_win_gcc.S")
+ set(XMRIG_ASM_FILE "src/crypto/asm/win/cn_main_loop_win_gcc.S"
+ "src/crypto/asm/win/CryptonightR_template.S")
else()
- set(XMRIG_ASM_FILE "src/crypto/asm/cn_main_loop.S")
+ set(XMRIG_ASM_FILE "src/crypto/asm/cn_main_loop.S"
+ "src/crypto/asm/CryptonightR_template.S")
endif()
set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY C)
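Note: the ITERATIONS/MASK values configured above are substituted into the `*.inc.in` assembly templates, and the same constants reappear as the first and fourth template arguments of `CryptoNightMultiHash` in `src/crypto/CryptoNight.cpp` later in this diff. A minimal sketch of that mapping, purely for reference (the `VariantParams` struct and `kParams` table below are illustrative, not part of the patch):

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative only: pairs each new variant with the iteration count and
// scratchpad mask used both in asm.cmake and in CryptoNight.cpp.
struct VariantParams {
    const char* name;
    uint32_t    iterations;
    uint32_t    mask;
};

constexpr VariantParams kParams[] = {
    {"xcash (double)", 0x100000, 0x1FFFF0},  // ITERATIONS "1048576"
    {"zelerius (zls)", 0x60000,  0x1FFFF0},  // ITERATIONS "393216"
    {"rwz (graft)",    0x60000,  0x1FFFF0},  // same reduced count, CNv2-style loop
};

int main() {
    for (const auto& p : kParams) {
        std::printf("%-16s iterations=0x%X mask=0x%X\n",
                    p.name, (unsigned)p.iterations, (unsigned)p.mask);
    }
    return 0;
}
```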
diff --git a/index.html b/index.html
index 9a8d953c..ec011264 100644
--- a/index.html
+++ b/index.html
@@ -65,6 +65,9 @@
var currentServerTime = 0;
var clockDrift = 0;
+ var latestVersion = 0;
+ var currentVersion = 0;
+
$.fn.dataTable.ext.search.push(
function( settings, data, dataIndex ) {
@@ -108,6 +111,7 @@
orderable: false
},
{data: "client_status.client_id", render: clientInfo},
+ {data: "client_status.version", render: version},
{data: "client_status.current_pool"},
{data: "client_status.current_status", render: clientStatus},
{data: "client_status.current_algo_name", render: algoAndPowVariantName},
@@ -674,6 +678,16 @@
}
}
+ function version( data, type, row ) {
+ var clientVersion = parseInt(row.client_status.version.split('.').join(""));
+
+ if (latestVersion > clientVersion) {
+            return '<span style="color: red">' + data + '</span>';
+ } else {
+ return data;
+ }
+ }
+
function clientStatus( data, type, row ) {
var lastStatus = row.client_status.last_status_update * 1000;
@@ -822,6 +836,7 @@
                             <th></th>
                             <th>Miner Id</th>
+                            <th>Version</th>
                             <th>Pool</th>
                             <th>Status</th>
                             <th>Algo / PoW</th>
@@ -861,6 +876,7 @@
                             <th></th>
                             <th></th>
                             <th></th>
+                            <th></th>
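For reference, the new dashboard column marks a miner's version red when it is older than the latest known version; the comparison above strips the dots from the version string and compares the resulting integers. A minimal C++ analogue of that flattening, purely illustrative (the dashboard itself does this in JavaScript as shown above):

```cpp
#include <cstdint>
#include <iostream>
#include <string>

// Analogue of parseInt(version.split('.').join("")) used in version() above.
uint64_t flattenVersion(const std::string& v) {
    std::string digits;
    for (char c : v) {
        if (c != '.') digits += c;  // drop the dots, keep the digits
    }
    return std::stoull(digits);
}

int main() {
    std::cout << flattenVersion("1.9.0") << "\n";   // 190
    std::cout << flattenVersion("1.8.13") << "\n";  // 1813
    return 0;
}
```

Note that components with differing digit counts flatten to integers that do not preserve release order (as the two sample values show), so the red highlight is reliable only while all compared versions have the same component widths.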
diff --git a/src/App.cpp b/src/App.cpp
index b240431e..45c30757 100644
--- a/src/App.cpp
+++ b/src/App.cpp
@@ -155,10 +155,14 @@ int App::start()
return EINVAL;
} else {
if (Options::i()->colors()) {
- LOG_INFO(WHITE_BOLD("%s hash self-test... ") GREEN_BOLD("successful") ".", m_options->algoName());
+ LOG_INFO(WHITE_BOLD("%s hash self-test... %s."),
+ m_options->algoName(),
+ Options::i()->skipSelfCheck() ? YELLOW_BOLD("skipped") : GREEN_BOLD("successful"));
}
else {
- LOG_INFO("%s hash self-test... successful.", m_options->algoName());
+ LOG_INFO("%s hash self-test... %s.",
+ m_options->algoName(),
+ Options::i()->skipSelfCheck() ? "skipped" : "successful");
}
}
diff --git a/src/Cpu_arm.cpp b/src/Cpu_arm.cpp
index db6ffa30..7be95170 100644
--- a/src/Cpu_arm.cpp
+++ b/src/Cpu_arm.cpp
@@ -30,7 +30,11 @@
void CpuImpl::initCommon()
{
- memcpy(m_brand, "Unknown", 7);
+# ifdef XMRIG_ARMv8
+ memcpy(m_brand, "ARMv8", 5);
+# else
+ memcpy(m_brand, "ARMv7", 5);
+# endif
# if defined(XMRIG_ARMv8)
m_flags |= Cpu::X86_64;
diff --git a/src/Mem.cpp b/src/Mem.cpp
index a9a233b4..cd82339c 100644
--- a/src/Mem.cpp
+++ b/src/Mem.cpp
@@ -67,9 +67,17 @@ ScratchPadMem Mem::create(ScratchPad** scratchPads, int threadId)
allocate(scratchPadMem, m_useHugePages);
for (size_t i = 0; i < getThreadHashFactor(threadId); ++i) {
-        ScratchPad* scratchPad = static_cast<ScratchPad*>(_mm_malloc(sizeof(ScratchPad), 4096));
+        auto* scratchPad = static_cast<ScratchPad*>(_mm_malloc(sizeof(ScratchPad), 4096));
scratchPad->memory = scratchPadMem.memory + (i * scratchPadSize);
+        auto* p = reinterpret_cast<uint8_t*>(allocateExecutableMemory(0x4000));
+ scratchPad->generated_code = reinterpret_cast(p);
+ scratchPad->generated_code_double = reinterpret_cast(p + 0x2000);
+
+ scratchPad->generated_code_data.variant = PowVariant::LAST_ITEM;
+ scratchPad->generated_code_data.height = (uint64_t)(-1);
+ scratchPad->generated_code_double_data = scratchPad->generated_code_data;
+
scratchPads[i] = scratchPad;
}
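The two code pointers split one 0x4000-byte executable buffer (single-hash code at offset 0, double-hash code at offset 0x2000), and `generated_code_data` starts out with the `LAST_ITEM` / `-1` sentinel so the first hash always triggers code generation. A minimal sketch of the staleness check this enables; the actual check lives in the newly added `CryptoNightR_gen.cpp`, which this diff does not show, so the function below is illustrative only:

```cpp
#include <cstdint>

enum PowVariant { POW_V4, POW_WOW, LAST_ITEM };  // trimmed to what the sketch needs

struct GeneratedCodeData {
    PowVariant variant;
    uint64_t   height;
};

// Illustrative: regenerate the JIT-compiled main loop only when the
// (variant, height) pair differs from what the buffer was built for.
bool needsRegeneration(const GeneratedCodeData& cached, PowVariant variant, uint64_t height) {
    return cached.variant != variant || cached.height != height;
}

int main() {
    GeneratedCodeData cached{LAST_ITEM, uint64_t(-1)};        // sentinel set in Mem::create()
    bool first = needsRegeneration(cached, POW_V4, 1800000);  // true: sentinel never matches
    cached = {POW_V4, 1800000};
    bool again = needsRegeneration(cached, POW_V4, 1800000);  // false: same variant and height
    return (first && !again) ? 0 : 1;
}
```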
diff --git a/src/Mem.h b/src/Mem.h
index 790bdd7e..94f74b22 100644
--- a/src/Mem.h
+++ b/src/Mem.h
@@ -75,6 +75,9 @@ public:
static ScratchPadMem create(ScratchPad** scratchPads, int threadId);
static void release(ScratchPad** scratchPads, ScratchPadMem& scratchPadMem, int threadId);
+ static void *allocateExecutableMemory(size_t size);
+ static void flushInstructionCache(void *p, size_t size);
+
static inline size_t hashFactor() { return m_hashFactor; }
static inline size_t getThreadHashFactor(int threadId)
{
diff --git a/src/Mem_unix.cpp b/src/Mem_unix.cpp
index 8acac2fa..53309406 100644
--- a/src/Mem_unix.cpp
+++ b/src/Mem_unix.cpp
@@ -86,3 +86,19 @@ void Mem::release(ScratchPadMem &scratchPadMem)
_mm_free(scratchPadMem.memory);
}
}
+
+void *Mem::allocateExecutableMemory(size_t size)
+{
+# if defined(__APPLE__)
+ return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
+# else
+ return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+# endif
+}
+
+void Mem::flushInstructionCache(void *p, size_t size)
+{
+# ifndef __FreeBSD__
+    __builtin___clear_cache(reinterpret_cast<char*>(p), reinterpret_cast<char*>(p) + size);
+# endif
+}
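For context, the typical lifecycle of a buffer returned by these helpers is: reserve RWX memory once, write freshly generated machine code into it, flush the instruction cache, then call it through a function pointer. A minimal sketch under those assumptions for the Linux/mmap path (the emitted byte below is a bare x86-64 `ret` and is illustrative only):

```cpp
#include <cstdint>
#include <cstring>
#include <sys/mman.h>

// Same mmap-based allocation as Mem::allocateExecutableMemory above (non-Apple path).
void* allocateExecutableMemory(size_t size) {
    return mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC,
                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
}

void flushInstructionCache(void* p, size_t size) {
    __builtin___clear_cache(reinterpret_cast<char*>(p), reinterpret_cast<char*>(p) + size);
}

int main() {
    void* buf = allocateExecutableMemory(0x4000);
    if (buf == MAP_FAILED) return 1;

    // Illustrative payload: a single x86-64 RET instruction (0xC3).
    const uint8_t code[] = {0xC3};
    std::memcpy(buf, code, sizeof(code));

    // Required on architectures with incoherent I/D caches before running new code.
    flushInstructionCache(buf, sizeof(code));

    auto fn = reinterpret_cast<void (*)()>(buf);
    fn();

    munmap(buf, 0x4000);
    return 0;
}
```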
diff --git a/src/Mem_win.cpp b/src/Mem_win.cpp
index 1a8e582d..94ad8e06 100644
--- a/src/Mem_win.cpp
+++ b/src/Mem_win.cpp
@@ -182,4 +182,14 @@ void Mem::release(ScratchPadMem &scratchPadMem)
else {
_mm_free(scratchPadMem.memory);
}
+}
+
+void *Mem::allocateExecutableMemory(size_t size)
+{
+ return VirtualAlloc(0, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
+}
+
+void Mem::flushInstructionCache(void *p, size_t size)
+{
+ ::FlushInstructionCache(GetCurrentProcess(), p, size);
}
\ No newline at end of file
diff --git a/src/Options.cpp b/src/Options.cpp
index 87405c4a..3619b293 100644
--- a/src/Options.cpp
+++ b/src/Options.cpp
@@ -73,7 +73,7 @@ Options:\n"
-k, --keepalive send keepalived for prevent timeout (need pool support)\n\
-r, --retries=N number of times to retry before switch to backup server (default: 5)\n\
-R, --retry-pause=N time to pause between retries (default: 5)\n\
- --pow-variant=V specificy the PoW variat to use: -> 'auto' (default), '0' (v0), '1' (v1, aka cnv7), '2' (v2, aka cnv8), 'ipbc' (tube), 'xao', 'xtl' (including autodetect for > v5), 'rto', 'xfh', 'upx', 'turtle', 'hosp'\n\
+  --pow-variant=V       specify the PoW variant to use: \n'auto' (default), '0', '1', '2', 'ipbc', 'xao', 'xtl', 'rto', 'xfh', 'upx', 'turtle', 'hosp', 'r', 'wow', 'double' (xcash), 'zls' (zelerius), 'rwz' (graft)\n\
for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations\n\
--asm-optimization=V specificy the ASM optimization to use: -> 'auto' (default), 'intel', 'ryzen', 'bulldozer', 'off' \n\
--multihash-factor=N number of hash blocks to process at a time (don't set or 0 enables automatic selection of optimal number of hash blocks)\n\
@@ -92,7 +92,8 @@ Options:\n"
--api-access-token=T access token for API\n\
--api-worker-id=ID custom worker-id for API\n\
--reboot-cmd command/bat to execute to Reboot miner\n\
- --force-pow-variant disable pow/variant parsing from pool\n"
+ --force-pow-variant skip pow/variant parsing from pool\n\
+ --skip-self-check skip self check on startup\n"
# ifndef XMRIG_NO_CC
"\
--cc-url=URL url of the CC Server\n\
@@ -179,6 +180,7 @@ static struct option const options[] = {
{ "force-pow-variant", 0, nullptr, 1016 },
{ "pow-variant", 1, nullptr, 1017 },
{ "variant", 1, nullptr, 1017 },
+ { "skip-self-check", 0, nullptr, 1018 },
{ "api-port", 1, nullptr, 4000 },
{ "api-access-token", 1, nullptr, 4001 },
{ "api-worker-id", 1, nullptr, 4002 },
@@ -237,6 +239,7 @@ static struct option const config_options[] = {
{ "force-pow-variant", 0, nullptr, 1016 },
{ "pow-variant", 1, nullptr, 1017 },
{ "variant", 1, nullptr, 1017 },
+ { "skip-self-check", 0, nullptr, 1018 },
{ "doublehash-thread-mask", 1, nullptr, 4013 },
{ "multihash-thread-mask", 1, nullptr, 4013 },
{ "asm-optimization", 1, nullptr, 4020 },
@@ -331,7 +334,10 @@ constexpr static const char *pow_variant_names[] = {
"fast2",
"upx",
"turtle",
- "hosp"
+ "hosp",
+ "wow",
+ "r",
+ "xcash"
};
constexpr static const char *asm_optimization_names[] = {
@@ -380,6 +386,7 @@ Options::Options(int argc, char **argv) :
m_ccPushPeriodicStatus(false),
m_ccPushZeroHashrateMiners(false),
m_forcePowVariant(false),
+ m_skipSelfCheck(false),
m_fileName(Platform::defaultConfigName()),
m_apiToken(nullptr),
m_apiWorkerId(nullptr),
@@ -643,11 +650,14 @@ bool Options::parseArg(int key, const char *arg)
return parseBoolean(key, true);
case 1016: /* --force-pow-variant */
- return parseBoolean(key, false);
+ return parseBoolean(key, true);
case 1017: /* --pow-variant/--variant */
return parsePowVariant(arg);
+ case 1018: /* --skip-self-check */
+ return parseBoolean(key, true);
+
case 4016: /* --cc-use-tls */
return parseBoolean(key, true);
@@ -912,6 +922,10 @@ bool Options::parseBoolean(int key, bool enable)
m_forcePowVariant = enable;
break;
+ case 1018: /* --skip-self-check */
+ m_skipSelfCheck = enable;
+ break;
+
case 2000: /* --colors */
m_colors = enable;
break;
@@ -1206,6 +1220,31 @@ bool Options::parsePowVariant(const char *powVariant)
break;
}
+ if (i == ARRAY_SIZE(pow_variant_names) - 1 && !strcmp(powVariant, "wow")) {
+ m_powVariant = POW_WOW;
+ break;
+ }
+
+ if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "4") || !strcmp(powVariant, "r") || !strcmp(powVariant, "cnv4") || !strcmp(powVariant, "cnv5"))) {
+ m_powVariant = POW_V4;
+ break;
+ }
+
+ if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "xcash") || !strcmp(powVariant, "heavyx") || !strcmp(powVariant, "double"))) {
+ m_powVariant = POW_DOUBLE;
+ break;
+ }
+
+ if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "zelerius") || !strcmp(powVariant, "zls") || !strcmp(powVariant, "zlx"))) {
+ m_powVariant = POW_ZELERIUS;
+ break;
+ }
+
+ if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "rwz") || !strcmp(powVariant, "graft"))) {
+ m_powVariant = POW_RWZ;
+ break;
+ }
+
if (i == ARRAY_SIZE(pow_variant_names) - 1) {
showUsage(1);
return false;
diff --git a/src/Options.h b/src/Options.h
index a02044f6..902eed3d 100644
--- a/src/Options.h
+++ b/src/Options.h
@@ -84,7 +84,8 @@ public:
inline bool ccPushZeroHashrateMiners() const { return m_ccPushZeroHashrateMiners; }
inline bool ccUsePushover() const { return ccPushoverUser() && ccPushoverToken(); }
inline bool ccUseTelegram() const { return ccTelegramBotToken() && ccTelegramChatId(); }
- inline bool forcePowVariant() const { return m_forcePowVariant; };
+ inline bool forcePowVariant() const { return m_forcePowVariant; }
+ inline bool skipSelfCheck() const { return m_skipSelfCheck; }
inline const char *fileName() const { return m_fileName; }
inline const char *apiToken() const { return m_apiToken; }
inline const char *apiWorkerId() const { return m_apiWorkerId; }
@@ -171,6 +172,7 @@ private:
bool m_ccPushPeriodicStatus;
bool m_ccPushZeroHashrateMiners;
bool m_forcePowVariant;
+ bool m_skipSelfCheck;
const char* m_fileName;
char *m_apiToken;
char *m_apiWorkerId;
diff --git a/src/PowVariant.h b/src/PowVariant.h
index a03fbd22..17ddec11 100644
--- a/src/PowVariant.h
+++ b/src/PowVariant.h
@@ -39,6 +39,11 @@ enum PowVariant
POW_UPX,
POW_TURTLE,
POW_HOSP,
+ POW_WOW,
+ POW_V4,
+ POW_DOUBLE,
+ POW_ZELERIUS,
+ POW_RWZ,
LAST_ITEM
};
@@ -74,6 +79,16 @@ inline std::string getPowVariantName(PowVariant powVariant)
return "turtle";
case POW_HOSP:
return "hosp";
+ case POW_WOW:
+ return "wow";
+ case POW_V4:
+ return "r";
+ case POW_DOUBLE:
+ return "double";
+ case POW_ZELERIUS:
+ return "zls";
+ case POW_RWZ:
+ return "rwz";
case POW_AUTODETECT:
default:
return "-1";
@@ -149,6 +164,16 @@ inline PowVariant parseVariant(const std::string variant)
powVariant = PowVariant::POW_TURTLE;
} else if (variant == "hosp" || variant == "hospital") {
powVariant = PowVariant::POW_HOSP;
+ } else if (variant == "wow" || variant == "wownero") {
+ powVariant = PowVariant::POW_WOW;
+ } else if (variant == "r" || variant == "4" || variant == "cnv4" || variant == "cnv5") {
+ powVariant = PowVariant::POW_V4;
+ } else if (variant == "xcash" || variant == "heavyx" || variant == "double") {
+ powVariant = PowVariant::POW_DOUBLE;
+ } else if (variant == "zelerius" || variant == "zls" || variant == "zlx") {
+ powVariant = PowVariant::POW_ZELERIUS;
+ } else if (variant == "rwz" || variant == "graft") {
+ powVariant = PowVariant::POW_RWZ;
}
return powVariant;
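Taken together with the `Options::parsePowVariant` additions above, the new aliases resolve as follows. A minimal standalone restatement of the mapping added in this file (not the header itself, just the alias handling):

```cpp
#include <cassert>
#include <string>

enum PowVariant { POW_WOW, POW_V4, POW_DOUBLE, POW_ZELERIUS, POW_RWZ, LAST_ITEM };

// Standalone restatement of the alias handling added to parseVariant() above.
PowVariant parseNewVariant(const std::string& v) {
    if (v == "wow" || v == "wownero")                        return POW_WOW;
    if (v == "r" || v == "4" || v == "cnv4" || v == "cnv5")  return POW_V4;
    if (v == "xcash" || v == "heavyx" || v == "double")      return POW_DOUBLE;
    if (v == "zelerius" || v == "zls" || v == "zlx")         return POW_ZELERIUS;
    if (v == "rwz" || v == "graft")                          return POW_RWZ;
    return LAST_ITEM;
}

int main() {
    assert(parseNewVariant("graft")  == POW_RWZ);     // reported back as "rwz"
    assert(parseNewVariant("cnv5")   == POW_V4);      // reported back as "r"
    assert(parseNewVariant("heavyx") == POW_DOUBLE);  // reported back as "double"
    return 0;
}
```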
diff --git a/src/config.json b/src/config.json
index 50f1f9ca..c7a89a1f 100644
--- a/src/config.json
+++ b/src/config.json
@@ -4,7 +4,7 @@
"threads": 0, // number of miner threads (not set or 0 enables automatic selection of optimal thread count)
"multihash-factor": 0, // number of hash blocks to process at a time (not set or 0 enables automatic selection of optimal number of hash blocks)
"multihash-thread-mask" : null, // for multihash-factors>0 only, limits multihash to given threads (mask), mask "0x3" means run multihash on thread 0 and 1 only (default: all threads)
- "pow-variant" : "auto", // specificy the PoW variat to use: -> auto (default), 0 (v0), 1 (v1, aka monerov7, aeonv7), 2 (v2, aka monerov8), tube (ipbc), alloy (xao), xtl (including autodetect for > v5), msr, xhv, rto, xfh, upx, turtle, hosp
+ "pow-variant" : "auto", // specificy the PoW variat to use: -> auto (default), '0', '1', '2', 'ipbc', 'xao', 'xtl', 'rto', 'xfh', 'upx', 'turtle', 'hosp', 'r', 'wow', 'double (xcash)', 'zls' (zelerius), 'rwz' (graft)
// for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations
"asm-optimization" : "auto", // specificy the ASM optimization to use: -> auto (default), intel, ryzen, bulldozer, off
"background": false, // true to run the miner in the background (Windows only, for *nix plase use screen/tmux or systemd service instead)
@@ -21,6 +21,7 @@
"syslog": false, // use system log for output messages
"reboot-cmd" : "", // command to execute to reboot the OS
"force-pow-variant" : false, // force pow variant, dont parse pow/variant from pool job
+ "skip-self-check" : false, // skip the self check on startup
"pools": [
{
"url": "donate2.graef.in:80", // URL of mining server
diff --git a/src/crypto/CryptoNight.cpp b/src/crypto/CryptoNight.cpp
index 0040a164..cd4b6699 100644
--- a/src/crypto/CryptoNight.cpp
+++ b/src/crypto/CryptoNight.cpp
@@ -23,6 +23,7 @@
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include
#include "crypto/CryptoNight.h"
#if defined(XMRIG_ARM)
@@ -34,282 +35,398 @@
#include "crypto/CryptoNight_test.h"
template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+static void cryptonight_aesni(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
# if !defined(XMRIG_ARMv7)
- if (powVersion == PowVariant::POW_V1) {
+ if (variant == PowVariant::POW_V1) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
#else
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V1, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
}
#endif
- } else if (powVersion == PowVariant::POW_V2) {
+ } else if (variant == PowVariant::POW_V2) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
#else
if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) ||
(asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1) ||
(asmOptimization == AsmOptimization::ASM_BULLDOZER && NUM_HASH_BLOCKS == 1)) {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V2, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
}
#endif
-} else if (powVersion == PowVariant::POW_ALLOY) {
- CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
-} else if (powVersion == PowVariant::POW_XTL) {
+ } else if (variant == PowVariant::POW_V4) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V4, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
+#else
+ if (asmOptimization != AsmOptimization::ASM_OFF && NUM_HASH_BLOCKS <= 2) {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V4, NUM_HASH_BLOCKS>::hashPowV4_asm(input, size, output, scratchPad, height, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V4, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
+ }
+#endif
+ } else if (variant == PowVariant::POW_WOW) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_WOW, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
+#else
+ if (asmOptimization != AsmOptimization::ASM_OFF && NUM_HASH_BLOCKS <= 2) {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_WOW, NUM_HASH_BLOCKS>::hashPowV4_asm(input, size, output, scratchPad, height, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_WOW, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
+ }
+#endif
+ } else if (variant == PowVariant::POW_ALLOY) {
+ CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_ALLOY, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
+ } else if (variant == PowVariant::POW_XTL) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_XTL, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
#else
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_XTL, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_XTL, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
}
#endif
-} else if (powVersion == PowVariant::POW_FAST_2) {
+ } else if (variant == PowVariant::POW_FAST_2) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_FAST_2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
#else
if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) ||
(asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1) ||
(asmOptimization == AsmOptimization::ASM_BULLDOZER && NUM_HASH_BLOCKS == 1)) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_FAST_2, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_FAST_2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
}
#endif
-} else if (powVersion == PowVariant::POW_MSR) {
+ } else if (variant == PowVariant::POW_DOUBLE) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_DOUBLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+#else
+ if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) ||
+ (asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1) ||
+ (asmOptimization == AsmOptimization::ASM_BULLDOZER && NUM_HASH_BLOCKS == 1)) {
+ CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_DOUBLE, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_DOUBLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_ZELERIUS) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_ZELERIUS, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+#else
+ if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) ||
+ (asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1) ||
+ (asmOptimization == AsmOptimization::ASM_BULLDOZER && NUM_HASH_BLOCKS == 1)) {
+ CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_ZELERIUS, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_ZELERIUS, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_RWZ) {
+#if defined(XMRIG_ARM)
+        CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_RWZ, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+#else
+ if ((asmOptimization != AsmOptimization::ASM_OFF && NUM_HASH_BLOCKS <= 2)) {
+ CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_RWZ, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_RWZ, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_MSR) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_MSR, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
#else
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_MSR, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_MSR, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
}
#endif
-} else if (powVersion == PowVariant::POW_RTO || powVersion == PowVariant::POW_HOSP) {
+ } else if (variant == PowVariant::POW_RTO) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_RTO, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
#else
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_RTO, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_RTO, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
}
#endif
-} else if (powVersion == PowVariant::POW_XFH) {
- CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad);
-} else {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
-}
-# endif
-}
-
-template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
- if (powVersion == PowVariant::POW_V1) {
+ } else if (variant == PowVariant::POW_HOSP) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_HOSP, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
#else
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_HOSP, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_HOSP, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
}
#endif
- } else if (powVersion == PowVariant::POW_V2) {
-#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
-#else
- if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion);
- } else {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
- }
-#endif
- } else if (powVersion == PowVariant::POW_FAST_2) {
-#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
-#else
- if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion);
- } else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
- }
-#endif
- } else if (powVersion == PowVariant::POW_ALLOY) {
- CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
- } else if (powVersion == PowVariant::POW_XTL) {
-#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
-#else
- if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
- } else {
- CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
- }
-#endif
- } else if (powVersion == PowVariant::POW_MSR) {
-#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
-#else
- if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
- } else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
- }
-#endif
- } else if (powVersion == PowVariant::POW_RTO || powVersion == PowVariant::POW_HOSP) {
-#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
-#else
- if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
- } else {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
- }
-#endif
- } else if (powVersion == PowVariant::POW_XFH) {
- CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad);
+ } else if (variant == PowVariant::POW_XFH) {
+ CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_XFH, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
} else {
- CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
- }
-}
-
-template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_lite_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
-# if !defined(XMRIG_ARMv7)
- if (powVersion == PowVariant::POW_V1) {
-#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
-#else
- if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
- } else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
- }
-#endif
- } else if (powVersion == PowVariant::POW_TUBE) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
- } else if (powVersion == PowVariant::POW_UPX) {
-#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
-#else
- if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
- } else {
- CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
- }
-#endif
- } else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V0, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
}
# endif
}
template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_lite_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
- if (powVersion == PowVariant::POW_V1) {
+static void cryptonight_softaes(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+ if (variant == PowVariant::POW_V1) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
#else
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V1, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
}
#endif
- } else if (powVersion == PowVariant::POW_TUBE) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
- } else if (powVersion == PowVariant::POW_UPX) {
+ } else if (variant == PowVariant::POW_V2) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
#else
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V2, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
}
#endif
+ } else if (variant == PowVariant::POW_V4) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V4, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
+#else
+ if (asmOptimization != AsmOptimization::ASM_OFF && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V4, NUM_HASH_BLOCKS>::hashPowV4_asm(input, size, output, scratchPad, height, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V4, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
+ }
+#endif
+ } else if (variant == PowVariant::POW_WOW) {
+#if defined(XMRIG_ARM)
+        CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_WOW, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
+#else
+ if (asmOptimization != AsmOptimization::ASM_OFF && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_WOW, NUM_HASH_BLOCKS>::hashPowV4_asm(input, size, output, scratchPad, height, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_WOW, NUM_HASH_BLOCKS>::hashPowV4(input, size, output, scratchPad, height);
+ }
+#endif
+ } else if (variant == PowVariant::POW_FAST_2) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_FAST_2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_FAST_2, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_FAST_2, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_DOUBLE) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_DOUBLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_DOUBLE, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_DOUBLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_ZELERIUS) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_ZELERIUS, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_ZELERIUS, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_ZELERIUS, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_RWZ) {
+ CryptoNightMultiHash<0x60000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_RWZ, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ } else if (variant == PowVariant::POW_ALLOY) {
+ CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_ALLOY, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
+ } else if (variant == PowVariant::POW_XTL) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_XTL, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_XTL, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_XTL, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_MSR) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_MSR, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_MSR, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_MSR, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_RTO) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_RTO, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_RTO, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_RTO, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_HOSP) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_HOSP, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_HOSP, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_HOSP, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_XFH) {
+ CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_XFH, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
} else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V0, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
}
}
template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_super_lite_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+static void cryptonight_lite_aesni(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+# if !defined(XMRIG_ARMv7)
+ if (variant == PowVariant::POW_V1) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_V1, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_TUBE) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_TUBE, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
+ } else if (variant == PowVariant::POW_UPX) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_UPX, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_UPX, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_UPX, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ }
+#endif
+ } else {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, POW_V0, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
+ }
+# endif
+}
+
+template <size_t NUM_HASH_BLOCKS>
+static void cryptonight_lite_softaes(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+ if (variant == PowVariant::POW_V1) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_V1, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_V1, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ }
+#endif
+ } else if (variant == PowVariant::POW_TUBE) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_TUBE, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
+ } else if (variant == PowVariant::POW_UPX) {
+#if defined(XMRIG_ARM)
+ CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_UPX, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+#else
+ if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
+ CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_UPX, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
+ } else {
+ CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_UPX, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
+ }
+#endif
+ } else {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, POW_V0, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
+ }
+}
+
+template <size_t NUM_HASH_BLOCKS>
+static void cryptonight_super_lite_aesni(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
}
template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_super_lite_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+static void cryptonight_super_lite_softaes(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
}
template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_ultra_lite_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+static void cryptonight_ultra_lite_aesni(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
# if !defined(XMRIG_ARMv7)
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, POW_TURTLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
#else
if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) ||
(asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1) ||
(asmOptimization == AsmOptimization::ASM_BULLDOZER && NUM_HASH_BLOCKS == 1)) {
- CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, POW_TURTLE, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, false, POW_TURTLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
}
#endif
# endif
}
template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_ultra_lite_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+static void cryptonight_ultra_lite_softaes(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
#if defined(XMRIG_ARM)
- CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, POW_TURTLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
#else
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
- CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization, powVersion);
+ CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, POW_TURTLE, NUM_HASH_BLOCKS>::hashPowV3_asm(input, size, output, scratchPad, asmOptimization);
} else {
- CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x10000, POW_DEFAULT_INDEX_SHIFT, MEMORY_ULTRA_LITE, 0x1FFF0, true, POW_TURTLE, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
}
#endif
}
template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_heavy_aesni(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+static void cryptonight_heavy_aesni(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
# if !defined(XMRIG_ARMv7)
- if (powVersion == PowVariant::POW_XHV) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad);
+ if (variant == PowVariant::POW_XHV) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, POW_XHV, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
}
- else if (powVersion == PowVariant::POW_TUBE) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, NUM_HASH_BLOCKS>::hashHeavyTube(input, size, output, scratchPad);
+ else if (variant == PowVariant::POW_TUBE) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, POW_TUBE, NUM_HASH_BLOCKS>::hashHeavyTube(input, size, output, scratchPad);
}
else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, false, POW_V0, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
}
# endif
}
template <size_t NUM_HASH_BLOCKS>
-static void cryptonight_heavy_softaes(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
- if (powVersion == PowVariant::POW_XHV) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, NUM_HASH_BLOCKS>::hashHeavyHaven(input, size, output, scratchPad);
+static void cryptonight_heavy_softaes(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad) {
+ if (variant == PowVariant::POW_XHV) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, POW_XHV, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
}
- else if (powVersion == PowVariant::POW_TUBE) {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, NUM_HASH_BLOCKS>::hashHeavyTube(input, size, output, scratchPad);
+ else if (variant == PowVariant::POW_TUBE) {
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, POW_TUBE, NUM_HASH_BLOCKS>::hashHeavyTube(input, size, output, scratchPad);
}
else {
- CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
+ CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_HEAVY, 0x3FFFF0, true, POW_V0, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
}
}
-void (*cryptonight_hash_ctx[MAX_NUM_HASH_BLOCKS])(AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad);
+void (*cryptonight_hash_ctx[MAX_NUM_HASH_BLOCKS])(AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad);
template <size_t NUM_HASH_BLOCKS>
void setCryptoNightHashMethods(Options::Algo algo, bool aesni)
@@ -377,15 +494,16 @@ bool CryptoNight::init(int algo, bool aesni)
}
setCryptoNightHashMethods<MAX_NUM_HASH_BLOCKS>(static_cast<Options::Algo>(algo), aesni);
- return selfTest(algo);
+
+ return Options::i()->skipSelfCheck() ? true : selfCheck(algo);
}
-void CryptoNight::hash(size_t factor, AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad)
+void CryptoNight::hash(size_t factor, AsmOptimization asmOptimization, uint64_t height, PowVariant variant, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPad)
{
- cryptonight_hash_ctx[factor-1](asmOptimization, powVersion, input, size, output, scratchPad);
+ cryptonight_hash_ctx[factor-1](asmOptimization, height, variant, input, size, output, scratchPad);
}
-bool CryptoNight::selfTest(int algo)
+bool CryptoNight::selfCheck(int algo)
{
if (cryptonight_hash_ctx[0] == nullptr
#if MAX_NUM_HASH_BLOCKS > 1
@@ -413,6 +531,14 @@ bool CryptoNight::selfTest(int algo)
ScratchPad* scratchPad = static_cast<ScratchPad*>(_mm_malloc(sizeof(ScratchPad), 4096));
scratchPad->memory = (uint8_t *) _mm_malloc(MEMORY * 6, 16);
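+ // Reserve an executable region per scratchpad for the CN-R JIT code: the first 0x2000 bytes
+ // hold the single-hash main loop, the second 0x2000 bytes the double-hash main loop.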
+ auto* p = reinterpret_cast<uint8_t*>(Mem::allocateExecutableMemory(0x4000));
+ scratchPad->generated_code = reinterpret_cast<cn_mainloop_fun_ms_abi>(p);
+ scratchPad->generated_code_double = reinterpret_cast<cn_mainloop_double_fun_ms_abi>(p + 0x2000);
+
+ scratchPad->generated_code_data.variant = PowVariant::LAST_ITEM;
+ scratchPad->generated_code_data.height = (uint64_t)(-1);
+ scratchPad->generated_code_double_data = scratchPad->generated_code_data;
+
scratchPads[i] = scratchPad;
}
@@ -427,129 +553,128 @@ bool CryptoNight::selfTest(int algo)
if (algo == Options::ALGO_CRYPTONIGHT_HEAVY) {
// cn-heavy
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
resultHeavy = resultHeavy && memcmp(output, test_output_heavy, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
resultHeavy = resultHeavy && memcmp(output, test_output_heavy, 64) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 2
- cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
resultHeavy = resultHeavy && memcmp(output, test_output_heavy, 96) == 0;
#endif
// cn-heavy haven
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 64) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 2
- cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_XHV, test_input, 76, output, scratchPads);
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_haven, 96) == 0;
#endif
// cn-heavy bittube
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_tube, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_tube, 64) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 2
- cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
resultHeavy = resultHeavy && memcmp(output, test_output_heavy_tube, 96) == 0;
#endif
} else if (algo == Options::ALGO_CRYPTONIGHT_LITE) {
// cn-lite v0
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_v0_lite, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_v0_lite, 64) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 2
- cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_v0_lite, 96) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 3
- cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[3](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_v0_lite, 128) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 4
- cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[4](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_v0_lite, 160) == 0;
#endif
// cn-lite v7 tests
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
- resultLite = resultLite && memcmp(output, test_output_v1_lite, 32) == 0;
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ resultLite = resultLite && memcmp(output, test_output_v1_lite, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_v1_lite, 64) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 2
- cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_v1_lite, 96) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 3
- cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[3](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_v1_lite, 128) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 4
- cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[4](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_v1_lite, 160) == 0;
#endif
-
// cn-lite ipbc tests
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 64) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 2
- cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 96) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 3
- cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[3](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 128) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 4
- cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[4](asmOptimization, 0, PowVariant::POW_TUBE, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 160) == 0;
#endif
// cn-lite upx
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_UPX, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_UPX, test_input, 76, output, scratchPads);
resultLite = resultLite && memcmp(output, test_output_upx, 32) == 0;
} else if (algo == Options::ALGO_CRYPTONIGHT_SUPERLITE) {
@@ -559,123 +684,173 @@ bool CryptoNight::selfTest(int algo)
} else if (algo == Options::ALGO_CRYPTONIGHT_ULTRALITE) {
// cn ultralite (cnv8 + turtle)
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_TURTLE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_TURTLE, test_input, 76, output, scratchPads);
resultUltraLite = resultUltraLite && memcmp(output, test_output_turtle, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_TURTLE, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_TURTLE, test_input, 76, output, scratchPads);
resultUltraLite = resultUltraLite && memcmp(output, test_output_turtle, 64) == 0;
#endif
} else {
// cn v0 aka original
-
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V0,test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V0,test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v0, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v0, 64) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 2
- cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v0, 96) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 3
- cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[3](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v0, 128) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 4
- cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V0, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[4](asmOptimization, 0, PowVariant::POW_V0, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v0, 160) == 0;
#endif
// cn v7 aka cnv1
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v1, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v1, 64) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 2
- cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v1, 96) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 3
- cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[3](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v1, 128) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 4
- cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V1, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[4](asmOptimization, 0, PowVariant::POW_V1, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v1, 160) == 0;
#endif
// cnv7 + xtl
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_XTL,test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_XTL,test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_xtl, 32) == 0;
// cnv7 + msr aka cn-fast
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_MSR,test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_MSR,test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_msr, 32) == 0;
// cnv7 + alloy
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_ALLOY,test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_ALLOY,test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_alloy, 32) == 0;
// cnv7 + hosp/rto
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_HOSP,test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_HOSP,test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_hosp, 32) == 0;
// cnv8 aka cnv2
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_V2, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v2, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_V2, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v2, 64) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 2
- cryptonight_hash_ctx[2](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[2](asmOptimization, 0, PowVariant::POW_V2, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v2, 96) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 3
- cryptonight_hash_ctx[3](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[3](asmOptimization, 0, PowVariant::POW_V2, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v2, 128) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 4
- cryptonight_hash_ctx[4](asmOptimization, PowVariant::POW_V2, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[4](asmOptimization, 0, PowVariant::POW_V2, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_v2, 160) == 0;
#endif
// cn xfh aka cn-heavy-superfast
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_XFH, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_XFH, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_xfh, 32) == 0;
// cnv8 + xtl aka cn-fast2
- cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_FAST_2, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_FAST_2, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_xtl_v9, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
- cryptonight_hash_ctx[1](asmOptimization, PowVariant::POW_FAST_2, test_input, 76, output, scratchPads);
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_FAST_2, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_xtl_v9, 64) == 0;
#endif
+
+ // cnv8 + xcash
+
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_DOUBLE, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_xcash, 32) == 0;
+
+ // cnv8 + zelerius
+
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_ZELERIUS, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_zelerius, 32) == 0;
+
+ // cnv8 + rwz
+
+ cryptonight_hash_ctx[0](asmOptimization, 0, PowVariant::POW_RWZ, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_rwz, 32) == 0;
+
+ #if MAX_NUM_HASH_BLOCKS > 1
+ cryptonight_hash_ctx[1](asmOptimization, 0, PowVariant::POW_RWZ, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_rwz, 64) == 0;
+ #endif
+
+ // cnv9 aka cnv4 aka cnv5 aka cnr
+
+ cryptonight_hash_ctx[0](asmOptimization, 10000, PowVariant::POW_V4, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_v4, 32) == 0;
+
+ #if MAX_NUM_HASH_BLOCKS > 1
+ cryptonight_hash_ctx[1](asmOptimization, 10000, PowVariant::POW_V4, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_v4, 64) == 0;
+ #endif
+
+ #if MAX_NUM_HASH_BLOCKS > 2
+ cryptonight_hash_ctx[2](asmOptimization, 10000, PowVariant::POW_V4, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_v4, 96) == 0;
+ #endif
+
+ #if MAX_NUM_HASH_BLOCKS > 3
+ cryptonight_hash_ctx[3](asmOptimization, 10000, PowVariant::POW_V4, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_v4, 128) == 0;
+ #endif
+
+ #if MAX_NUM_HASH_BLOCKS > 4
+ cryptonight_hash_ctx[4](asmOptimization, 10000, PowVariant::POW_V4, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_v4, 160) == 0;
+ #endif
+
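+ // Hashing the same input at neighbouring heights forces the CN-R random-math program
+ // to be regenerated, so this also exercises the JIT cache invalidation path.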
+ cryptonight_hash_ctx[0](asmOptimization, 10001, PowVariant::POW_V4, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_v4_1, 32) == 0;
+
+ cryptonight_hash_ctx[0](asmOptimization, 10002, PowVariant::POW_V4, test_input, 76, output, scratchPads);
+ result = result && memcmp(output, test_output_v4_2, 32) == 0;
}
for (size_t i = 0; i < MAX_NUM_HASH_BLOCKS; ++i) {
diff --git a/src/crypto/CryptoNight.h b/src/crypto/CryptoNight.h
index 10415ca9..aaf29145 100644
--- a/src/crypto/CryptoNight.h
+++ b/src/crypto/CryptoNight.h
@@ -42,8 +42,25 @@
#define POW_DEFAULT_INDEX_SHIFT 3
#define POW_XLT_V4_INDEX_SHIFT 4
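+// The JIT-compiled CN-R main loops are emitted for the Microsoft x64 calling convention,
+// so the function-pointer types below carry ms_abi when built with GCC/Clang.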
+#if defined _MSC_VER || defined XMRIG_ARM
+#define ABI_ATTRIBUTE
+#else
+#define ABI_ATTRIBUTE __attribute__((ms_abi))
+#endif
+
+struct ScratchPad;
+typedef void(*cn_mainloop_fun_ms_abi)(ScratchPad*) ABI_ATTRIBUTE;
+typedef void(*cn_mainloop_double_fun_ms_abi)(ScratchPad*, ScratchPad*) ABI_ATTRIBUTE;
+
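+// Records which (variant, height) pair a JIT buffer was generated for, so the random-math
+// program is only recompiled when the block height or variant changes.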
+struct cryptonight_r_data {
+ int variant;
+ uint64_t height;
+
+ bool match(const int v, const uint64_t h) const { return (v == variant) && (h == height); }
+};
+
struct ScratchPad {
- alignas(16) uint8_t state[224]; // 224 instead of 200 to maintain aligned to 16 byte boundaries
+ alignas(16) uint8_t state[224];
alignas(16) uint8_t* memory;
// Additional stuff for asm impl
@@ -51,6 +68,11 @@ struct ScratchPad {
const void* input;
uint8_t* variant_table;
const uint32_t* t_fn;
+
+ cn_mainloop_fun_ms_abi generated_code;
+ cn_mainloop_double_fun_ms_abi generated_code_double;
+ cryptonight_r_data generated_code_data;
+ cryptonight_r_data generated_code_double_data;
};
alignas(64) static uint8_t variant1_table[256];
@@ -63,12 +85,12 @@ class CryptoNight
{
public:
static bool init(int algo, bool aesni);
- static void hash(size_t factor, AsmOptimization asmOptimization, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPads);
+ static void hash(size_t factor, AsmOptimization asmOptimization, uint64_t height, PowVariant powVersion, const uint8_t* input, size_t size, uint8_t* output, ScratchPad** scratchPads);
public:
private:
- static bool selfTest(int algo);
+ static bool selfCheck(int algo);
};
diff --git a/src/crypto/CryptoNightR_gen.cpp b/src/crypto/CryptoNightR_gen.cpp
new file mode 100644
index 00000000..d856cade
--- /dev/null
+++ b/src/crypto/CryptoNightR_gen.cpp
@@ -0,0 +1,190 @@
+/* XMRig
+ * Copyright 2010 Jeff Garzik
+ * Copyright 2012-2014 pooler
+ * Copyright 2014 Lucas Jones
+ * Copyright 2014-2016 Wolf9466
+ * Copyright 2016 Jay D Dee
+ * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
+ * Copyright 2018 Lee Clagett
+ * Copyright 2018-2019 SChernykh
+ * Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <string.h>
+
+typedef void(*void_func)();
+
+#include "crypto/asm/CryptonightR_template.h"
+#include "Mem.h"
+
+#if !defined XMRIG_ARM && !defined XMRIG_NO_ASM
+
+#include "crypto/CryptoNight_x86.h"
+
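+// Copies the machine code of one template snippet, delimited by the addresses of two
+// adjacent template functions, into the output buffer.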
+static inline void add_code(uint8_t* &p, void (*p1)(), void (*p2)())
+{
+ const ptrdiff_t size = reinterpret_cast<const uint8_t*>(p2) - reinterpret_cast<const uint8_t*>(p1);
+ if (size > 0) {
+ memcpy(p, reinterpret_cast<const void*>(p1), size);
+ p += size;
+ }
+}
+
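+// Emits machine code for the generated random-math sequence; prev_rot_src tracking skips
+// redundant MOVs of the rotation source register between consecutive ROR/ROL instructions.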
+static inline void add_random_math(uint8_t* &p, const V4_Instruction* code, int code_size, const void_func* instructions, const void_func* instructions_mov, bool is_64_bit, AsmOptimization ASM)
+{
+ uint32_t prev_rot_src = (uint32_t)(-1);
+
+ for (int i = 0;; ++i) {
+ const V4_Instruction inst = code[i];
+ if (inst.opcode == RET) {
+ break;
+ }
+
+ uint8_t opcode = (inst.opcode == MUL) ? inst.opcode : (inst.opcode + 2);
+ uint8_t dst_index = inst.dst_index;
+ uint8_t src_index = inst.src_index;
+
+ const uint32_t a = inst.dst_index;
+ const uint32_t b = inst.src_index;
+ const uint8_t c = opcode | (dst_index << V4_OPCODE_BITS) | (((src_index == 8) ? dst_index : src_index) << (V4_OPCODE_BITS + V4_DST_INDEX_BITS));
+
+ switch (inst.opcode) {
+ case ROR:
+ case ROL:
+ if (b != prev_rot_src) {
+ prev_rot_src = b;
+ add_code(p, instructions_mov[c], instructions_mov[c + 1]);
+ }
+ break;
+ }
+
+ if (a == prev_rot_src) {
+ prev_rot_src = (uint32_t)(-1);
+ }
+
+ void_func begin = instructions[c];
+
+ if ((ASM == AsmOptimization::ASM_BULLDOZER) && (inst.opcode == MUL) && !is_64_bit) {
+ // AMD Bulldozer has latency 4 for 32-bit IMUL and 6 for 64-bit IMUL
+ // Always use 32-bit IMUL for AMD Bulldozer in 32-bit mode - skip prefix 0x48 and change 0x49 to 0x41
+ uint8_t* prefix = reinterpret_cast<uint8_t*>(begin);
+
+ if (*prefix == 0x49) {
+ *(p++) = 0x41;
+ }
+
+ begin = reinterpret_cast<void_func>(prefix + 1);
+ }
+
+ add_code(p, begin, instructions[c + 1]);
+
+ if (inst.opcode == ADD) {
+ *(uint32_t*)(p - sizeof(uint32_t) - (is_64_bit ? 3 : 0)) = inst.C;
+ if (is_64_bit) {
+ prev_rot_src = (uint32_t)(-1);
+ }
+ }
+ }
+}
+
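+// Assembles the Wownero (CN-R/WOW) single-hash main loop: template prologue, the generated
+// random math, then the patched 32-bit offset that jumps back into the template's main loop.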
+void wow_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM)
+{
+ uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
+ uint8_t* p = p0;
+
+ add_code(p, CryptonightWOW_template_part1, CryptonightWOW_template_part2);
+ add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
+ add_code(p, CryptonightWOW_template_part2, CryptonightWOW_template_part3);
+ *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightWOW_template_mainloop) - ((const uint8_t*)CryptonightWOW_template_part1)) - (p - p0));
+ add_code(p, CryptonightWOW_template_part3, CryptonightWOW_template_end);
+
+ Mem::flushInstructionCache(machine_code, p - p0);
+}
+
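+// Same layout as above, but using the Monero CN-R (CNv4) templates.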
+void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM)
+{
+ uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
+ uint8_t* p = p0;
+
+ add_code(p, CryptonightR_template_part1, CryptonightR_template_part2);
+ add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
+ add_code(p, CryptonightR_template_part2, CryptonightR_template_part3);
+ *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_template_mainloop) - ((const uint8_t*)CryptonightR_template_part1)) - (p - p0));
+ add_code(p, CryptonightR_template_part3, CryptonightR_template_end);
+
+ Mem::flushInstructionCache(machine_code, p - p0);
+}
+
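+// The double-hash variants interleave two scratchpads, so the random-math block is emitted
+// twice, once per hashing lane.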
+void wow_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM)
+{
+ uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
+ uint8_t* p = p0;
+
+ add_code(p, CryptonightWOW_template_double_part1, CryptonightWOW_template_double_part2);
+ add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
+ add_code(p, CryptonightWOW_template_double_part2, CryptonightWOW_template_double_part3);
+ add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
+ add_code(p, CryptonightWOW_template_double_part3, CryptonightWOW_template_double_part4);
+ *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightWOW_template_double_mainloop) - ((const uint8_t*)CryptonightWOW_template_double_part1)) - (p - p0));
+ add_code(p, CryptonightWOW_template_double_part4, CryptonightWOW_template_double_end);
+
+ Mem::flushInstructionCache(machine_code, p - p0);
+}
+
+void v4_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM)
+{
+ uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
+ uint8_t* p = p0;
+
+ add_code(p, CryptonightR_template_double_part1, CryptonightR_template_double_part2);
+ add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
+ add_code(p, CryptonightR_template_double_part2, CryptonightR_template_double_part3);
+ add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
+ add_code(p, CryptonightR_template_double_part3, CryptonightR_template_double_part4);
+ *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_template_double_mainloop) - ((const uint8_t*)CryptonightR_template_double_part1)) - (p - p0));
+ add_code(p, CryptonightR_template_double_part4, CryptonightR_template_double_end);
+
+ Mem::flushInstructionCache(machine_code, p - p0);
+}
+
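+// Soft-AES builds use separate templates whose AES rounds are done with table lookups
+// instead of AES-NI.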
+void wow_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM)
+{
+ uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
+ uint8_t* p = p0;
+
+ add_code(p, CryptonightWOW_soft_aes_template_part1, CryptonightWOW_soft_aes_template_part2);
+ add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
+ add_code(p, CryptonightWOW_soft_aes_template_part2, CryptonightWOW_soft_aes_template_part3);
+ *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightWOW_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightWOW_soft_aes_template_part1)) - (p - p0));
+ add_code(p, CryptonightWOW_soft_aes_template_part3, CryptonightWOW_soft_aes_template_end);
+
+ Mem::flushInstructionCache(machine_code, p - p0);
+}
+
+void v4_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM)
+{
+ uint8_t* p0 = reinterpret_cast<uint8_t*>(machine_code);
+ uint8_t* p = p0;
+
+ add_code(p, CryptonightR_soft_aes_template_part1, CryptonightR_soft_aes_template_part2);
+ add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM);
+ add_code(p, CryptonightR_soft_aes_template_part2, CryptonightR_soft_aes_template_part3);
+ *(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightR_soft_aes_template_part1)) - (p - p0));
+ add_code(p, CryptonightR_soft_aes_template_part3, CryptonightR_soft_aes_template_end);
+
+ Mem::flushInstructionCache(machine_code, p - p0);
+}
+#endif
\ No newline at end of file
diff --git a/src/crypto/CryptoNight_arm.h b/src/crypto/CryptoNight_arm.h
index b0e31ae6..6c3b2fd7 100644
--- a/src/crypto/CryptoNight_arm.h
+++ b/src/crypto/CryptoNight_arm.h
@@ -36,11 +36,26 @@
#endif
+#define SWAP32LE(x) x
+#define SWAP64LE(x) x
+#define hash_extra_blake(data, length, hash) blake256_hash((uint8_t*)(hash), (uint8_t*)(data), (length))
+
+#ifndef NOINLINE
+#ifdef __GNUC__
+#define NOINLINE __attribute__ ((noinline))
+#elif _MSC_VER
+#define NOINLINE __declspec(noinline)
+#else
+#define NOINLINE
+#endif
+#endif
+
#include
#include
#include "crypto/CryptoNight.h"
#include "crypto/soft_aes.h"
+#include "variant4_random_math.h"
extern "C"
@@ -111,11 +126,11 @@ static inline __attribute__((always_inline)) uint64_t _mm_cvtsi128_si64(__m128i
#define EXTRACT64(X) _mm_cvtsi128_si64(X)
-# define SHUFFLE_PHASE_1(l, idx, bx0, bx1, ax) \
+# define SHUFFLE_PHASE_1(l, idx, bx0, bx1, ax, reverse) \
{ \
- const uint64x2_t chunk1 = vld1q_u64((uint64_t*)((l) + ((idx) ^ 0x10))); \
+ const uint64x2_t chunk1 = vld1q_u64((uint64_t*)((l) + ((idx) ^ (reverse ? 0x30 : 0x10)))); \
const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((l) + ((idx) ^ 0x20))); \
- const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((l) + ((idx) ^ 0x30))); \
+ const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((l) + ((idx) ^ (reverse ? 0x10 : 0x30)))); \
vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(bx1))); \
vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(bx0))); \
vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(ax))); \
@@ -136,18 +151,52 @@ static inline __attribute__((always_inline)) uint64_t _mm_cvtsi128_si64(__m128i
sqrt_result##idx += ((r2 + b > sqrt_input) ? -1 : 0) + ((r2 + (1ULL << 32) < sqrt_input - s) ? 1 : 0); \
}
-# define SHUFFLE_PHASE_2(l, idx, bx0, bx1, ax, lo, hi) \
+# define SHUFFLE_PHASE_2(l, idx, bx0, bx1, ax, lo, hi, reverse) \
{ \
const uint64x2_t chunk1 = veorq_u64(vld1q_u64((uint64_t*)((l) + ((idx) ^ 0x10))), vcombine_u64(vcreate_u64(hi), vcreate_u64(lo))); \
const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((l) + ((idx) ^ 0x20))); \
const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((l) + ((idx) ^ 0x30))); \
hi ^= ((uint64_t*)((l) + ((idx) ^ 0x20)))[0]; \
lo ^= ((uint64_t*)((l) + ((idx) ^ 0x20)))[1]; \
+ if (reverse) { \
+ vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x10)), vaddq_u64(chunk1, vreinterpretq_u64_u8(bx1))); \
+ vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x20)), vaddq_u64(chunk3, vreinterpretq_u64_u8(bx0))); \
+ } else { \
+ vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(bx1))); \
+ vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(bx0))); \
+ } \
+ vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(ax))); \
+}
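+// The "reverse" argument to both shuffle macros swaps the chunk1/chunk3 ordering,
+// which is what the RWZ/Graft variant changes in the shuffle step.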
+
+# define SHUFFLE_V4(l, idx, bx0, bx1, ax, cx) \
+{ \
+ const uint64x2_t chunk1 = vld1q_u64((uint64_t*)((l) + ((idx) ^ 0x10))); \
+ const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((l) + ((idx) ^ 0x20))); \
+ const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((l) + ((idx) ^ 0x30))); \
vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(bx1))); \
vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(bx0))); \
vst1q_u64((uint64_t*)((l) + ((idx) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(ax))); \
+ cx = veorq_u64(veorq_u64(cx, chunk3), veorq_u64(chunk1, chunk2)); \
}
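+// CN-R random math: r[0..3] are seeded from Keccak state words 12 and 13, and the random
+// program is generated from the variant and the block height.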
+# define VARIANT4_RANDOM_MATH_INIT(idx, h) \
+ uint32_t r##idx[9]; \
+ struct V4_Instruction code##idx[256]; \
+ r##idx[0] = (uint32_t)(h[12]); \
+ r##idx[1] = (uint32_t)(h[12] >> 32); \
+ r##idx[2] = (uint32_t)(h[13]); \
+ r##idx[3] = (uint32_t)(h[13] >> 32); \
+ v4_random_math_init(code##idx, VARIANT, height);
+
+# define VARIANT4_RANDOM_MATH(idx, al, ah, cl, bx0, bx1) \
+ cl ^= (r##idx[0] + r##idx[1]) | ((uint64_t)(r##idx[2] + r##idx[3]) << 32); \
+ r##idx[4] = static_cast<uint32_t>(al); \
+ r##idx[5] = static_cast<uint32_t>(ah); \
+ r##idx[6] = static_cast<uint32_t>(_mm_cvtsi128_si32(bx0)); \
+ r##idx[7] = static_cast<uint32_t>(_mm_cvtsi128_si32(bx1)); \
+ r##idx[8] = static_cast<uint32_t>(_mm_cvtsi128_si32(_mm_srli_si128(bx1, 8))); \
+ v4_random_math(code##idx, r##idx); \
+
#if defined (__arm64__) || defined (__aarch64__)
static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t* hi)
@@ -640,8 +689,7 @@ static inline void cn_implode_scratchpad_heavy(const __m128i* input, __m128i* ou
_mm_store_si128(output + 11, xout7);
}
-// n-Loop version. Seems to be little bit slower then the hardcoded one.
-template <size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, size_t NUM_HASH_BLOCKS>
+template <size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, PowVariant VARIANT, size_t NUM_HASH_BLOCKS>
class CryptoNightMultiHash
{
public:
@@ -650,79 +698,7 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
- __m128i ax[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast(input) + hashBlock * size, (int) size,
- scratchPad[hashBlock]->state, 200);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- ax[hashBlock] = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- if (SOFT_AES) {
- cx[hashBlock] = soft_aesenc((uint32_t *) &l[hashBlock][idx[hashBlock] & MASK], ax[hashBlock]);
- } else {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- cx[hashBlock] = _mm_aesenc_si128(cx[hashBlock], ax[hashBlock]);
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- uint64_t hi, lo, cl, ch;
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- bx[hashBlock] = cx[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ //dummy
}
inline static void hashPowV2(const uint8_t* __restrict__ input,
@@ -730,200 +706,24 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- uint64_t tweak1_2[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
- __m128i ax[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state,
- 200);
- tweak1_2[hashBlock] = (*reinterpret_cast(input + 35 + hashBlock * size) ^
- *(reinterpret_cast(scratchPad[hashBlock]->state) + 24));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- ax[hashBlock] = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- if (SOFT_AES) {
- cx[hashBlock] = soft_aesenc((uint32_t *) &l[hashBlock][idx[hashBlock] & MASK], ax[hashBlock]);
- } else {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- cx[hashBlock] = _mm_aesenc_si128(cx[hashBlock], ax[hashBlock]);
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- const uint8_t tmp = reinterpret_cast(&l[hashBlock][idx[hashBlock] & MASK])[11];
- static const uint32_t table = 0x75310;
- const uint8_t index = (((tmp >> INDEX_SHIFT) & 6) | (tmp & 1)) << 1;
- ((uint8_t *) (&l[hashBlock][idx[hashBlock] & MASK]))[11] = tmp ^ ((table >> index) & 0x30);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- uint64_t hi, lo, cl, ch;
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cl = ((uint64_t *) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t *) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t *) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t *) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- bx[hashBlock] = cx[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ //dummy
}
- // multi
inline static void hashPowV3(const uint8_t* __restrict__ input,
size_t size,
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- uint64_t sqrt_result[NUM_HASH_BLOCKS];
- uint64_t division_result_xmm[NUM_HASH_BLOCKS];
- __m128i bx0[NUM_HASH_BLOCKS];
- __m128i bx1[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
- __m128i ax[NUM_HASH_BLOCKS];
+ //dummy
+ }
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast(input) + hashBlock * size, (int) size,
- scratchPad[hashBlock]->state, 200);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx0[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- bx1[hashBlock] = _mm_set_epi64x(h[hashBlock][9] ^ h[hashBlock][11], h[hashBlock][8] ^ h[hashBlock][10]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
-
- division_result_xmm[hashBlock] = h[hashBlock][12];
- sqrt_result[hashBlock] = h[hashBlock][13];
- }
-
- uint64_t sqrt_result0;
- uint64_t division_result_xmm0;
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- ax[hashBlock] = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- if (SOFT_AES) {
- cx[hashBlock] = soft_aesenc((uint32_t *) &l[hashBlock][idx[hashBlock] & MASK], ax[hashBlock]);
- } else {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- cx[hashBlock] = _mm_aesenc_si128(cx[hashBlock], ax[hashBlock]);
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- SHUFFLE_PHASE_1(l[hashBlock], idx[hashBlock] & MASK, bx0[hashBlock], bx1[hashBlock], ax[hashBlock])
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx0[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- uint64_t hi, lo, cl, ch;
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
-
- sqrt_result0 = sqrt_result[hashBlock];
- division_result_xmm0 = division_result_xmm[hashBlock];
-
- INTEGER_MATH_V2(0, cl, cx[hashBlock])
-
- sqrt_result[hashBlock] = sqrt_result0;
- division_result_xmm[hashBlock] = division_result_xmm0;
-
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- SHUFFLE_PHASE_2(l[hashBlock], idx[hashBlock] & MASK, bx0[hashBlock], bx1[hashBlock], ax[hashBlock], lo, hi)
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- bx1[hashBlock] = bx0[hashBlock];
- bx0[hashBlock] = cx[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
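+ // CN-R entry point: takes the block height so the per-height random-math program can be generated.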
+ inline static void hashPowV4(const uint8_t* __restrict__ input,
+ size_t size,
+ uint8_t* __restrict__ output,
+ ScratchPad** __restrict__ scratchPad,
+ uint64_t height)
+ {
+ // dummy
}
inline static void hashLiteTube(const uint8_t* __restrict__ input,
@@ -931,87 +731,7 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- uint64_t tweak1_2[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state,
- 200);
- tweak1_2[hashBlock] = (*reinterpret_cast(input + 35 + hashBlock * size) ^
- *(reinterpret_cast(scratchPad[hashBlock]->state) + 24));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] =
- _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- __m128i cx;
-
- if (SOFT_AES) {
- cx = soft_aesenc((uint32_t*) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
- } else {
- cx = _mm_load_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK]);
- cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
- }
-
- _mm_store_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx));
-
- const uint8_t tmp = reinterpret_cast(&l[hashBlock][idx[hashBlock] & MASK])[11];
- static const uint32_t table = 0x75310;
- const uint8_t index = (((tmp >> INDEX_SHIFT) & 6) | (tmp & 1)) << 1;
- ((uint8_t*) (&l[hashBlock][idx[hashBlock] & MASK]))[11] = tmp ^ ((table >> index) & 0x30);
-
- idx[hashBlock] = EXTRACT64(cx);
- bx[hashBlock] = cx;
-
- uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] ^= ((uint64_t*) &l[hashBlock][idx[hashBlock] &
- MASK])[0];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ //dummy
}
inline static void hashHeavy(const uint8_t* __restrict__ input,
@@ -1019,161 +739,7 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast(input) + hashBlock * size, (int) size,
- scratchPad[hashBlock]->state, 200);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad_heavy((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- __m128i cx;
-
- if (SOFT_AES) {
- cx = soft_aesenc((uint32_t*) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
- } else {
- cx = _mm_load_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK]);
- cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
- }
-
- _mm_store_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx));
-
- idx[hashBlock] = EXTRACT64(cx);
- bx[hashBlock] = cx;
-
- uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- const int64x2_t x = vld1q_s64(reinterpret_cast(&l[hashBlock][idx[hashBlock] & MASK]));
- const int64_t n = vgetq_lane_s64(x, 0);
- const int32_t d = vgetq_lane_s32(x, 2);
- const int64_t q = n / (d | 0x5);
-
- ((int64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = n ^ q;
-
- idx[hashBlock] = d ^ q;
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad_heavy((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
- }
-
- inline static void hashHeavyHaven(const uint8_t* __restrict__ input,
- size_t size,
- uint8_t* __restrict__ output,
- ScratchPad** __restrict__ scratchPad)
- {
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast(input) + hashBlock * size, (int) size,
- scratchPad[hashBlock]->state, 200);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad_heavy((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- __m128i cx;
-
- if (SOFT_AES) {
- cx = soft_aesenc((uint32_t*) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
- } else {
- cx = _mm_load_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK]);
- cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
- }
-
- _mm_store_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx));
-
- idx[hashBlock] = EXTRACT64(cx);
- bx[hashBlock] = cx;
-
- uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- const int64x2_t x = vld1q_s64(reinterpret_cast(&l[hashBlock][idx[hashBlock] & MASK]));
- const int64_t n = vgetq_lane_s64(x, 0);
- const int32_t d = vgetq_lane_s32(x, 2);
- const int64_t q = n / (d | 0x5);
-
- ((int64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = n ^ q;
-
- idx[hashBlock] = (~d) ^ q;
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad_heavy((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ //dummy
}
inline static void hashHeavyTube(const uint8_t* __restrict__ input,
@@ -1181,125 +747,12 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- uint64_t tweak1_2[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state,
- 200);
- tweak1_2[hashBlock] = (*reinterpret_cast(reinterpret_cast(input) + 35 +
- hashBlock * size) ^
- *(reinterpret_cast(scratchPad[hashBlock]->state) + 24));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad_heavy((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- union alignas(16)
- {
- uint32_t k[4];
- uint64_t v64[2];
- };
- alignas(16) uint32_t x[4];
-
-#define BYTE(p, i) ((unsigned char*)&p)[i]
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- __m128i cx;
-
- cx = _mm_load_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK]);
-
- const __m128i& key = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- _mm_store_si128((__m128i*) k, key);
- cx = _mm_xor_si128(cx, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
- _mm_store_si128((__m128i*) x, cx);
-
- k[0] ^= saes_table[0][BYTE(x[0], 0)] ^ saes_table[1][BYTE(x[1], 1)] ^ saes_table[2][BYTE(x[2], 2)] ^
- saes_table[3][BYTE(x[3], 3)];
- x[0] ^= k[0];
- k[1] ^= saes_table[0][BYTE(x[1], 0)] ^ saes_table[1][BYTE(x[2], 1)] ^ saes_table[2][BYTE(x[3], 2)] ^
- saes_table[3][BYTE(x[0], 3)];
- x[1] ^= k[1];
- k[2] ^= saes_table[0][BYTE(x[2], 0)] ^ saes_table[1][BYTE(x[3], 1)] ^ saes_table[2][BYTE(x[0], 2)] ^
- saes_table[3][BYTE(x[1], 3)];
- x[2] ^= k[2];
- k[3] ^= saes_table[0][BYTE(x[3], 0)] ^ saes_table[1][BYTE(x[0], 1)] ^ saes_table[2][BYTE(x[1], 2)] ^
- saes_table[3][BYTE(x[2], 3)];
-
- cx = _mm_load_si128((__m128i*) k);
-
- _mm_store_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK], _mm_xor_si128(bx[hashBlock], cx));
-
- const uint8_t tmp = reinterpret_cast<const uint8_t*>(&l[hashBlock][idx[hashBlock] & MASK])[11];
- static const uint32_t table = 0x75310;
- const uint8_t index = (((tmp >> INDEX_SHIFT) & 6) | (tmp & 1)) << 1;
- ((uint8_t*) (&l[hashBlock][idx[hashBlock] & MASK]))[11] = tmp ^ ((table >> index) & 0x30);
-
- idx[hashBlock] = EXTRACT64(cx);
- bx[hashBlock] = cx;
-
- uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] ^= ((uint64_t*) &l[hashBlock][idx[hashBlock] &
- MASK])[0];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- const int64x2_t x = vld1q_s64(reinterpret_cast<const int64_t*>(&l[hashBlock][idx[hashBlock] & MASK]));
- const int64_t n = vgetq_lane_s64(x, 0);
- const int32_t d = vgetq_lane_s32(x, 2);
- const int64_t q = n / (d | 0x5);
-
- ((int64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = n ^ q;
-
- idx[hashBlock] = d ^ q;
- }
- }
-
-#undef BYTE
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad_heavy((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ //dummy
}
};
-template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES>
-class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, 1>
+template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, PowVariant VARIANT>
+class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, VARIANT, 1>
{
public:
inline static void hash(const uint8_t* __restrict__ input,
@@ -1462,7 +915,7 @@ public:
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
}
- SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0)
+ SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ)
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
@@ -1476,7 +929,85 @@ public:
lo = __umul128(idx0, cl, &hi);
- SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi);
+ SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ)
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
+ ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+
+ bx10 = bx00;
+ bx00 = cx0;
+ }
+
+ cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
+
+ keccakf(h0, 24);
+
+ extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
+ }
+
+ // single
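+ // CN-R / CNv4 main loop: CNv2's division + square-root step is replaced by the
+ // height-dependent random math program (VARIANT4_RANDOM_MATH), and the shuffle also
+ // mixes the neighbouring chunks back into cx (SHUFFLE_V4).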
+ inline static void hashPowV4(const uint8_t* __restrict__ input,
+ size_t size,
+ uint8_t* __restrict__ output,
+ ScratchPad** __restrict__ scratchPad,
+ uint64_t height)
+
+ {
+ keccak(input, (int) size, scratchPad[0]->state, 200);
+
+ const uint8_t* l0 = scratchPad[0]->memory;
+ uint64_t* h0 = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
+
+ cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
+
+ uint64_t al0 = h0[0] ^h0[4];
+ uint64_t ah0 = h0[1] ^h0[5];
+
+ __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+ __m128i bx10 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
+
+ uint64_t idx0 = h0[0] ^h0[4];
+
+ VARIANT4_RANDOM_MATH_INIT(0, h0)
+
+ for (size_t i = 0; i < ITERATIONS; i++) {
+ __m128i cx0;
+ const __m128i ax0 = _mm_set_epi64x(ah0, al0);
+
+ if (SOFT_AES) {
+ cx0 = soft_aesenc((uint32_t*) &l0[idx0 & MASK], ax0);
+ } else {
+ cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+ cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
+ }
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0)
+
+ _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
+
+ idx0 = EXTRACT64(cx0);
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & MASK])[0];
+ ch = ((uint64_t*) &l0[idx0 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx10)
+
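+ // Monero's CN-R (POW_V4) additionally XORs the random-math registers back into a;
+ // other CN-R style variants (e.g. Wownero's "wow") skip this step.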
+ if (VARIANT == POW_V4) {
+ al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
+ ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
+ }
+
+ lo = __umul128(idx0, cl, &hi);
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0);
al0 += hi;
ah0 += lo;
@@ -1628,7 +1159,11 @@ public:
((int64_t*) &l[idx & MASK])[0] = n ^ q;
- idx = d ^ q;
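+ // Haven (XHV) inverts the divisor lane before the final XOR; other heavy variants keep d ^ q.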
+ if (VARIANT == POW_XHV) {
+ idx = (~d) ^ q;
+ } else {
+ idx = d ^ q;
+ }
}
cn_implode_scratchpad_heavy((__m128i*) scratchPad[0]->memory, (__m128i*) scratchPad[0]->state);
@@ -1636,75 +1171,6 @@ public:
extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
}
- inline static void hashHeavyHaven(const uint8_t* __restrict__ input,
- size_t size,
- uint8_t* __restrict__ output,
- ScratchPad** __restrict__ scratchPad)
- {
- const uint8_t* l;
- uint64_t* h;
- uint64_t al;
- uint64_t ah;
- __m128i bx;
- uint64_t idx;
-
- keccak(static_cast<const uint8_t*>(input), (int) size, scratchPad[0]->state, 200);
-
- l = scratchPad[0]->memory;
- h = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
-
- cn_explode_scratchpad_heavy((__m128i*) h, (__m128i*) l);
-
- al = h[0] ^ h[4];
- ah = h[1] ^ h[5];
- bx = _mm_set_epi64x(h[3] ^ h[7], h[2] ^ h[6]);
- idx = h[0] ^ h[4];
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- __m128i cx;
-
- if (SOFT_AES) {
- cx = soft_aesenc((uint32_t*) &l[idx & MASK], _mm_set_epi64x(ah, al));
- } else {
- cx = _mm_load_si128((__m128i*) &l[idx & MASK]);
- cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah, al));
- }
-
- _mm_store_si128((__m128i*) &l[idx & MASK], _mm_xor_si128(bx, cx));
- idx = EXTRACT64(cx);
- bx = cx;
-
- uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*) &l[idx & MASK])[0];
- ch = ((uint64_t*) &l[idx & MASK])[1];
- lo = __umul128(idx, cl, &hi);
-
- al += hi;
- ah += lo;
-
- ((uint64_t*) &l[idx & MASK])[0] = al;
- ((uint64_t*) &l[idx & MASK])[1] = ah;
-
- ah ^= ch;
- al ^= cl;
- idx = al;
-
- const int64x2_t x = vld1q_s64(reinterpret_cast<const int64_t*>(&l[idx & MASK]));
- const int64_t n = vgetq_lane_s64(x, 0);
- const int32_t d = vgetq_lane_s32(x, 2);
- const int64_t q = n / (d | 0x5);
-
- ((int64_t*) &l[idx & MASK])[0] = n ^ q;
-
- idx = (~d) ^ q;
- }
-
- cn_implode_scratchpad_heavy((__m128i*) l, (__m128i*) h);
- keccakf(h, 24);
- extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
- }
-
-
inline static void hashHeavyTube(const uint8_t* __restrict__ input,
size_t size,
uint8_t* __restrict__ output,
@@ -1809,8 +1275,8 @@ public:
};
-template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES>
-class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, 2>
+template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, PowVariant VARIANT>
+class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, VARIANT, 2>
{
public:
inline static void hash(const uint8_t* __restrict__ input,
@@ -2067,8 +1533,8 @@ public:
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
}
- SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0)
- SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1)
+ SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ)
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
@@ -2084,7 +1550,7 @@ public:
lo = __umul128(idx0, cl, &hi);
- SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi);
+ SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ)
al0 += hi;
ah0 += lo;
@@ -2107,7 +1573,136 @@ public:
lo = __umul128(idx1, cl, &hi);
- SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi);
+ SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ)
+
+ al1 += hi;
+ ah1 += lo;
+
+ ((uint64_t*) &l1[idx1 & MASK])[0] = al1;
+ ((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
+
+ ah1 ^= ch;
+ al1 ^= cl;
+ idx1 = al1;
+
+ bx11 = bx01;
+ bx01 = cx1;
+ }
+
+ cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
+ cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
+
+ keccakf(h0, 24);
+ keccakf(h1, 24);
+
+ extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
+ extra_hashes[scratchPad[1]->state[0] & 3](scratchPad[1]->state, 200, output + 32);
+ }
+
+ // double
+ inline static void hashPowV4(const uint8_t* __restrict__ input,
+ size_t size,
+ uint8_t* __restrict__ output,
+ ScratchPad** __restrict__ scratchPad,
+ uint64_t height)
+ {
+ keccak(input, (int) size, scratchPad[0]->state, 200);
+ keccak(input + size, (int) size, scratchPad[1]->state, 200);
+
+ const uint8_t* l0 = scratchPad[0]->memory;
+ const uint8_t* l1 = scratchPad[1]->memory;
+ uint64_t* h0 = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
+ uint64_t* h1 = reinterpret_cast<uint64_t*>(scratchPad[1]->state);
+
+ cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
+ cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
+
+ uint64_t al0 = h0[0] ^h0[4];
+ uint64_t al1 = h1[0] ^h1[4];
+ uint64_t ah0 = h0[1] ^h0[5];
+ uint64_t ah1 = h1[1] ^h1[5];
+
+ __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+ __m128i bx01 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+
+ __m128i bx10 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
+ __m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
+
+ uint64_t idx0 = h0[0] ^h0[4];
+ uint64_t idx1 = h1[0] ^h1[4];
+
+ VARIANT4_RANDOM_MATH_INIT(0, h0)
+ VARIANT4_RANDOM_MATH_INIT(1, h1)
+
+ for (size_t i = 0; i < ITERATIONS; i++) {
+ __m128i cx0;
+ __m128i cx1;
+
+ const __m128i ax0 = _mm_set_epi64x(ah0, al0);
+ const __m128i ax1 = _mm_set_epi64x(ah1, al1);
+
+ if (SOFT_AES) {
+ cx0 = soft_aesenc((uint32_t*) &l0[idx0 & MASK], ax0);
+ cx1 = soft_aesenc((uint32_t*) &l1[idx1 & MASK], ax1);
+ } else {
+ cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+ cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+
+ cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
+ cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
+ }
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0)
+ SHUFFLE_V4(l1, (idx1&MASK), bx01, bx11, ax1, cx1)
+
+ _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
+ _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
+
+ idx0 = EXTRACT64(cx0);
+ idx1 = EXTRACT64(cx1);
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & MASK])[0];
+ ch = ((uint64_t*) &l0[idx0 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx10)
+
+ if (VARIANT == POW_V4) {
+ al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
+ ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
+ }
+
+ lo = __umul128(idx0, cl, &hi);
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0);
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
+ ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+
+ bx10 = bx00;
+ bx00 = cx0;
+
+
+ cl = ((uint64_t*) &l1[idx1 & MASK])[0];
+ ch = ((uint64_t*) &l1[idx1 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx01, bx11)
+
+ if (VARIANT == POW_V4) {
+ al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
+ ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
+ }
+
+ lo = __umul128(idx1, cl, &hi);
+
+ SHUFFLE_V4(l1, (idx1&MASK), bx01, bx11, ax1, cx1);
al1 += hi;
ah1 += lo;
@@ -2318,118 +1913,12 @@ public:
((int64_t*) &l0[idx0 & MASK])[0] = n0 ^ q0;
- idx0 = d0 ^ q0;
-
-
- cl = ((uint64_t*) &l1[idx1 & MASK])[0];
- ch = ((uint64_t*) &l1[idx1 & MASK])[1];
- lo = __umul128(idx1, cl, &hi);
-
- al1 += hi;
- ah1 += lo;
-
- ((uint64_t*) &l1[idx1 & MASK])[0] = al1;
- ((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
-
- ah1 ^= ch;
- al1 ^= cl;
- idx1 = al1;
-
- const int64x2_t x1 = vld1q_s64(reinterpret_cast<const int64_t*>(&l1[idx1 & MASK]));
- const int64_t n1 = vgetq_lane_s64(x1, 0);
- const int32_t d1 = vgetq_lane_s32(x1, 2);
- const int64_t q1 = n1 / (d1 | 0x5);
-
- ((int64_t*) &l1[idx1 & MASK])[0] = n1 ^ q1;
-
- idx1 = d1 ^ q1;
- }
-
- cn_implode_scratchpad_heavy((__m128i*) l0, (__m128i*) h0);
- cn_implode_scratchpad_heavy((__m128i*) l1, (__m128i*) h1);
-
- keccakf(h0, 24);
- keccakf(h1, 24);
-
- extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
- extra_hashes[scratchPad[1]->state[0] & 3](scratchPad[1]->state, 200, output + 32);
- }
-
- inline static void hashHeavyHaven(const uint8_t* __restrict__ input,
- size_t size,
- uint8_t* __restrict__ output,
- ScratchPad** __restrict__ scratchPad)
- {
- keccak(input, (int) size, scratchPad[0]->state, 200);
- keccak(input + size, (int) size, scratchPad[1]->state, 200);
-
- const uint8_t* l0 = scratchPad[0]->memory;
- const uint8_t* l1 = scratchPad[1]->memory;
- uint64_t* h0 = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
- uint64_t* h1 = reinterpret_cast<uint64_t*>(scratchPad[1]->state);
-
- cn_explode_scratchpad_heavy((__m128i*) h0, (__m128i*) l0);
- cn_explode_scratchpad_heavy((__m128i*) h1, (__m128i*) l1);
-
- uint64_t al0 = h0[0] ^h0[4];
- uint64_t al1 = h1[0] ^h1[4];
- uint64_t ah0 = h0[1] ^h0[5];
- uint64_t ah1 = h1[1] ^h1[5];
-
- __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
- __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
-
- uint64_t idx0 = h0[0] ^h0[4];
- uint64_t idx1 = h1[0] ^h1[4];
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- __m128i cx0;
- __m128i cx1;
-
- if (SOFT_AES) {
- cx0 = soft_aesenc((uint32_t*) &l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
- cx1 = soft_aesenc((uint32_t*) &l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
+ if (VARIANT == POW_XHV) {
+ idx0 = (~d0) ^ q0;
} else {
- cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
- cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
-
- cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
- cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
+ idx0 = d0 ^ q0;
}
- _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
- _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
-
- idx0 = EXTRACT64(cx0);
- idx1 = EXTRACT64(cx1);
-
- bx0 = cx0;
- bx1 = cx1;
-
- uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*) &l0[idx0 & MASK])[0];
- ch = ((uint64_t*) &l0[idx0 & MASK])[1];
- lo = __umul128(idx0, cl, &hi);
-
- al0 += hi;
- ah0 += lo;
-
- ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
- ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
-
- ah0 ^= ch;
- al0 ^= cl;
- idx0 = al0;
-
- const int64x2_t x0 = vld1q_s64(reinterpret_cast<const int64_t*>(&l0[idx0 & MASK]));
- const int64_t n0 = vgetq_lane_s64(x0, 0);
- const int32_t d0 = vgetq_lane_s32(x0, 2);
- const int64_t q0 = n0 / (d0 | 0x5);
-
- ((int64_t*) &l0[idx0 & MASK])[0] = n0 ^ q0;
-
- idx0 = (~d0) ^ q0;
-
cl = ((uint64_t*) &l1[idx1 & MASK])[0];
ch = ((uint64_t*) &l1[idx1 & MASK])[1];
lo = __umul128(idx1, cl, &hi);
@@ -2451,7 +1940,11 @@ public:
((int64_t*) &l1[idx1 & MASK])[0] = n1 ^ q1;
- idx1 = (~d1) ^ q1;
+ if (VARIANT == POW_XHV) {
+ idx1 = (~d1) ^ q1;
+ } else {
+ idx1 = d1 ^ q1;
+ }
}
cn_implode_scratchpad_heavy((__m128i*) l0, (__m128i*) h0);
@@ -2635,8 +2128,8 @@ public:
}
};
-template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES>
-class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, 3>
+template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, PowVariant VARIANT>
+class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, VARIANT, 3>
{
public:
inline static void hash(const uint8_t* __restrict__ input,
@@ -2986,9 +2479,9 @@ public:
cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2));
}
- SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0)
- SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1)
- SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2)
+ SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ)
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
@@ -3006,7 +2499,7 @@ public:
lo = __umul128(idx0, cl, &hi);
- SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi);
+ SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ)
al0 += hi;
ah0 += lo;
@@ -3029,7 +2522,7 @@ public:
lo = __umul128(idx1, cl, &hi);
- SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi);
+ SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ)
al1 += hi;
ah1 += lo;
@@ -3052,7 +2545,185 @@ public:
lo = __umul128(idx2, cl, &hi);
- SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi)
+ SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ)
+
+ al2 += hi;
+ ah2 += lo;
+
+ ((uint64_t*) &l2[idx2 & MASK])[0] = al2;
+ ((uint64_t*) &l2[idx2 & MASK])[1] = ah2;
+
+ ah2 ^= ch;
+ al2 ^= cl;
+ idx2 = al2;
+
+ bx12 = bx02;
+ bx02 = cx2;
+ }
+
+ cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
+ cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
+ cn_implode_scratchpad((__m128i*) l2, (__m128i*) h2);
+
+ keccakf(h0, 24);
+ keccakf(h1, 24);
+ keccakf(h2, 24);
+
+ extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
+ extra_hashes[scratchPad[1]->state[0] & 3](scratchPad[1]->state, 200, output + 32);
+ extra_hashes[scratchPad[2]->state[0] & 3](scratchPad[2]->state, 200, output + 64);
+ }
+
+ // triple
+ inline static void hashPowV4(const uint8_t* __restrict__ input,
+ size_t size,
+ uint8_t* __restrict__ output,
+ ScratchPad** __restrict__ scratchPad,
+ uint64_t height)
+ {
+ keccak(input, (int) size, scratchPad[0]->state, 200);
+ keccak(input + size, (int) size, scratchPad[1]->state, 200);
+ keccak(input + 2 * size, (int) size, scratchPad[2]->state, 200);
+
+ const uint8_t* l0 = scratchPad[0]->memory;
+ const uint8_t* l1 = scratchPad[1]->memory;
+ const uint8_t* l2 = scratchPad[2]->memory;
+ uint64_t* h0 = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
+ uint64_t* h1 = reinterpret_cast<uint64_t*>(scratchPad[1]->state);
+ uint64_t* h2 = reinterpret_cast<uint64_t*>(scratchPad[2]->state);
+
+ cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
+ cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
+ cn_explode_scratchpad((__m128i*) h2, (__m128i*) l2);
+
+ uint64_t al0 = h0[0] ^h0[4];
+ uint64_t al1 = h1[0] ^h1[4];
+ uint64_t al2 = h2[0] ^h2[4];
+ uint64_t ah0 = h0[1] ^h0[5];
+ uint64_t ah1 = h1[1] ^h1[5];
+ uint64_t ah2 = h2[1] ^h2[5];
+
+ __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+ __m128i bx01 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+ __m128i bx02 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
+
+ __m128i bx10 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
+ __m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
+ __m128i bx12 = _mm_set_epi64x(h2[9] ^ h2[11], h2[8] ^ h2[10]);
+
+ uint64_t idx0 = h0[0] ^h0[4];
+ uint64_t idx1 = h1[0] ^h1[4];
+ uint64_t idx2 = h2[0] ^h2[4];
+
+ VARIANT4_RANDOM_MATH_INIT(0, h0)
+ VARIANT4_RANDOM_MATH_INIT(1, h1)
+ VARIANT4_RANDOM_MATH_INIT(2, h2)
+
+ for (size_t i = 0; i < ITERATIONS; i++) {
+ __m128i cx0;
+ __m128i cx1;
+ __m128i cx2;
+
+ const __m128i ax0 = _mm_set_epi64x(ah0, al0);
+ const __m128i ax1 = _mm_set_epi64x(ah1, al1);
+ const __m128i ax2 = _mm_set_epi64x(ah2, al2);
+
+ if (SOFT_AES) {
+ cx0 = soft_aesenc((uint32_t*) &l0[idx0 & MASK], ax0);
+ cx1 = soft_aesenc((uint32_t*) &l1[idx1 & MASK], ax1);
+ cx2 = soft_aesenc((uint32_t*) &l2[idx2 & MASK], ax2);
+ } else {
+ cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+ cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+ cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
+
+ cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
+ cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
+ cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2));
+ }
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0)
+ SHUFFLE_V4(l1, (idx1&MASK), bx01, bx11, ax1, cx1)
+ SHUFFLE_V4(l2, (idx2&MASK), bx02, bx12, ax2, cx2)
+
+ _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
+ _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
+ _mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx02, cx2));
+
+ idx0 = EXTRACT64(cx0);
+ idx1 = EXTRACT64(cx1);
+ idx2 = EXTRACT64(cx2);
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & MASK])[0];
+ ch = ((uint64_t*) &l0[idx0 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx10)
+
+ if (VARIANT == POW_V4) {
+ al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
+ ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
+ }
+
+ lo = __umul128(idx0, cl, &hi);
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0);
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
+ ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+
+ bx10 = bx00;
+ bx00 = cx0;
+
+
+ cl = ((uint64_t*) &l1[idx1 & MASK])[0];
+ ch = ((uint64_t*) &l1[idx1 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx01, bx11)
+
+ if (VARIANT == POW_V4) {
+ al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
+ ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
+ }
+
+ lo = __umul128(idx1, cl, &hi);
+
+ SHUFFLE_V4(l1, (idx1&MASK), bx01, bx11, ax1, cx1);
+
+ al1 += hi;
+ ah1 += lo;
+
+ ((uint64_t*) &l1[idx1 & MASK])[0] = al1;
+ ((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
+
+ ah1 ^= ch;
+ al1 ^= cl;
+ idx1 = al1;
+
+ bx11 = bx01;
+ bx01 = cx1;
+
+
+ cl = ((uint64_t*) &l2[idx2 & MASK])[0];
+ ch = ((uint64_t*) &l2[idx2 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(2, al2, ah2, cl, bx02, bx12)
+
+ if (VARIANT == POW_V4) {
+ al2 ^= r2[2] | ((uint64_t)(r2[3]) << 32);
+ ah2 ^= r2[0] | ((uint64_t)(r2[1]) << 32);
+ }
+
+ lo = __umul128(idx2, cl, &hi);
+
+ SHUFFLE_V4(l2, (idx2&MASK), bx02, bx12, ax2, cx2)
al2 += hi;
ah2 += lo;
@@ -3326,162 +2997,12 @@ public:
((int64_t*) &l0[idx0 & MASK])[0] = n0 ^ q0;
- idx0 = d0 ^ q0;
-
- cl = ((uint64_t*) &l1[idx1 & MASK])[0];
- ch = ((uint64_t*) &l1[idx1 & MASK])[1];
- lo = __umul128(idx1, cl, &hi);
-
- al1 += hi;
- ah1 += lo;
-
- ((uint64_t*) &l1[idx1 & MASK])[0] = al1;
- ((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
-
- ah1 ^= ch;
- al1 ^= cl;
- idx1 = al1;
-
- const int64x2_t x1 = vld1q_s64(reinterpret_cast<const int64_t*>(&l1[idx1 & MASK]));
- const int64_t n1 = vgetq_lane_s64(x1, 0);
- const int32_t d1 = vgetq_lane_s32(x1, 2);
- const int64_t q1 = n1 / (d1 | 0x5);
-
- ((int64_t*) &l1[idx1 & MASK])[0] = n1 ^ q1;
-
- idx1 = d1 ^ q1;
-
-
- cl = ((uint64_t*) &l2[idx2 & MASK])[0];
- ch = ((uint64_t*) &l2[idx2 & MASK])[1];
- lo = __umul128(idx2, cl, &hi);
-
- al2 += hi;
- ah2 += lo;
-
- ((uint64_t*) &l2[idx2 & MASK])[0] = al2;
- ((uint64_t*) &l2[idx2 & MASK])[1] = ah2;
-
- ah2 ^= ch;
- al2 ^= cl;
- idx2 = al2;
-
-
- const int64x2_t x2 = vld1q_s64(reinterpret_cast<const int64_t*>(&l2[idx2 & MASK]));
- const int64_t n2 = vgetq_lane_s64(x2, 0);
- const int32_t d2 = vgetq_lane_s32(x2, 2);
- const int64_t q2 = n2 / (d2 | 0x5);
-
- ((int64_t*) &l2[idx2 & MASK])[0] = n2 ^ q2;
-
- idx2 = d2 ^ q2;
- }
-
- cn_implode_scratchpad_heavy((__m128i*) l0, (__m128i*) h0);
- cn_implode_scratchpad_heavy((__m128i*) l1, (__m128i*) h1);
- cn_implode_scratchpad_heavy((__m128i*) l2, (__m128i*) h2);
-
- keccakf(h0, 24);
- keccakf(h1, 24);
- keccakf(h2, 24);
-
- extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
- extra_hashes[scratchPad[1]->state[0] & 3](scratchPad[1]->state, 200, output + 32);
- extra_hashes[scratchPad[2]->state[0] & 3](scratchPad[2]->state, 200, output + 64);
- }
-
- inline static void hashHeavyHaven(const uint8_t* __restrict__ input,
- size_t size,
- uint8_t* __restrict__ output,
- ScratchPad** __restrict__ scratchPad)
- {
- keccak(input, (int) size, scratchPad[0]->state, 200);
- keccak(input + size, (int) size, scratchPad[1]->state, 200);
- keccak(input + 2 * size, (int) size, scratchPad[2]->state, 200);
-
- const uint8_t* l0 = scratchPad[0]->memory;
- const uint8_t* l1 = scratchPad[1]->memory;
- const uint8_t* l2 = scratchPad[2]->memory;
- uint64_t* h0 = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
- uint64_t* h1 = reinterpret_cast<uint64_t*>(scratchPad[1]->state);
- uint64_t* h2 = reinterpret_cast<uint64_t*>(scratchPad[2]->state);
-
- cn_explode_scratchpad_heavy((__m128i*) h0, (__m128i*) l0);
- cn_explode_scratchpad_heavy((__m128i*) h1, (__m128i*) l1);
- cn_explode_scratchpad_heavy((__m128i*) h2, (__m128i*) l2);
-
- uint64_t al0 = h0[0] ^h0[4];
- uint64_t al1 = h1[0] ^h1[4];
- uint64_t al2 = h2[0] ^h2[4];
- uint64_t ah0 = h0[1] ^h0[5];
- uint64_t ah1 = h1[1] ^h1[5];
- uint64_t ah2 = h2[1] ^h2[5];
-
- __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
- __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
- __m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
-
- uint64_t idx0 = h0[0] ^h0[4];
- uint64_t idx1 = h1[0] ^h1[4];
- uint64_t idx2 = h2[0] ^h2[4];
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- __m128i cx0;
- __m128i cx1;
- __m128i cx2;
-
- if (SOFT_AES) {
- cx0 = soft_aesenc((uint32_t*) &l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
- cx1 = soft_aesenc((uint32_t*) &l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
- cx2 = soft_aesenc((uint32_t*) &l2[idx2 & MASK], _mm_set_epi64x(ah2, al2));
+ if (VARIANT == POW_XHV) {
+ idx0 = (~d0) ^ q0;
} else {
- cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
- cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
- cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
-
- cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
- cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
- cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2));
+ idx0 = d0 ^ q0;
}
- _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
- _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
- _mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx2, cx2));
-
- idx0 = EXTRACT64(cx0);
- idx1 = EXTRACT64(cx1);
- idx2 = EXTRACT64(cx2);
-
- bx0 = cx0;
- bx1 = cx1;
- bx2 = cx2;
-
-
- uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*) &l0[idx0 & MASK])[0];
- ch = ((uint64_t*) &l0[idx0 & MASK])[1];
- lo = __umul128(idx0, cl, &hi);
-
- al0 += hi;
- ah0 += lo;
-
- ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
- ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
-
- ah0 ^= ch;
- al0 ^= cl;
- idx0 = al0;
-
- const int64x2_t x0 = vld1q_s64(reinterpret_cast<const int64_t*>(&l0[idx0 & MASK]));
- const int64_t n0 = vgetq_lane_s64(x0, 0);
- const int32_t d0 = vgetq_lane_s32(x0, 2);
- const int64_t q0 = n0 / (d0 | 0x5);
-
- ((int64_t*) &l0[idx0 & MASK])[0] = n0 ^ q0;
-
- idx0 = (~d0) ^ q0;
-
-
cl = ((uint64_t*) &l1[idx1 & MASK])[0];
ch = ((uint64_t*) &l1[idx1 & MASK])[1];
lo = __umul128(idx1, cl, &hi);
@@ -3503,7 +3024,11 @@ public:
((int64_t*) &l1[idx1 & MASK])[0] = n1 ^ q1;
- idx1 = (~d1) ^ q1;
+ if (VARIANT == POW_XHV) {
+ idx1 = (~d1) ^ q1;
+ } else {
+ idx1 = d1 ^ q1;
+ }
cl = ((uint64_t*) &l2[idx2 & MASK])[0];
ch = ((uint64_t*) &l2[idx2 & MASK])[1];
@@ -3519,6 +3044,7 @@ public:
al2 ^= cl;
idx2 = al2;
+
const int64x2_t x2 = vld1q_s64(reinterpret_cast<const int64_t*>(&l2[idx2 & MASK]));
const int64_t n2 = vgetq_lane_s64(x2, 0);
const int32_t d2 = vgetq_lane_s32(x2, 2);
@@ -3526,7 +3052,11 @@ public:
((int64_t*) &l2[idx2 & MASK])[0] = n2 ^ q2;
- idx2 = (~d2) ^ q2;
+ if (VARIANT == POW_XHV) {
+ idx2 = (~d2) ^ q2;
+ } else {
+ idx2 = d2 ^ q2;
+ }
}
cn_implode_scratchpad_heavy((__m128i*) l0, (__m128i*) h0);
@@ -3780,8 +3310,8 @@ public:
}
};
-template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES>
-class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, 4>
+template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, PowVariant VARIANT>
+class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, VARIANT, 4>
{
public:
inline static void hash(const uint8_t* __restrict__ input,
@@ -4220,10 +3750,10 @@ public:
cx3 = _mm_aesenc_si128(cx3, ax3);
}
- SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0)
- SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1)
- SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2)
- SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3)
+ SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ)
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
@@ -4243,7 +3773,7 @@ public:
lo = __umul128(idx0, cl, &hi);
- SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi);
+ SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ)
al0 += hi;
ah0 += lo;
@@ -4266,7 +3796,7 @@ public:
lo = __umul128(idx1, cl, &hi);
- SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi);
+ SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ)
al1 += hi;
ah1 += lo;
@@ -4289,7 +3819,7 @@ public:
lo = __umul128(idx2, cl, &hi);
- SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi);
+ SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ)
al2 += hi;
ah2 += lo;
@@ -4312,7 +3842,235 @@ public:
lo = __umul128(idx3, cl, &hi);
- SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi);
+ SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ)
+
+ al3 += hi;
+ ah3 += lo;
+
+ ((uint64_t*) &l3[idx3 & MASK])[0] = al3;
+ ((uint64_t*) &l3[idx3 & MASK])[1] = ah3;
+
+ ah3 ^= ch;
+ al3 ^= cl;
+ idx3 = al3;
+
+ bx13 = bx03;
+ bx03 = cx3;
+ }
+
+ cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
+ cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
+ cn_implode_scratchpad((__m128i*) l2, (__m128i*) h2);
+ cn_implode_scratchpad((__m128i*) l3, (__m128i*) h3);
+
+ keccakf(h0, 24);
+ keccakf(h1, 24);
+ keccakf(h2, 24);
+ keccakf(h3, 24);
+
+ extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
+ extra_hashes[scratchPad[1]->state[0] & 3](scratchPad[1]->state, 200, output + 32);
+ extra_hashes[scratchPad[2]->state[0] & 3](scratchPad[2]->state, 200, output + 64);
+ extra_hashes[scratchPad[3]->state[0] & 3](scratchPad[3]->state, 200, output + 96);
+ }
+
+ // quadruple
+ inline static void hashPowV4(const uint8_t* __restrict__ input,
+ size_t size,
+ uint8_t* __restrict__ output,
+ ScratchPad** __restrict__ scratchPad,
+ uint64_t height)
+ {
+ keccak(input, (int) size, scratchPad[0]->state, 200);
+ keccak(input + size, (int) size, scratchPad[1]->state, 200);
+ keccak(input + 2 * size, (int) size, scratchPad[2]->state, 200);
+ keccak(input + 3 * size, (int) size, scratchPad[3]->state, 200);
+
+ const uint8_t* l0 = scratchPad[0]->memory;
+ const uint8_t* l1 = scratchPad[1]->memory;
+ const uint8_t* l2 = scratchPad[2]->memory;
+ const uint8_t* l3 = scratchPad[3]->memory;
+ uint64_t* h0 = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
+ uint64_t* h1 = reinterpret_cast<uint64_t*>(scratchPad[1]->state);
+ uint64_t* h2 = reinterpret_cast<uint64_t*>(scratchPad[2]->state);
+ uint64_t* h3 = reinterpret_cast<uint64_t*>(scratchPad[3]->state);
+
+ cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
+ cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
+ cn_explode_scratchpad((__m128i*) h2, (__m128i*) l2);
+ cn_explode_scratchpad((__m128i*) h3, (__m128i*) l3);
+
+ uint64_t al0 = h0[0] ^h0[4];
+ uint64_t al1 = h1[0] ^h1[4];
+ uint64_t al2 = h2[0] ^h2[4];
+ uint64_t al3 = h3[0] ^h3[4];
+ uint64_t ah0 = h0[1] ^h0[5];
+ uint64_t ah1 = h1[1] ^h1[5];
+ uint64_t ah2 = h2[1] ^h2[5];
+ uint64_t ah3 = h3[1] ^h3[5];
+
+ __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+ __m128i bx01 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+ __m128i bx02 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
+ __m128i bx03 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
+
+ __m128i bx10 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
+ __m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
+ __m128i bx12 = _mm_set_epi64x(h2[9] ^ h2[11], h2[8] ^ h2[10]);
+ __m128i bx13 = _mm_set_epi64x(h3[9] ^ h3[11], h3[8] ^ h3[10]);
+
+ uint64_t idx0 = h0[0] ^h0[4];
+ uint64_t idx1 = h1[0] ^h1[4];
+ uint64_t idx2 = h2[0] ^h2[4];
+ uint64_t idx3 = h3[0] ^h3[4];
+
+ VARIANT4_RANDOM_MATH_INIT(0, h0)
+ VARIANT4_RANDOM_MATH_INIT(1, h1)
+ VARIANT4_RANDOM_MATH_INIT(2, h2)
+ VARIANT4_RANDOM_MATH_INIT(3, h3)
+
+ for (size_t i = 0; i < ITERATIONS; i++) {
+ __m128i cx0;
+ __m128i cx1;
+ __m128i cx2;
+ __m128i cx3;
+
+ const __m128i ax0 = _mm_set_epi64x(ah0, al0);
+ const __m128i ax1 = _mm_set_epi64x(ah1, al1);
+ const __m128i ax2 = _mm_set_epi64x(ah2, al2);
+ const __m128i ax3 = _mm_set_epi64x(ah3, al3);
+
+ if (SOFT_AES) {
+ cx0 = soft_aesenc((uint32_t*) &l0[idx0 & MASK], ax0);
+ cx1 = soft_aesenc((uint32_t*) &l1[idx1 & MASK], ax1);
+ cx2 = soft_aesenc((uint32_t*) &l2[idx2 & MASK], ax2);
+ cx3 = soft_aesenc((uint32_t*) &l3[idx3 & MASK], ax3);
+ } else {
+ cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+ cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+ cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
+ cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
+
+ cx0 = _mm_aesenc_si128(cx0, ax0);
+ cx1 = _mm_aesenc_si128(cx1, ax1);
+ cx2 = _mm_aesenc_si128(cx2, ax2);
+ cx3 = _mm_aesenc_si128(cx3, ax3);
+ }
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0)
+ SHUFFLE_V4(l1, (idx1&MASK), bx01, bx11, ax1, cx1)
+ SHUFFLE_V4(l2, (idx2&MASK), bx02, bx12, ax2, cx2)
+ SHUFFLE_V4(l3, (idx3&MASK), bx03, bx13, ax3, cx3)
+
+ _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
+ _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
+ _mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx02, cx2));
+ _mm_store_si128((__m128i*) &l3[idx3 & MASK], _mm_xor_si128(bx03, cx3));
+
+ idx0 = EXTRACT64(cx0);
+ idx1 = EXTRACT64(cx1);
+ idx2 = EXTRACT64(cx2);
+ idx3 = EXTRACT64(cx3);
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & MASK])[0];
+ ch = ((uint64_t*) &l0[idx0 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx10)
+
+ if (VARIANT == POW_V4) {
+ al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
+ ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
+ }
+
+ lo = __umul128(idx0, cl, &hi);
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0)
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
+ ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+
+ bx10 = bx00;
+ bx00 = cx0;
+
+
+ cl = ((uint64_t*) &l1[idx1 & MASK])[0];
+ ch = ((uint64_t*) &l1[idx1 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx01, bx11)
+
+ if (VARIANT == POW_V4) {
+ al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
+ ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
+ }
+
+
+ lo = __umul128(idx1, cl, &hi);
+
+ SHUFFLE_V4(l1, (idx1&MASK), bx01, bx11, ax1, cx1)
+
+ al1 += hi;
+ ah1 += lo;
+
+ ((uint64_t*) &l1[idx1 & MASK])[0] = al1;
+ ((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
+
+ ah1 ^= ch;
+ al1 ^= cl;
+ idx1 = al1;
+
+ bx11 = bx01;
+ bx01 = cx1;
+
+
+ cl = ((uint64_t*) &l2[idx2 & MASK])[0];
+ ch = ((uint64_t*) &l2[idx2 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(2, al2, ah2, cl, bx02, bx12)
+
+ if (VARIANT == POW_V4) {
+ al2 ^= r2[2] | ((uint64_t)(r2[3]) << 32);
+ ah2 ^= r2[0] | ((uint64_t)(r2[1]) << 32);
+ }
+
+ lo = __umul128(idx2, cl, &hi);
+
+ SHUFFLE_V4(l2, (idx2&MASK), bx02, bx12, ax2, cx2)
+
+ al2 += hi;
+ ah2 += lo;
+
+ ((uint64_t*) &l2[idx2 & MASK])[0] = al2;
+ ((uint64_t*) &l2[idx2 & MASK])[1] = ah2;
+
+ ah2 ^= ch;
+ al2 ^= cl;
+ idx2 = al2;
+
+ bx12 = bx02;
+ bx02 = cx2;
+
+
+ cl = ((uint64_t*) &l3[idx3 & MASK])[0];
+ ch = ((uint64_t*) &l3[idx3 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(3, al3, ah3, cl, bx03, bx13)
+
+ if (VARIANT == POW_V4) {
+ al3 ^= r3[2] | ((uint64_t)(r3[3]) << 32);
+ ah3 ^= r3[0] | ((uint64_t)(r3[1]) << 32);
+ }
+
+ lo = __umul128(idx3, cl, &hi);
+
+ SHUFFLE_V4(l3, (idx3&MASK), bx03, bx13, ax3, cx3)
al3 += hi;
ah3 += lo;
@@ -4550,14 +4308,6 @@ public:
// not supported
}
- inline static void hashHeavyHaven(const uint8_t* __restrict__ input,
- size_t size,
- uint8_t* __restrict__ output,
- ScratchPad** __restrict__ scratchPad)
- {
- // not supported
- }
-
inline static void hashHeavyTube(const uint8_t* __restrict__ input,
size_t size,
uint8_t* __restrict__ output,
@@ -4567,8 +4317,8 @@ public:
}
};
-template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES>
-class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, 5>
+template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, PowVariant VARIANT>
+class CryptoNightMultiHash<ITERATIONS, INDEX_SHIFT, MEM, MASK, SOFT_AES, VARIANT, 5>
{//
public:
inline static void hash(const uint8_t* __restrict__ input,
@@ -5095,11 +4845,11 @@ public:
cx4 = _mm_aesenc_si128(cx4, ax4);
}
- SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0)
- SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1)
- SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2)
- SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3)
- SHUFFLE_PHASE_1(l4, (idx4&MASK), bx04, bx14, ax4)
+ SHUFFLE_PHASE_1(l0, (idx0&MASK), bx00, bx10, ax0, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l1, (idx1&MASK), bx01, bx11, ax1, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l2, (idx2&MASK), bx02, bx12, ax2, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l3, (idx3&MASK), bx03, bx13, ax3, VARIANT == POW_RWZ)
+ SHUFFLE_PHASE_1(l4, (idx4&MASK), bx04, bx14, ax4, VARIANT == POW_RWZ)
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
@@ -5121,7 +4871,7 @@ public:
lo = __umul128(idx0, cl, &hi);
- SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi);
+ SHUFFLE_PHASE_2(l0, (idx0&MASK), bx00, bx10, ax0, lo, hi, VARIANT == POW_RWZ)
al0 += hi;
ah0 += lo;
@@ -5144,7 +4894,7 @@ public:
lo = __umul128(idx1, cl, &hi);
- SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi);
+ SHUFFLE_PHASE_2(l1, (idx1&MASK), bx01, bx11, ax1, lo, hi, VARIANT == POW_RWZ)
al1 += hi;
ah1 += lo;
@@ -5167,7 +4917,7 @@ public:
lo = __umul128(idx2, cl, &hi);
- SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi);
+ SHUFFLE_PHASE_2(l2, (idx2&MASK), bx02, bx12, ax2, lo, hi, VARIANT == POW_RWZ)
al2 += hi;
ah2 += lo;
@@ -5190,7 +4940,7 @@ public:
lo = __umul128(idx3, cl, &hi);
- SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi);
+ SHUFFLE_PHASE_2(l3, (idx3&MASK), bx03, bx13, ax3, lo, hi, VARIANT == POW_RWZ)
al3 += hi;
ah3 += lo;
@@ -5213,7 +4963,283 @@ public:
lo = __umul128(idx4, cl, &hi);
- SHUFFLE_PHASE_2(l4, (idx4&MASK), bx04, bx14, ax4, lo, hi);
+ SHUFFLE_PHASE_2(l4, (idx4&MASK), bx04, bx14, ax4, lo, hi, VARIANT == POW_RWZ)
+
+ al4 += hi;
+ ah4 += lo;
+
+ ((uint64_t*) &l4[idx4 & MASK])[0] = al4;
+ ((uint64_t*) &l4[idx4 & MASK])[1] = ah4;
+
+ ah4 ^= ch;
+ al4 ^= cl;
+ idx4 = al4;
+
+ bx14 = bx04;
+ bx04 = cx4;
+ }
+
+ cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
+ cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
+ cn_implode_scratchpad((__m128i*) l2, (__m128i*) h2);
+ cn_implode_scratchpad((__m128i*) l3, (__m128i*) h3);
+ cn_implode_scratchpad((__m128i*) l4, (__m128i*) h4);
+
+ keccakf(h0, 24);
+ keccakf(h1, 24);
+ keccakf(h2, 24);
+ keccakf(h3, 24);
+ keccakf(h4, 24);
+
+ extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
+ extra_hashes[scratchPad[1]->state[0] & 3](scratchPad[1]->state, 200, output + 32);
+ extra_hashes[scratchPad[2]->state[0] & 3](scratchPad[2]->state, 200, output + 64);
+ extra_hashes[scratchPad[3]->state[0] & 3](scratchPad[3]->state, 200, output + 96);
+ extra_hashes[scratchPad[4]->state[0] & 3](scratchPad[4]->state, 200, output + 128);
+ }
+
+ // quintuple
+ inline static void hashPowV4(const uint8_t* __restrict__ input,
+ size_t size,
+ uint8_t* __restrict__ output,
+ ScratchPad** __restrict__ scratchPad,
+ uint64_t height)
+ {
+ keccak(input, (int) size, scratchPad[0]->state, 200);
+ keccak(input + size, (int) size, scratchPad[1]->state, 200);
+ keccak(input + 2 * size, (int) size, scratchPad[2]->state, 200);
+ keccak(input + 3 * size, (int) size, scratchPad[3]->state, 200);
+ keccak(input + 4 * size, (int) size, scratchPad[4]->state, 200);
+
+ const uint8_t* l0 = scratchPad[0]->memory;
+ const uint8_t* l1 = scratchPad[1]->memory;
+ const uint8_t* l2 = scratchPad[2]->memory;
+ const uint8_t* l3 = scratchPad[3]->memory;
+ const uint8_t* l4 = scratchPad[4]->memory;
+ uint64_t* h0 = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
+ uint64_t* h1 = reinterpret_cast<uint64_t*>(scratchPad[1]->state);
+ uint64_t* h2 = reinterpret_cast<uint64_t*>(scratchPad[2]->state);
+ uint64_t* h3 = reinterpret_cast<uint64_t*>(scratchPad[3]->state);
+ uint64_t* h4 = reinterpret_cast<uint64_t*>(scratchPad[4]->state);
+
+ cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
+ cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
+ cn_explode_scratchpad((__m128i*) h2, (__m128i*) l2);
+ cn_explode_scratchpad((__m128i*) h3, (__m128i*) l3);
+ cn_explode_scratchpad((__m128i*) h4, (__m128i*) l4);
+
+ uint64_t al0 = h0[0] ^h0[4];
+ uint64_t al1 = h1[0] ^h1[4];
+ uint64_t al2 = h2[0] ^h2[4];
+ uint64_t al3 = h3[0] ^h3[4];
+ uint64_t al4 = h4[0] ^h4[4];
+ uint64_t ah0 = h0[1] ^h0[5];
+ uint64_t ah1 = h1[1] ^h1[5];
+ uint64_t ah2 = h2[1] ^h2[5];
+ uint64_t ah3 = h3[1] ^h3[5];
+ uint64_t ah4 = h4[1] ^h4[5];
+
+ __m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+ __m128i bx01 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+ __m128i bx02 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
+ __m128i bx03 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
+ __m128i bx04 = _mm_set_epi64x(h4[3] ^ h4[7], h4[2] ^ h4[6]);
+
+ __m128i bx10 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
+ __m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
+ __m128i bx12 = _mm_set_epi64x(h2[9] ^ h2[11], h2[8] ^ h2[10]);
+ __m128i bx13 = _mm_set_epi64x(h3[9] ^ h3[11], h3[8] ^ h3[10]);
+ __m128i bx14 = _mm_set_epi64x(h4[9] ^ h4[11], h4[8] ^ h4[10]);
+
+ uint64_t idx0 = h0[0] ^h0[4];
+ uint64_t idx1 = h1[0] ^h1[4];
+ uint64_t idx2 = h2[0] ^h2[4];
+ uint64_t idx3 = h3[0] ^h3[4];
+ uint64_t idx4 = h4[0] ^h4[4];
+
+ VARIANT4_RANDOM_MATH_INIT(0, h0)
+ VARIANT4_RANDOM_MATH_INIT(1, h1)
+ VARIANT4_RANDOM_MATH_INIT(2, h2)
+ VARIANT4_RANDOM_MATH_INIT(3, h3)
+ VARIANT4_RANDOM_MATH_INIT(4, h4)
+
+ for (size_t i = 0; i < ITERATIONS; i++) {
+ __m128i cx0;
+ __m128i cx1;
+ __m128i cx2;
+ __m128i cx3;
+ __m128i cx4;
+
+ const __m128i ax0 = _mm_set_epi64x(ah0, al0);
+ const __m128i ax1 = _mm_set_epi64x(ah1, al1);
+ const __m128i ax2 = _mm_set_epi64x(ah2, al2);
+ const __m128i ax3 = _mm_set_epi64x(ah3, al3);
+ const __m128i ax4 = _mm_set_epi64x(ah4, al4);
+
+ if (SOFT_AES) {
+ cx0 = soft_aesenc((uint32_t*) &l0[idx0 & MASK], ax0);
+ cx1 = soft_aesenc((uint32_t*) &l1[idx1 & MASK], ax1);
+ cx2 = soft_aesenc((uint32_t*) &l2[idx2 & MASK], ax2);
+ cx3 = soft_aesenc((uint32_t*) &l3[idx3 & MASK], ax3);
+ cx4 = soft_aesenc((uint32_t*) &l4[idx4 & MASK], ax4);
+ } else {
+ cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+ cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+ cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
+ cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
+ cx4 = _mm_load_si128((__m128i*) &l4[idx4 & MASK]);
+
+ cx0 = _mm_aesenc_si128(cx0, ax0);
+ cx1 = _mm_aesenc_si128(cx1, ax1);
+ cx2 = _mm_aesenc_si128(cx2, ax2);
+ cx3 = _mm_aesenc_si128(cx3, ax3);
+ cx4 = _mm_aesenc_si128(cx4, ax4);
+ }
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0)
+ SHUFFLE_V4(l1, (idx1&MASK), bx01, bx11, ax1, cx1)
+ SHUFFLE_V4(l2, (idx2&MASK), bx02, bx12, ax2, cx2)
+ SHUFFLE_V4(l3, (idx3&MASK), bx03, bx13, ax3, cx3)
+ SHUFFLE_V4(l4, (idx4&MASK), bx04, bx14, ax4, cx4)
+
+ _mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx00, cx0));
+ _mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx01, cx1));
+ _mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx02, cx2));
+ _mm_store_si128((__m128i*) &l3[idx3 & MASK], _mm_xor_si128(bx03, cx3));
+ _mm_store_si128((__m128i*) &l4[idx4 & MASK], _mm_xor_si128(bx04, cx4));
+
+ idx0 = EXTRACT64(cx0);
+ idx1 = EXTRACT64(cx1);
+ idx2 = EXTRACT64(cx2);
+ idx3 = EXTRACT64(cx3);
+ idx4 = EXTRACT64(cx4);
+
+ uint64_t hi, lo, cl, ch;
+ cl = ((uint64_t*) &l0[idx0 & MASK])[0];
+ ch = ((uint64_t*) &l0[idx0 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx10)
+
+ if (VARIANT == POW_V4) {
+ al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
+ ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
+ }
+
+ lo = __umul128(idx0, cl, &hi);
+
+ SHUFFLE_V4(l0, (idx0&MASK), bx00, bx10, ax0, cx0);
+
+ al0 += hi;
+ ah0 += lo;
+
+ ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
+ ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
+
+ ah0 ^= ch;
+ al0 ^= cl;
+ idx0 = al0;
+
+ bx10 = bx00;
+ bx00 = cx0;
+
+
+ cl = ((uint64_t*) &l1[idx1 & MASK])[0];
+ ch = ((uint64_t*) &l1[idx1 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx01, bx11)
+
+ if (VARIANT == POW_V4) {
+ al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
+ ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
+ }
+
+ lo = __umul128(idx1, cl, &hi);
+
+ SHUFFLE_V4(l1, (idx1&MASK), bx01, bx11, ax1, cx1);
+
+ al1 += hi;
+ ah1 += lo;
+
+ ((uint64_t*) &l1[idx1 & MASK])[0] = al1;
+ ((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
+
+ ah1 ^= ch;
+ al1 ^= cl;
+ idx1 = al1;
+
+ bx11 = bx01;
+ bx01 = cx1;
+
+
+ cl = ((uint64_t*) &l2[idx2 & MASK])[0];
+ ch = ((uint64_t*) &l2[idx2 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(2, al2, ah2, cl, bx02, bx12)
+
+ if (VARIANT == POW_V4) {
+ al2 ^= r2[2] | ((uint64_t)(r2[3]) << 32);
+ ah2 ^= r2[0] | ((uint64_t)(r2[1]) << 32);
+ }
+
+ lo = __umul128(idx2, cl, &hi);
+
+ SHUFFLE_V4(l2, (idx2&MASK), bx02, bx12, ax2, cx2);
+
+ al2 += hi;
+ ah2 += lo;
+
+ ((uint64_t*) &l2[idx2 & MASK])[0] = al2;
+ ((uint64_t*) &l2[idx2 & MASK])[1] = ah2;
+
+ ah2 ^= ch;
+ al2 ^= cl;
+ idx2 = al2;
+
+ bx12 = bx02;
+ bx02 = cx2;
+
+
+ cl = ((uint64_t*) &l3[idx3 & MASK])[0];
+ ch = ((uint64_t*) &l3[idx3 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(3, al3, ah3, cl, bx03, bx13)
+
+ if (VARIANT == POW_V4) {
+ al3 ^= r3[2] | ((uint64_t)(r3[3]) << 32);
+ ah3 ^= r3[0] | ((uint64_t)(r3[1]) << 32);
+ }
+
+ lo = __umul128(idx3, cl, &hi);
+
+ SHUFFLE_V4(l3, (idx3&MASK), bx03, bx13, ax3, cx3);
+
+ al3 += hi;
+ ah3 += lo;
+
+ ((uint64_t*) &l3[idx3 & MASK])[0] = al3;
+ ((uint64_t*) &l3[idx3 & MASK])[1] = ah3;
+
+ ah3 ^= ch;
+ al3 ^= cl;
+ idx3 = al3;
+
+ bx13 = bx03;
+ bx03 = cx3;
+
+
+ cl = ((uint64_t*) &l4[idx4 & MASK])[0];
+ ch = ((uint64_t*) &l4[idx4 & MASK])[1];
+
+ VARIANT4_RANDOM_MATH(4, al4, ah4, cl, bx04, bx14)
+
+ if (VARIANT == POW_V4) {
+ al4 ^= r4[2] | ((uint64_t)(r4[3]) << 32);
+ ah4 ^= r4[0] | ((uint64_t)(r4[1]) << 32);
+ }
+
+ lo = __umul128(idx4, cl, &hi);
+
+ SHUFFLE_V4(l4, (idx4&MASK), bx04, bx14, ax4, cx4);
al4 += hi;
ah4 += lo;
@@ -5496,14 +5522,6 @@ public:
// not supported
}
- inline static void hashHeavyHaven(const uint8_t* __restrict__ input,
- size_t size,
- uint8_t* __restrict__ output,
- ScratchPad** __restrict__ scratchPad)
- {
- // not supported
- }
-
inline static void hashHeavyTube(const uint8_t* __restrict__ input,
size_t size,
uint8_t* __restrict__ output,
diff --git a/src/crypto/CryptoNight_test.h b/src/crypto/CryptoNight_test.h
index 836f2822..373c51cc 100644
--- a/src/crypto/CryptoNight_test.h
+++ b/src/crypto/CryptoNight_test.h
@@ -138,6 +138,53 @@ const static uint8_t test_output_xtl_v9[64] = {
0xF1, 0xF0, 0x55, 0x34, 0x15, 0x29, 0x93, 0x04, 0x2D, 0xED, 0xD2, 0x33, 0x50, 0x6E, 0xBE, 0x25
};
+// CN XCASH
+const static uint8_t test_output_xcash[32] = {
+ 0xAE, 0xFB, 0xB3, 0xF0, 0xCC, 0x88, 0x04, 0x6D, 0x11, 0x9F, 0x6C, 0x54, 0xB9, 0x6D, 0x90, 0xC9,
+ 0xE8, 0x84, 0xEA, 0x3B, 0x59, 0x83, 0xA6, 0x0D, 0x50, 0xA4, 0x2D, 0x7D, 0x3E, 0xBE, 0x48, 0x21
+};
+
+// CN ZELERIUS
+const static uint8_t test_output_zelerius[32] = {
+ 0x51, 0x6E, 0x33, 0xC6, 0xE4, 0x46, 0xAB, 0xBC, 0xCD, 0xAD, 0x18, 0xC0, 0x4C, 0xD9, 0xA2, 0x5E,
+ 0x64, 0x10, 0x28, 0x53, 0xB2, 0x0A, 0x42, 0xDF, 0xDE, 0xAA, 0x8B, 0x59, 0x9E, 0xCF, 0x40, 0xE2
+};
+
+// CN RWZ
+const static uint8_t test_output_rwz[64] = {
+ 0x5f, 0x56, 0xc6, 0xb0, 0x99, 0x6b, 0xa2, 0x3e, 0x0b, 0xba, 0x07, 0x29, 0xc9, 0x90, 0x74, 0x85,
+ 0x5a, 0x10, 0xe3, 0x08, 0x7f, 0xdb, 0xfe, 0x94, 0x75, 0x33, 0x54, 0x73, 0x76, 0xf0, 0x75, 0xb8,
+ 0x8b, 0x70, 0x43, 0x9a, 0xfc, 0xf5, 0xeb, 0x15, 0xbb, 0xf9, 0xad, 0x9d, 0x2a, 0xbd, 0x72, 0x52,
+ 0x49, 0x54, 0x0b, 0x91, 0xea, 0x61, 0x7f, 0x98, 0x7d, 0x39, 0x17, 0xb7, 0xd7, 0x65, 0xff, 0x75
+};
+
+// CN V9 aka CN V4/V5 aka CN-R (height 10000)
+const static uint8_t test_output_v4[160] = {
+ 0x90, 0x20, 0x14, 0x86, 0x1E, 0xCD, 0x01, 0xC5, 0x43, 0xB5, 0x61, 0xFA, 0xC8, 0x3D, 0xFF, 0x7D,
+ 0x76, 0x67, 0xC2, 0xD7, 0xB3, 0xD4, 0xE3, 0x4B, 0x4C, 0x7E, 0x6D, 0x04, 0x31, 0x79, 0xE6, 0x96,
+ 0xEA, 0xF4, 0x14, 0x76, 0x38, 0x94, 0x7C, 0xCE, 0x02, 0x50, 0x7A, 0x31, 0xB8, 0x4D, 0xDD, 0x3B,
+ 0x92, 0xAA, 0xC6, 0x49, 0xA1, 0x64, 0xA1, 0xA8, 0x7C, 0xD9, 0x43, 0x14, 0xC5, 0x12, 0x86, 0x61,
+ 0x0A, 0x18, 0xBD, 0x11, 0x36, 0x06, 0x31, 0x0D, 0x9D, 0xC0, 0x8C, 0x41, 0x88, 0xCB, 0x7C, 0xE9,
+ 0x5D, 0xD2, 0xBA, 0xA5, 0xFB, 0x0D, 0x2B, 0xA6, 0x6E, 0x7C, 0x78, 0x72, 0x38, 0xFE, 0x53, 0x17,
+ 0x1A, 0x96, 0x89, 0x0E, 0x14, 0xFF, 0x34, 0x42, 0xC0, 0x5A, 0xAB, 0xC0, 0x3F, 0x39, 0x4E, 0x43,
+ 0x91, 0x38, 0x67, 0x79, 0x5B, 0xAE, 0xCC, 0xA7, 0xDB, 0x4C, 0xFE, 0x8B, 0x75, 0x76, 0x1F, 0xC4,
+ 0x98, 0x71, 0xE6, 0xC1, 0x08, 0x9D, 0xED, 0xCC, 0x47, 0xC3, 0xF3, 0x7A, 0xA9, 0x4A, 0x3A, 0xB9,
+ 0xAC, 0xB8, 0x5C, 0x9F, 0xCC, 0xCB, 0xC1, 0x93, 0x9E, 0xC6, 0x6D, 0xCC, 0x45, 0xF4, 0xBA, 0xBD
+};
+
+// CN V9 aka CN V4/V5 aka CN-R (height 10001)
+const static uint8_t test_output_v4_1[32] = {
+ 0x82, 0x58, 0x7D, 0x63, 0x7B, 0x6C, 0x0C, 0x96, 0x6A, 0x50, 0xF6, 0xC0, 0xAB, 0xB5, 0xEA, 0x1A,
+ 0x58, 0x2B, 0xEA, 0x7E, 0xF0, 0x2F, 0x3C, 0xA1, 0x7C, 0x1C, 0x7C, 0x2E, 0xF9, 0xE5, 0x66, 0xF2
+};
+
+// CN V9 aka CN V4/V5 aka CN-R (height 10002)
+const static uint8_t test_output_v4_2[32] = {
+ 0x64, 0xB2, 0x4E, 0x48, 0x4A, 0x28, 0xBF, 0x11, 0xC4, 0x8A, 0x68, 0xE7, 0xB7, 0x4B, 0xFD, 0xA7,
+ 0xFB, 0x95, 0x66, 0x05, 0x0C, 0xF7, 0xFA, 0xA7, 0x4B, 0xD9, 0x18, 0x59, 0x88, 0x7F, 0x47, 0xA2
+};
+
+
// CN-LITE
const static uint8_t test_output_v0_lite[160] = {
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h
index 0c9127f1..cbe970e7 100644
--- a/src/crypto/CryptoNight_x86.h
+++ b/src/crypto/CryptoNight_x86.h
@@ -36,10 +36,24 @@
# define __restrict__ __restrict
#endif
+#define SWAP32LE(x) x
+#define SWAP64LE(x) x
+#define hash_extra_blake(data, length, hash) blake256_hash((uint8_t*)(hash), (uint8_t*)(data), (length))
+
+#ifndef NOINLINE
+#ifdef __GNUC__
+#define NOINLINE __attribute__ ((noinline))
+#elif _MSC_VER
+#define NOINLINE __declspec(noinline)
+#else
+#define NOINLINE
+#endif
+#endif
#include "crypto/CryptoNight.h"
#include "crypto/soft_aes.h"
#include "AsmOptimization.h"
+#include "variant4_random_math.h"
extern "C"
{
@@ -71,6 +85,19 @@ extern "C"
void cnv2_main_loop_ultralite_bulldozer_asm(ScratchPad* ctx0);
void cnv2_double_main_loop_ultralite_sandybridge_asm(ScratchPad* ctx0, ScratchPad* ctx1);
+ void cnv2_main_loop_xcash_ivybridge_asm(ScratchPad* ctx0);
+ void cnv2_main_loop_xcash_ryzen_asm(ScratchPad* ctx0);
+ void cnv2_main_loop_xcash_bulldozer_asm(ScratchPad* ctx0);
+ void cnv2_double_main_loop_xcash_sandybridge_asm(ScratchPad* ctx0, ScratchPad* ctx1);
+
+ void cnv2_main_loop_zelerius_ivybridge_asm(ScratchPad* ctx0);
+ void cnv2_main_loop_zelerius_ryzen_asm(ScratchPad* ctx0);
+ void cnv2_main_loop_zelerius_bulldozer_asm(ScratchPad* ctx0);
+ void cnv2_double_main_loop_zelerius_sandybridge_asm(ScratchPad* ctx0, ScratchPad* ctx1);
+
+ void cnv2_main_loop_rwz_all_asm(ScratchPad* ctx0);
+ void cnv2_double_main_loop_rwz_all_asm(ScratchPad* ctx0, ScratchPad* ctx1);
+
void cnv1_main_loop_soft_aes_sandybridge_asm(ScratchPad* ctx0);
void cnv1_main_loop_lite_soft_aes_sandybridge_asm(ScratchPad* ctx0);
void cnv1_main_loop_fast_soft_aes_sandybridge_asm(ScratchPad* ctx0);
@@ -80,6 +107,16 @@ extern "C"
void cnv2_main_loop_soft_aes_sandybridge_asm(ScratchPad* ctx0);
void cnv2_main_loop_fastv2_soft_aes_sandybridge_asm(ScratchPad* ctx0);
void cnv2_main_loop_ultralite_soft_aes_sandybridge_asm(ScratchPad* ctx);
+ void cnv2_main_loop_xcash_soft_aes_sandybridge_asm(ScratchPad* ctx);
+ void cnv2_main_loop_zelerius_soft_aes_sandybridge_asm(ScratchPad* ctx);
+
+ void wow_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM);
+ void wow_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM);
+ void wow_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM);
+
+ void v4_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM);
+ void v4_compile_code(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM);
+ void v4_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, AsmOptimization ASM);
#endif
}
@@ -148,24 +185,22 @@ static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uin
}
#endif
-#ifdef _MSC_VER
-#else
-#endif
-
#ifdef _MSC_VER
# define SET_ROUNDING_MODE_UP() _control87(RC_UP, MCW_RC);
+# define SET_ROUNDING_MODE_DOWN() _control87(RC_DOWN, MCW_RC);
#else
# define SET_ROUNDING_MODE_UP() std::fesetround(FE_UPWARD);
+# define SET_ROUNDING_MODE_DOWN() fesetround(FE_DOWNWARD);
#endif
-# define SHUFFLE_PHASE_1(l, idx, bx0, bx1, ax) \
+# define SHUFFLE_PHASE_1(l, idx, bx0, bx1, ax, reverse) \
{ \
- const __m128i chunk1 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x10))); \
- const __m128i chunk2 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x20))); \
- const __m128i chunk3 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x30))); \
- _mm_store_si128((__m128i *)((l) + ((idx) ^ 0x10)), _mm_add_epi64(chunk3, bx1)); \
- _mm_store_si128((__m128i *)((l) + ((idx) ^ 0x20)), _mm_add_epi64(chunk1, bx0)); \
- _mm_store_si128((__m128i *)((l) + ((idx) ^ 0x30)), _mm_add_epi64(chunk2, ax)); \
+ const __m128i chunk1 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x10))); \
+ const __m128i chunk2 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x20))); \
+ const __m128i chunk3 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x30))); \
+ _mm_store_si128((__m128i *)((l) + ((idx) ^ 0x10)), _mm_add_epi64(chunk3, bx1)); \
+ _mm_store_si128((__m128i *)((l) + ((idx) ^ 0x20)), _mm_add_epi64(chunk1, bx0)); \
+ _mm_store_si128((__m128i *)((l) + ((idx) ^ 0x30)), _mm_add_epi64(chunk2, ax)); \
}
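+// NOTE: the "reverse" flag added to SHUFFLE_PHASE_1/SHUFFLE_PHASE_2 is passed as (VARIANT == POW_RWZ)
+// at the call sites; the rwz/graft variant uses a reversed chunk order in the shuffle phases.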
# define INTEGER_MATH_V2(idx, cl, cx) \
@@ -179,18 +214,47 @@ static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uin
sqrt_result##idx = int_sqrt_v2(cx_ + division_result); \
}
-# define SHUFFLE_PHASE_2(l, idx, bx0, bx1, ax, lo, hi) \
+# define SHUFFLE_PHASE_2(l, idx, bx0, bx1, ax, lo, hi, reverse) \
{ \
const __m128i chunk1 = _mm_xor_si128(_mm_load_si128((__m128i *)((l) + ((idx) ^ 0x10))), _mm_set_epi64x(lo, hi)); \
const __m128i chunk2 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x20))); \
const __m128i chunk3 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x30))); \
hi ^= ((uint64_t*)((l) + ((idx) ^ 0x20)))[0]; \
lo ^= ((uint64_t*)((l) + ((idx) ^ 0x20)))[1]; \
+ _mm_store_si128((__m128i *)((l) + ((idx) ^ 0x10)), _mm_add_epi64(chunk3, bx1)); \
+ _mm_store_si128((__m128i *)((l) + ((idx) ^ 0x20)), _mm_add_epi64(chunk1, bx0)); \
+ _mm_store_si128((__m128i *)((l) + ((idx) ^ 0x30)), _mm_add_epi64(chunk2, ax)); \
+}
+
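+// CN-R shuffle: same three-chunk mixing as SHUFFLE_PHASE_1, but the neighbouring chunks are also
+// folded back into cx.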
+# define SHUFFLE_V4(l, idx, bx0, bx1, ax, cx) \
+{ \
+ const __m128i chunk1 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x10))); \
+ const __m128i chunk2 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x20))); \
+ const __m128i chunk3 = _mm_load_si128((__m128i *)((l) + ((idx) ^ 0x30))); \
_mm_store_si128((__m128i *)((l) + ((idx) ^ 0x10)), _mm_add_epi64(chunk3, bx1)); \
_mm_store_si128((__m128i *)((l) + ((idx) ^ 0x20)), _mm_add_epi64(chunk1, bx0)); \
_mm_store_si128((__m128i *)((l) + ((idx) ^ 0x30)), _mm_add_epi64(chunk2, ax)); \
+ cx = _mm_xor_si128(_mm_xor_si128(cx, chunk3), _mm_xor_si128(chunk1, chunk2)); \
}
+# define VARIANT4_RANDOM_MATH_INIT(idx, h) \
+ uint32_t r##idx[9]; \
+ struct V4_Instruction code##idx[256]; \
+ r##idx[0] = (uint32_t)(h[12]); \
+ r##idx[1] = (uint32_t)(h[12] >> 32); \
+ r##idx[2] = (uint32_t)(h[13]); \
+ r##idx[3] = (uint32_t)(h[13] >> 32); \
+ v4_random_math_init(code##idx, VARIANT, height);
+
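+// VARIANT4_RANDOM_MATH executes one round of the random-math program: cl is perturbed by r0..r3,
+// r4..r8 are refreshed from a, b0 and b1, then the generated instructions are run over r.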
+# define VARIANT4_RANDOM_MATH(idx, al, ah, cl, bx0, bx1) \
+ cl ^= (r##idx[0] + r##idx[1]) | ((uint64_t)(r##idx[2] + r##idx[3]) << 32); \
+ r##idx[4] = static_cast<uint32_t>(al); \
+ r##idx[5] = static_cast<uint32_t>(ah); \
+ r##idx[6] = static_cast<uint32_t>(_mm_cvtsi128_si32(bx0)); \
+ r##idx[7] = static_cast<uint32_t>(_mm_cvtsi128_si32(bx1)); \
+ r##idx[8] = static_cast<uint32_t>(_mm_cvtsi128_si32(_mm_srli_si128(bx1, 8))); \
+ v4_random_math(code##idx, r##idx); \
+
static inline void do_blake_hash(const uint8_t *input, size_t len, uint8_t *output) {
blake256_hash(output, input, len);
}
@@ -592,7 +656,7 @@ return r;
}
// n-Loop version. Seems to be a little bit slower than the hardcoded one.
-template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, size_t NUM_HASH_BLOCKS>
+template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, PowVariant VARIANT, size_t NUM_HASH_BLOCKS>
class CryptoNightMultiHash
{
public:
@@ -601,78 +665,7 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
- __m128i ax[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast<const uint8_t*>(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state, 200);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast<uint64_t*>(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- ax[hashBlock] = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- if (SOFT_AES) {
- cx[hashBlock] = soft_aesenc((uint32_t *) &l[hashBlock][idx[hashBlock] & MASK], ax[hashBlock]);
- } else {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- cx[hashBlock] = _mm_aesenc_si128(cx[hashBlock], ax[hashBlock]);
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- uint64_t hi, lo, cl, ch;
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- bx[hashBlock] = cx[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ // dummy
}
inline static void hashPowV2(const uint8_t* __restrict__ input,
@@ -680,220 +673,53 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- uint64_t tweak1_2[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
- __m128i ax[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast<const uint8_t*>(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state, 200);
- tweak1_2[hashBlock] = (*reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35 + hashBlock * size) ^
- *(reinterpret_cast<const uint64_t*>(scratchPad[hashBlock]->state) + 24));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast<uint64_t*>(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- ax[hashBlock] = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- if (SOFT_AES) {
- cx[hashBlock] = soft_aesenc((uint32_t *) &l[hashBlock][idx[hashBlock] & MASK], ax[hashBlock]);
- } else {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- cx[hashBlock] = _mm_aesenc_si128(cx[hashBlock], ax[hashBlock]);
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK], _mm_xor_si128(bx[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- const uint8_t tmp = reinterpret_cast<const uint8_t*>(&l[hashBlock][idx[hashBlock] & MASK])[11];
- static const uint32_t table = 0x75310;
- const uint8_t index = (((tmp >> INDEX_SHIFT) & 6) | (tmp & 1)) << 1;
- ((uint8_t *) (&l[hashBlock][idx[hashBlock] & MASK]))[11] = tmp ^ ((table >> index) & 0x30);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- uint64_t hi, lo, cl, ch;
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- bx[hashBlock] = cx[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ // dummy
}
inline static void hashPowV2_asm(const uint8_t* __restrict__ input,
size_t size,
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad,
- AsmOptimization asmOptimization,
- PowVariant powVariant)
+ AsmOptimization asmOptimization)
{
- // not supported
+ // dummy
}
- // multi
inline static void hashPowV3(const uint8_t* __restrict__ input,
size_t size,
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- uint64_t sqrt_result[NUM_HASH_BLOCKS];
- __m128i bx0[NUM_HASH_BLOCKS];
- __m128i bx1[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
- __m128i ax[NUM_HASH_BLOCKS];
- __m128i division_result_xmm[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast<const uint8_t*>(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state, 200);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast<uint64_t*>(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx0[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- bx1[hashBlock] = _mm_set_epi64x(h[hashBlock][9] ^ h[hashBlock][11], h[hashBlock][8] ^ h[hashBlock][10]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
-
- division_result_xmm[hashBlock] = _mm_cvtsi64_si128(h[hashBlock][12]);
- sqrt_result[hashBlock] = h[hashBlock][13];
- }
-
- SET_ROUNDING_MODE_UP();
-
- uint64_t sqrt_result0;
- __m128i division_result_xmm0;
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- ax[hashBlock] = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- if (SOFT_AES) {
- cx[hashBlock] = soft_aesenc((uint32_t *) &l[hashBlock][idx[hashBlock] & MASK], ax[hashBlock]);
- } else {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- cx[hashBlock] = _mm_aesenc_si128(cx[hashBlock], ax[hashBlock]);
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- SHUFFLE_PHASE_1(l[hashBlock], idx[hashBlock] & MASK, bx0[hashBlock], bx1[hashBlock], ax[hashBlock])
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx0[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- uint64_t hi, lo, cl, ch;
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cl = ((uint64_t *) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t *) &l[hashBlock][idx[hashBlock] & MASK])[1];
-
- sqrt_result0 = sqrt_result[hashBlock];
- division_result_xmm0 = division_result_xmm[hashBlock];
-
- INTEGER_MATH_V2(0, cl, cx[hashBlock])
-
- sqrt_result[hashBlock] = sqrt_result0;
- division_result_xmm[hashBlock] = division_result_xmm0;
-
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- SHUFFLE_PHASE_2(l[hashBlock], idx[hashBlock] & MASK, bx0[hashBlock], bx1[hashBlock], ax[hashBlock], lo, hi)
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- bx1[hashBlock] = bx0[hashBlock];
- bx0[hashBlock] = cx[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ // dummy
}
inline static void hashPowV3_asm(const uint8_t* __restrict__ input,
size_t size,
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad,
- AsmOptimization asmOptimization,
- PowVariant powVariant)
+ AsmOptimization asmOptimization)
{
- // not supported
+ // dummy
+ }
+
+ inline static void hashPowV4(const uint8_t* __restrict__ input,
+ size_t size,
+ uint8_t* __restrict__ output,
+ ScratchPad** __restrict__ scratchPad,
+ uint64_t height)
+ {
+ // dummy
+ }
+
+ inline static void hashPowV4_asm(const uint8_t* __restrict__ input,
+ size_t size,
+ uint8_t* __restrict__ output,
+ ScratchPad** __restrict__ scratchPad,
+ uint64_t height,
+ AsmOptimization asmOptimization)
+ {
+ // dummy
}
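
The new hashPowV4/hashPowV4_asm entry points mirror the V3 ones but take the block height, since the CN-R random program has to be regenerated whenever the height changes; in this n-block template they stay dummies and only the single-hash specialisation below implements them. A hypothetical caller-side sketch (the names and the Variant enum are placeholders, not the miner's types):

```cpp
#include <cstdint>
#include <cstddef>

// Placeholder declarations standing in for the real single-hash entry points.
struct ScratchPad;
void hashPowV3(const uint8_t* in, size_t size, uint8_t* out, ScratchPad** pads);
void hashPowV4(const uint8_t* in, size_t size, uint8_t* out, ScratchPad** pads, uint64_t height);

enum class Variant { CNv2, CNR };

// Hypothetical dispatcher: only the CN-R path needs the block height.
void hashJob(Variant v, const uint8_t* in, size_t size, uint8_t* out,
             ScratchPad** pads, uint64_t height)
{
    if (v == Variant::CNR) {
        hashPowV4(in, size, out, pads, height);
    } else {
        hashPowV3(in, size, out, pads);
    }
}
```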
inline static void hashLiteTube(const uint8_t* __restrict__ input,
@@ -901,94 +727,7 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- uint64_t tweak1_2[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
- __m128i ax[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast<const uint8_t*>(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state, 200);
- tweak1_2[hashBlock] = (*reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35 + hashBlock * size) ^
- *(reinterpret_cast<const uint64_t*>(scratchPad[hashBlock]->state) + 24));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast<uint64_t*>(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- ax[hashBlock] = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- if (SOFT_AES) {
- cx[hashBlock] = soft_aesenc((uint32_t *) &l[hashBlock][idx[hashBlock] & MASK], ax[hashBlock]);
- } else {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- cx[hashBlock] = _mm_aesenc_si128(cx[hashBlock], ax[hashBlock]);
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- const uint8_t tmp = reinterpret_cast<const uint8_t*>(&l[hashBlock][idx[hashBlock] & MASK])[11];
- static const uint32_t table = 0x75310;
- const uint8_t index = (((tmp >> INDEX_SHIFT) & 6) | (tmp & 1)) << 1;
- ((uint8_t *) (&l[hashBlock][idx[hashBlock] & MASK]))[11] = tmp ^ ((table >> index) & 0x30);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- uint64_t hi, lo, cl, ch;
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t*)&l[hashBlock][idx[hashBlock] & MASK])[1] ^= ((uint64_t*)&l[hashBlock][idx[hashBlock] & MASK])[0];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- bx[hashBlock] = cx[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ // dummy
}
inline static void hashHeavy(const uint8_t* __restrict__ input,
@@ -996,171 +735,7 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
- __m128i ax[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast<const uint8_t*>(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state, 200);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast<uint64_t*>(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad_heavy((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- ax[hashBlock] = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- if (SOFT_AES) {
- cx[hashBlock] = soft_aesenc((uint32_t *) &l[hashBlock][idx[hashBlock] & MASK], ax[hashBlock]);
- } else {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- cx[hashBlock] = _mm_aesenc_si128(cx[hashBlock], ax[hashBlock]);
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- uint64_t hi, lo, cl, ch;
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- int64_t n = ((int64_t*)&l[hashBlock][idx[hashBlock] & MASK])[0];
- int32_t d = ((int32_t*)&l[hashBlock][idx[hashBlock] & MASK])[2];
- int64_t q = n / (d | 0x5);
-
- ((int64_t*)&l[hashBlock][idx[hashBlock] & MASK])[0] = n ^ q;
- idx[hashBlock] = d ^ q;
-
- bx[hashBlock] = cx[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad_heavy((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
- }
-
- inline static void hashHeavyHaven(const uint8_t* __restrict__ input,
- size_t size,
- uint8_t* __restrict__ output,
- ScratchPad** __restrict__ scratchPad)
- {
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
- __m128i ax[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast<const uint8_t*>(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state, 200);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast<uint64_t*>(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad_heavy((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- ax[hashBlock] = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- if (SOFT_AES) {
- cx[hashBlock] = soft_aesenc((uint32_t *) &l[hashBlock][idx[hashBlock] & MASK], ax[hashBlock]);
- } else {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- cx[hashBlock] = _mm_aesenc_si128(cx[hashBlock], ax[hashBlock]);
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- uint64_t hi, lo, cl, ch;
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- int64_t n = ((int64_t*)&l[hashBlock][idx[hashBlock] & MASK])[0];
- int32_t d = ((int32_t*)&l[hashBlock][idx[hashBlock] & MASK])[2];
- int64_t q = n / (d | 0x5);
-
- ((int64_t*)&l[hashBlock][idx[hashBlock] & MASK])[0] = n ^ q;
- idx[hashBlock] = (~d) ^ q;
-
- bx[hashBlock] = cx[hashBlock];
- }
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad_heavy((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ // dummy
}
inline static void hashHeavyTube(const uint8_t* __restrict__ input,
@@ -1168,130 +743,12 @@ public:
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad)
{
- const uint8_t* l[NUM_HASH_BLOCKS];
- uint64_t* h[NUM_HASH_BLOCKS];
- uint64_t al[NUM_HASH_BLOCKS];
- uint64_t ah[NUM_HASH_BLOCKS];
- uint64_t idx[NUM_HASH_BLOCKS];
- uint64_t tweak1_2[NUM_HASH_BLOCKS];
- __m128i bx[NUM_HASH_BLOCKS];
- __m128i cx[NUM_HASH_BLOCKS];
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- keccak(static_cast<const uint8_t*>(input) + hashBlock * size, (int) size, scratchPad[hashBlock]->state, 200);
- tweak1_2[hashBlock] = (*reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + 35 + hashBlock * size) ^
- *(reinterpret_cast<const uint64_t*>(scratchPad[hashBlock]->state) + 24));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- l[hashBlock] = scratchPad[hashBlock]->memory;
- h[hashBlock] = reinterpret_cast<uint64_t*>(scratchPad[hashBlock]->state);
-
- cn_explode_scratchpad_heavy((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
-
- al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
- bx[hashBlock] = _mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
- idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
- }
-
- union alignas(16) {
- uint32_t k[4];
- uint64_t v64[2];
- };
- alignas(16) uint32_t x[4];
-
-#define BYTE(p, i) ((unsigned char*)&p)[i]
-
- for (size_t i = 0; i < ITERATIONS; i++) {
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cx[hashBlock] = _mm_load_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK]);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- const __m128i &key = _mm_set_epi64x(ah[hashBlock], al[hashBlock]);
-
- _mm_store_si128((__m128i *) k, key);
- cx[hashBlock] = _mm_xor_si128(cx[hashBlock], _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
- _mm_store_si128((__m128i *) x, cx[hashBlock]);
-
- k[0] ^= saes_table[0][BYTE(x[0], 0)] ^ saes_table[1][BYTE(x[1], 1)] ^ saes_table[2][BYTE(x[2], 2)] ^
- saes_table[3][BYTE(x[3], 3)];
- x[0] ^= k[0];
- k[1] ^= saes_table[0][BYTE(x[1], 0)] ^ saes_table[1][BYTE(x[2], 1)] ^ saes_table[2][BYTE(x[3], 2)] ^
- saes_table[3][BYTE(x[0], 3)];
- x[1] ^= k[1];
- k[2] ^= saes_table[0][BYTE(x[2], 0)] ^ saes_table[1][BYTE(x[3], 1)] ^ saes_table[2][BYTE(x[0], 2)] ^
- saes_table[3][BYTE(x[1], 3)];
- x[2] ^= k[2];
- k[3] ^= saes_table[0][BYTE(x[3], 0)] ^ saes_table[1][BYTE(x[0], 1)] ^ saes_table[2][BYTE(x[1], 2)] ^
- saes_table[3][BYTE(x[2], 3)];
-
- cx[hashBlock] = _mm_load_si128((__m128i *) k);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- _mm_store_si128((__m128i *) &l[hashBlock][idx[hashBlock] & MASK],
- _mm_xor_si128(bx[hashBlock], cx[hashBlock]));
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- const uint8_t tmp = reinterpret_cast<const uint8_t*>(&l[hashBlock][idx[hashBlock] & MASK])[11];
- static const uint32_t table = 0x75310;
- const uint8_t index = (((tmp >> INDEX_SHIFT) & 6) | (tmp & 1)) << 1;
- ((uint8_t *) (&l[hashBlock][idx[hashBlock] & MASK]))[11] = tmp ^ ((table >> index) & 0x30);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- idx[hashBlock] = EXTRACT64(cx[hashBlock]);
- }
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- uint64_t hi, lo, cl, ch;
- cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
- ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
- lo = __umul128(idx[hashBlock], cl, &hi);
-
- al[hashBlock] += hi;
- ah[hashBlock] += lo;
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
- ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
-
- ah[hashBlock] ^= tweak1_2[hashBlock];
-
- ((uint64_t*)&l[hashBlock][idx[hashBlock] & MASK])[1] ^= ((uint64_t*)&l[hashBlock][idx[hashBlock] & MASK])[0];
-
- ah[hashBlock] ^= ch;
- al[hashBlock] ^= cl;
- idx[hashBlock] = al[hashBlock];
-
- int64_t n = ((int64_t*)&l[hashBlock][idx[hashBlock] & MASK])[0];
- int32_t d = ((int32_t*)&l[hashBlock][idx[hashBlock] & MASK])[2];
- int64_t q = n / (d | 0x5);
-
- ((int64_t*)&l[hashBlock][idx[hashBlock] & MASK])[0] = n ^ q;
- idx[hashBlock] = d ^ q;
-
- bx[hashBlock] = cx[hashBlock];
- }
- }
-
-#undef BYTE
-
- for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
- cn_implode_scratchpad_heavy((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
- keccakf(h[hashBlock], 24);
- extra_hashes[scratchPad[hashBlock]->state[0] & 3](scratchPad[hashBlock]->state, 200,
- output + hashBlock * 32);
- }
+ // dummy
}
};
-template
-class CryptoNightMultiHash
+template
+class CryptoNightMultiHash
{
public:
inline static void hash(const uint8_t* __restrict__ input,
@@ -1425,8 +882,7 @@ public:
size_t size,
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad,
- AsmOptimization asmOptimization,
- PowVariant powVariant)
+ AsmOptimization asmOptimization)
{
keccak(static_cast<const uint8_t*>(input), (int) size, scratchPad[0]->state, 200);
@@ -1447,7 +903,7 @@ public:
if (SOFT_AES) {
scratchPad[0]->t_fn = (const uint32_t*)saes_table;
- switch (powVariant)
+ switch (VARIANT)
{
case POW_MSR:
cnv1_main_loop_fast_soft_aes_sandybridge_asm(scratchPad[0]);
@@ -1468,7 +924,7 @@ public:
break;
}
} else {
- switch (powVariant)
+ switch (VARIANT)
{
case POW_MSR:
cnv1_main_loop_fast_sandybridge_asm(scratchPad[0]);
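
Throughout these asm paths the former powVariant argument is replaced by the VARIANT template parameter, so the variant switch is resolved per instantiation rather than per call. A minimal sketch of that pattern with hypothetical names (not the miner's types); with a compile-time VARIANT the optimizer can fold the switch and drop the untaken branches:

```cpp
#include <cstdio>

enum class Pow { Default, Turtle, Double, Zelerius, Rwz };

// Minimal sketch: when the variant is a template parameter, every branch but the
// selected one is dead code in a given instantiation.
template <Pow VARIANT>
void run_main_loop()
{
    switch (VARIANT) {
    case Pow::Turtle:   std::puts("ultralite loop"); break;
    case Pow::Double:   std::puts("xcash loop");     break;
    case Pow::Zelerius: std::puts("zelerius loop");  break;
    case Pow::Rwz:      std::puts("rwz loop");       break;
    default:            std::puts("default loop");   break;
    }
}

int main()
{
    run_main_loop<Pow::Zelerius>(); // only the zelerius branch is reachable here
}
```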
@@ -1533,7 +989,7 @@ public:
cx = _mm_aesenc_si128(cx, ax);
}
- SHUFFLE_PHASE_1(l, (idx&MASK), bx0, bx1, ax)
+ SHUFFLE_PHASE_1(l, (idx&MASK), bx0, bx1, ax, VARIANT == POW_RWZ)
_mm_store_si128((__m128i*) &l[idx & MASK], _mm_xor_si128(bx0, cx));
@@ -1547,7 +1003,7 @@ public:
lo = __umul128(idx, cl, &hi);
- SHUFFLE_PHASE_2(l, (idx&MASK), bx0, bx1, ax, lo, hi)
+ SHUFFLE_PHASE_2(l, (idx&MASK), bx0, bx1, ax, lo, hi, VARIANT == POW_RWZ)
al += hi; // two fence statements are overhead
ah += lo;
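
The extra VARIANT == POW_RWZ argument feeds the new reverse parameter added to the shuffle macros; the Graft "reversed waltz" variant walks the three neighbouring lines in the opposite order. A small illustration of how such a compile-time flag can flip the pairing (the pairing shown for the reverse case is illustrative only; the authoritative ordering is whatever the full macro bodies define):

```cpp
#include <cstdint>

// Hypothetical compile-time 'reverse' switch for one lane of a CNv2-style shuffle:
// the same three loads and stores happen either way, only which chunk is paired
// with which register/destination changes.
template <bool REVERSE>
inline void shuffle_lane(uint64_t dst[3], const uint64_t chunk[3],
                         uint64_t bx0, uint64_t bx1, uint64_t ax)
{
    if (REVERSE) {
        // rwz-style: neighbouring lines visited the other way round (illustrative pairing)
        dst[0] = chunk[0] + bx1;
        dst[1] = chunk[2] + bx0;
        dst[2] = chunk[1] + ax;
    } else {
        // default CNv2 pairing, matching SHUFFLE_PHASE_1 above
        dst[0] = chunk[2] + bx1;   // line at idx ^ 0x10 gets chunk3 + bx1
        dst[1] = chunk[0] + bx0;   // line at idx ^ 0x20 gets chunk1 + bx0
        dst[2] = chunk[1] + ax;    // line at idx ^ 0x30 gets chunk2 + ax
    }
}
```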
@@ -1568,14 +1024,12 @@ public:
extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
}
-
// single asm
inline static void hashPowV3_asm(const uint8_t* __restrict__ input,
size_t size,
uint8_t* __restrict__ output,
ScratchPad** __restrict__ scratchPad,
- AsmOptimization asmOptimization,
- PowVariant powVariant)
+ AsmOptimization asmOptimization)
{
const uint8_t* l = scratchPad[0]->memory;
uint64_t* h = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
@@ -1589,7 +1043,7 @@ public:
scratchPad[0]->input = input;
scratchPad[0]->t_fn = (const uint32_t*)saes_table;
- switch (powVariant)
+ switch (VARIANT)
{
case POW_FAST_2:
cnv2_main_loop_fastv2_soft_aes_sandybridge_asm(scratchPad[0]);
@@ -1597,12 +1051,18 @@ public:
case POW_TURTLE:
cnv2_main_loop_ultralite_soft_aes_sandybridge_asm(scratchPad[0]);
break;
+ case POW_DOUBLE:
+ cnv2_main_loop_xcash_soft_aes_sandybridge_asm(scratchPad[0]);
+ break;
+ case POW_ZELERIUS:
+ cnv2_main_loop_zelerius_soft_aes_sandybridge_asm(scratchPad[0]);
+ break;
default:
cnv2_main_loop_soft_aes_sandybridge_asm(scratchPad[0]);
break;
}
} else {
- switch (powVariant)
+ switch (VARIANT)
{
case POW_FAST_2:
cnv2_main_loop_fastv2_ivybridge_asm(scratchPad[0]);
@@ -1610,13 +1070,22 @@ public:
case POW_TURTLE:
cnv2_main_loop_ultralite_ivybridge_asm(scratchPad[0]);
break;
+ case POW_DOUBLE:
+ cnv2_main_loop_xcash_ivybridge_asm(scratchPad[0]);
+ break;
+ case POW_ZELERIUS:
+ cnv2_main_loop_zelerius_ivybridge_asm(scratchPad[0]);
+ break;
+ case POW_RWZ:
+ cnv2_main_loop_rwz_all_asm(scratchPad[0]);
+ break;
default:
cnv2_main_loop_ivybridge_asm(scratchPad[0]);
break;
}
}
} else if (asmOptimization == AsmOptimization::ASM_RYZEN) {
- switch (powVariant)
+ switch (VARIANT)
{
case POW_FAST_2:
cnv2_main_loop_fastv2_ryzen_asm(scratchPad[0]);
@@ -1624,12 +1093,21 @@ public:
case POW_TURTLE:
cnv2_main_loop_ultralite_ryzen_asm(scratchPad[0]);
break;
+ case POW_DOUBLE:
+ cnv2_main_loop_xcash_ryzen_asm(scratchPad[0]);
+ break;
+ case POW_ZELERIUS:
+ cnv2_main_loop_zelerius_ryzen_asm(scratchPad[0]);
+ break;
+ case POW_RWZ:
+ cnv2_main_loop_rwz_all_asm(scratchPad[0]);
+ break;
default:
cnv2_main_loop_ryzen_asm(scratchPad[0]);
break;
}
} else if (asmOptimization == AsmOptimization::ASM_BULLDOZER) {
- switch (powVariant)
+ switch (VARIANT)
{
case POW_FAST_2:
cnv2_main_loop_fastv2_bulldozer_asm(scratchPad[0]);
@@ -1637,6 +1115,15 @@ public:
case POW_TURTLE:
cnv2_main_loop_ultralite_bulldozer_asm(scratchPad[0]);
break;
+ case POW_DOUBLE:
+ cnv2_main_loop_xcash_bulldozer_asm(scratchPad[0]);
+ break;
+ case POW_ZELERIUS:
+ cnv2_main_loop_zelerius_bulldozer_asm(scratchPad[0]);
+ break;
+ case POW_RWZ:
+ cnv2_main_loop_rwz_all_asm(scratchPad[0]);
+ break;
default:
cnv2_main_loop_bulldozer_asm(scratchPad[0]);
break;
@@ -1649,6 +1136,140 @@ public:
extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
}
+ // single
+ inline static void hashPowV4(const uint8_t* __restrict__ input,
+ size_t size,
+ uint8_t* __restrict__ output,
+ ScratchPad** __restrict__ scratchPad,
+ uint64_t height)
+ {
+ keccak(static_cast